From 426061446b79d41704b081152bd9439e115ec186 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 29 Sep 2023 17:39:49 +0200 Subject: [PATCH 01/18] Add softmax -> dropout -> mm <- non pattern pattern / add new names to pattern --- nncf/torch/quantization/ignored_patterns.py | 48 ++++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/nncf/torch/quantization/ignored_patterns.py b/nncf/torch/quantization/ignored_patterns.py index d4816cc482e..00c48f49dfd 100644 --- a/nncf/torch/quantization/ignored_patterns.py +++ b/nncf/torch/quantization/ignored_patterns.py @@ -35,6 +35,33 @@ def _add_softmax_matmul( pattern.add_edge(matmul_branch_nodes, matmul) +def _add_softmax_dropout_matmul( + pattern: GraphPattern, matmul_aliases, reshape_squeeze_aliases, gather_aliases, transpose_aliases +) -> None: + # SOFTMAX + # \ + # \ + # \ + # DROPOUT RESHAPE||TRANSPOSE||GATHER||SQUEEZE + # \ / + # \ / + # \ / + # \ / + # \ / + # \ / + # MATMUL + branch_matmul_nodes = reshape_squeeze_aliases + gather_aliases + transpose_aliases + softmax = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SOFTMAX", GraphPattern.METATYPE_ATTR: "softmax"}) + dropout = pattern.add_node(**{GraphPattern.LABEL_ATTR: "DROPOUT", GraphPattern.METATYPE_ATTR: "dropout"}) + matmul = pattern.add_node(**{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: matmul_aliases}) + matmul_branch_nodes = pattern.add_node( + **{GraphPattern.LABEL_ATTR: "NON_PATTERN", GraphPattern.METATYPE_ATTR: branch_matmul_nodes} + ) + pattern.add_edge(softmax, dropout) + pattern.add_edge(dropout, matmul) + pattern.add_edge(matmul_branch_nodes, matmul) + + def _add_softmax_reshape_matmul( pattern: GraphPattern, matmul_aliases, reshape_squeeze_aliases, gather_aliases, transpose_aliases ) -> None: @@ -67,8 +94,18 @@ def _add_softmax_reshape_matmul( @PT_IGNORED_PATTERNS.register(IgnoredPatternNames.MULTIHEAD_ATTENTION_OUTPUT) def create_multihead_attention_output() -> GraphPattern: - matmul_aliases = ["linear", "addmm", "matmul", "bmm", "mm", "baddbmm"] - reshape_squeeze_aliases = ["reshape", "view", "flatten", "squeeze", "unsqueeze", "squeeze", "flatten", "unsqueeze"] + matmul_aliases = ["linear", "addmm", "matmul", "bmm", "mm", "baddbmm", "__matmul__"] + reshape_squeeze_aliases = [ + "reshape", + "view", + "flatten", + "squeeze", + "unsqueeze", + "squeeze", + "flatten", + "unsqueeze", + "unbind", + ] gather_aliases = ["gather", "index_select", "where", "index_select", "__getitem__"] transpose_aliases = ["transpose", "permute", "transpose_"] @@ -80,6 +117,13 @@ def create_multihead_attention_output() -> GraphPattern: gather_aliases=gather_aliases, transpose_aliases=transpose_aliases, ) + _add_softmax_dropout_matmul( + pattern, + matmul_aliases=matmul_aliases, + reshape_squeeze_aliases=reshape_squeeze_aliases, + gather_aliases=gather_aliases, + transpose_aliases=transpose_aliases, + ) _add_softmax_reshape_matmul( pattern, matmul_aliases=matmul_aliases, From 9698e6cf8e8c647dc553f4d3df516552272c19b7 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 4 Oct 2023 13:54:42 +0200 Subject: [PATCH 02/18] LayerNorm metatype was added to ignored metatypes for MinMax --- nncf/quantization/algorithms/min_max/torch_backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index 0a8fe5778c5..d31f4897641 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -315,6 
+315,8 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O om.PTDivMetatype, om.PTMaxMetatype, om.PTSqueezeMetatype, + om.PTLayerNormMetatype, + om.PTModuleLayerNormMetatype, ] if device != TargetDevice.CPU_SPR: types.append(om.PTMulMetatype) From 39d785b2169a56415ecd2c6bf92edc8b0c9f7574 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Thu, 5 Oct 2023 11:28:45 +0200 Subject: [PATCH 03/18] Add GroupNorm to ignored MinMax metatypes --- nncf/quantization/algorithms/min_max/torch_backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index d31f4897641..a4a4ef549aa 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -317,6 +317,8 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O om.PTSqueezeMetatype, om.PTLayerNormMetatype, om.PTModuleLayerNormMetatype, + om.PTGroupNormMetatype, + om.PTModuleGroupNormMetatype, ] if device != TargetDevice.CPU_SPR: types.append(om.PTMulMetatype) From 4aa081103274234e60e18331cc782d9ef396b59f Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Thu, 5 Oct 2023 11:32:50 +0200 Subject: [PATCH 04/18] Duplicates are removed from ignored torch patterns --- nncf/torch/quantization/ignored_patterns.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/nncf/torch/quantization/ignored_patterns.py b/nncf/torch/quantization/ignored_patterns.py index 00c48f49dfd..c49aac002f7 100644 --- a/nncf/torch/quantization/ignored_patterns.py +++ b/nncf/torch/quantization/ignored_patterns.py @@ -99,11 +99,8 @@ def create_multihead_attention_output() -> GraphPattern: "reshape", "view", "flatten", - "squeeze", "unsqueeze", "squeeze", - "flatten", - "unsqueeze", "unbind", ] gather_aliases = ["gather", "index_select", "where", "index_select", "__getitem__"] From c6dcc08e87c0b63628954d1197bd978634c15cee Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 6 Oct 2023 18:10:49 +0200 Subject: [PATCH 05/18] Dropout removing pass is added to function --- nncf/common/graph/graph.py | 12 ++- .../algorithms/accuracy_control/ranker.py | 7 +- .../algorithms/min_max/algorithm.py | 5 +- .../algorithms/min_max/backend.py | 7 ++ .../algorithms/min_max/onnx_backend.py | 4 + .../algorithms/min_max/openvino_backend.py | 4 + .../algorithms/min_max/torch_backend.py | 4 + nncf/quantization/passes.py | 74 ++++++++++++++--- nncf/torch/quantization/ignored_patterns.py | 34 -------- tests/common/quantization/test_passes.py | 44 ++++++++++ .../quantization/test_quantizer_removal.py | 5 +- tests/post_training/test_templates/models.py | 80 +++++++++++++++++++ .../test_templates/test_ptq_params.py | 12 +-- .../test_templates/test_quantizer_config.py | 10 ++- .../dropout_synthetic_model.dot | 19 +++++ 15 files changed, 258 insertions(+), 63 deletions(-) create mode 100644 tests/common/quantization/test_passes.py create mode 100644 tests/torch/data/reference_graphs/passes/dropout_removed/dropout_synthetic_model.dot diff --git a/nncf/common/graph/graph.py b/nncf/common/graph/graph.py index 0b5b3cf3db4..9ec7fa2ebf0 100644 --- a/nncf/common/graph/graph.py +++ b/nncf/common/graph/graph.py @@ -594,12 +594,22 @@ def get_graph_for_structure_analysis(self, extended: bool = False) -> nx.DiGraph attrs_edge = {} u = u.replace(__RESERVED_DOT_CHARACTER, __CHARACTER_REPLACE_TO) v = v.replace(__RESERVED_DOT_CHARACTER, __CHARACTER_REPLACE_TO) + label = {} + if 
edge[NNCFGraph.PARALLEL_INPUT_PORT_IDS_ATTR]: + label["parallel_input_port_ids"] = edge[NNCFGraph.PARALLEL_INPUT_PORT_IDS_ATTR] + if extended: if edge[NNCFGraph.DTYPE_EDGE_ATTR] is Dtype.INTEGER: attrs_edge["style"] = "dashed" else: attrs_edge["style"] = "solid" - attrs_edge["label"] = edge[NNCFGraph.ACTIVATION_SHAPE_EDGE_ATTR] + label["shape"] = edge[NNCFGraph.ACTIVATION_SHAPE_EDGE_ATTR] + + if label: + if len(label) == 1 and extended: + attrs_edge["label"] = label.popitem()[1] + else: + attrs_edge["label"] = ", ".join((f"{k}:{v}" for k, v in label.items())) out_graph.add_edge(u, v, **attrs_edge) return out_graph diff --git a/nncf/quantization/algorithms/accuracy_control/ranker.py b/nncf/quantization/algorithms/accuracy_control/ranker.py index 58e78807685..9449aec74b3 100644 --- a/nncf/quantization/algorithms/accuracy_control/ranker.py +++ b/nncf/quantization/algorithms/accuracy_control/ranker.py @@ -28,7 +28,7 @@ from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator from nncf.quantization.algorithms.accuracy_control.rank_functions import create_normalized_mse_func from nncf.quantization.algorithms.accuracy_control.subset_selection import select_subset -from nncf.quantization.passes import remove_shapeof_subgraphs +from nncf.quantization.passes import remove_shapeof_subgraphs_inplace TModel = TypeVar("TModel") TPModel = TypeVar("TPModel") @@ -98,8 +98,9 @@ def find_groups_of_quantizers_to_rank(self, quantized_model_graph: NNCFGraph) -> if x.metatype in self._algo_backend.get_quantizer_metatypes() ] - quantized_model_graph_without_shapeof = remove_shapeof_subgraphs( - deepcopy(quantized_model_graph), self._algo_backend.get_shapeof_metatypes() + quantized_model_graph_without_shapeof = deepcopy(quantized_model_graph) + remove_shapeof_subgraphs_inplace( + quantized_model_graph_without_shapeof, self._algo_backend.get_shapeof_metatypes() ) for quantizer_node in reversed(quantizers): diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 861e0226873..10342cdd198 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -505,7 +505,10 @@ def _get_quantization_target_points( hw_patterns = PatternsManager.get_full_hw_pattern_graph(backend=backend, device=device, model_type=model_type) inference_nncf_graph = transform_to_inference_graph( - deepcopy(nncf_graph), self._backend_entity.shapeof_metatypes, self._backend_entity.read_variable_metatypes + nncf_graph, + self._backend_entity.shapeof_metatypes, + self._backend_entity.dropout_metatypes, + self._backend_entity.read_variable_metatypes, ) quantizer_setup = self._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index 254a1c949cf..2e6018ffa7c 100644 --- a/nncf/quantization/algorithms/min_max/backend.py +++ b/nncf/quantization/algorithms/min_max/backend.py @@ -56,6 +56,13 @@ def shapeof_metatypes(self) -> List[OperatorMetatype]: Property for the backend-specific ShapeOf metatypes. """ + @property + @abstractmethod + def dropout_metatypes(self) -> List[OperatorMetatype]: + """ + Property for the backend-specific Dropout metatypes. 
+ """ + @property @abstractmethod def conv_metatypes(self) -> List[OperatorMetatype]: diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 47cf5695832..6be88004ee0 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -60,6 +60,10 @@ def post_processing_metatypes(self) -> List[OperatorMetatype]: def shapeof_metatypes(self) -> List[OperatorMetatype]: return [om.ONNXShapeMetatype] + @property + def dropout_metatypes(self) -> List[OperatorMetatype]: + return [] + @property def conv_metatypes(self) -> List[OperatorMetatype]: return [om.ONNXConvolutionMetatype] diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 4ad4e309dc8..991e832155e 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -60,6 +60,10 @@ def post_processing_metatypes(self) -> List[OperatorMetatype]: def shapeof_metatypes(self) -> List[OperatorMetatype]: return [om.OVShapeOfMetatype] + @property + def dropout_metatypes(self) -> List[OperatorMetatype]: + return [] + @property def conv_metatypes(self) -> List[OperatorMetatype]: return [om.OVConvolutionMetatype] diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index a4a4ef549aa..f1d33bfeaae 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -72,6 +72,10 @@ def post_processing_metatypes(self) -> List[OperatorMetatype]: def shapeof_metatypes(self) -> List[OperatorMetatype]: return [] + @property + def dropout_metatypes(self) -> List[OperatorMetatype]: + return [om.PTDropoutMetatype] + @property def conv_metatypes(self) -> List[OperatorMetatype]: return [om.PTModuleConv1dMetatype, om.PTModuleConv2dMetatype, om.PTModuleConv3dMetatype] diff --git a/nncf/quantization/passes.py b/nncf/quantization/passes.py index e6af74c8271..833e6c19f62 100644 --- a/nncf/quantization/passes.py +++ b/nncf/quantization/passes.py @@ -10,6 +10,7 @@ # limitations under the License. import collections +from copy import deepcopy from typing import List, Optional, TypeVar from nncf.common.graph.graph import NNCFGraph @@ -23,6 +24,7 @@ def transform_to_inference_graph( nncf_graph: NNCFGraph, shapeof_metatypes: List[OperatorMetatype], + dropout_metatypes: List[OperatorMetatype], read_variable_metatypes: Optional[List[OperatorMetatype]] = None, ) -> NNCFGraph: """ @@ -30,28 +32,30 @@ def transform_to_inference_graph( :param nncf_graph: NNCFGraph instance for the transformation. :param shapeof_metatypes: List of backend-specific ShapeOf metatypes. + :param dropout_metatypes: List of backend-specific Dropout metatypes. :param read_variable_metatypes: List of backend-specific metatypes that also can be interpreted as inputs (ReadValue). :return: NNCFGraph in the inference style. 
""" - inference_nncf_graph = remove_shapeof_subgraphs(nncf_graph, shapeof_metatypes, read_variable_metatypes) - inference_nncf_graph = filter_constant_nodes(nncf_graph, read_variable_metatypes) - return inference_nncf_graph + inference_graph = deepcopy(nncf_graph) + remove_shapeof_subgraphs_inplace(inference_graph, shapeof_metatypes, read_variable_metatypes) + remove_dropout_nodes_inplace(inference_graph, dropout_metatypes) + filter_constant_nodes_inplace(inference_graph, read_variable_metatypes) + return inference_graph -def remove_shapeof_subgraphs( +def remove_shapeof_subgraphs_inplace( nncf_graph: NNCFGraph, shapeof_metatypes: List[OperatorMetatype], read_variable_metatypes: Optional[List[OperatorMetatype]] = None, -) -> NNCFGraph: +) -> None: """ - Removes the ShapeOf subgraphs from the provided NNCFGraph instance. + Removes the ShapeOf subgraphs from the provided NNCFGraph instance inplace. :param nncf_graph: NNCFGraph instance for the transformation. :param shapeof_metatypes: List of backend-specific ShapeOf metatypes. :param read_variable_metatypes: List of backend-specific metatypes that also can be interpreted as inputs (ReadValue). - :return: NNCFGraph without ShapeOf subgraphs. """ read_variable_metatypes = read_variable_metatypes if read_variable_metatypes else [] nodes_to_drop = set() @@ -85,20 +89,65 @@ def remove_shapeof_subgraphs( shape_of_queue.extend(nncf_graph.get_next_nodes(node) + nncf_graph.get_previous_nodes(node)) nncf_graph.remove_nodes_from(nodes_to_drop) - return nncf_graph -def filter_constant_nodes( +def remove_dropout_nodes_inplace( + nncf_graph: NNCFGraph, + dropout_metatypes: List[OperatorMetatype], +) -> None: + """ + Removes the Dropout nodes from the provided NNCFGraph instance and + connects droput previous node with dropout next nodes inplace + for each dropout node. + + :param nncf_graph: NNCFGraph instance for the transformation. + :param dropout_metatypes: List of backend-specific Dropout metatypes. + """ + if not dropout_metatypes: + return + + nodes_to_drop = [] + for node in nncf_graph.get_all_nodes(): + if node.metatype in dropout_metatypes: + nodes_to_drop.append(node) + + prev_nodes = nncf_graph.get_previous_nodes(node) + input_edges = nncf_graph.get_input_edges(node) + assert len(prev_nodes) == len(input_edges) == 1 + prev_node = prev_nodes[0] + input_edge = input_edges[0] + assert not input_edge.parallel_input_port_ids + + # nncf_graph.get_next_edges is not used to preserve + # parallel_input_port_ids + for output_node in nncf_graph.get_next_nodes(node): + output_edge = nncf_graph.get_edge(node, output_node) + # Connects dropout previous node with all next nodes + # to keep NNCFGraph connected. + assert input_edge.dtype == output_edge.dtype + assert input_edge.tensor_shape == output_edge.tensor_shape + nncf_graph.add_edge_between_nncf_nodes( + from_node_id=prev_node.node_id, + to_node_id=output_edge.to_node.node_id, + tensor_shape=input_edge.tensor_shape, + input_port_id=output_edge.input_port_id, + output_port_id=input_edge.output_port_id, + dtype=input_edge.dtype, + parallel_input_port_ids=output_edge.parallel_input_port_ids, + ) + nncf_graph.remove_nodes_from(nodes_to_drop) + + +def filter_constant_nodes_inplace( nncf_graph: NNCFGraph, read_variable_metatypes: Optional[List[OperatorMetatype]] = None -) -> NNCFGraph: +) -> None: """ - Removes all Constant nodes from NNCFGraph, making it inference graph. + Removes all Constant nodes from NNCFGraph inplace, making it inference graph. 
The traversing starts from the input nodes and nodes with weights. :param nncf_graph: NNCFGraph instance for the transformation. :param read_variable_metatypes: List of backend-specific metatypes that also can be interpreted as inputs (ReadValue). - :return: NNCFGraph without Constant nodes. """ read_variable_metatypes = read_variable_metatypes if read_variable_metatypes else [] input_nodes = nncf_graph.get_input_nodes() @@ -119,7 +168,6 @@ def filter_constant_nodes( nodes_queue.extend(nncf_graph.get_next_nodes(node)) constant_nodes = [node for node in nncf_graph.get_all_nodes() if node not in visited_nodes] nncf_graph.remove_nodes_from(constant_nodes) - return nncf_graph def insert_null_biases_pass(model: TModel, graph: NNCFGraph) -> TModel: diff --git a/nncf/torch/quantization/ignored_patterns.py b/nncf/torch/quantization/ignored_patterns.py index c49aac002f7..799a709a1fb 100644 --- a/nncf/torch/quantization/ignored_patterns.py +++ b/nncf/torch/quantization/ignored_patterns.py @@ -35,33 +35,6 @@ def _add_softmax_matmul( pattern.add_edge(matmul_branch_nodes, matmul) -def _add_softmax_dropout_matmul( - pattern: GraphPattern, matmul_aliases, reshape_squeeze_aliases, gather_aliases, transpose_aliases -) -> None: - # SOFTMAX - # \ - # \ - # \ - # DROPOUT RESHAPE||TRANSPOSE||GATHER||SQUEEZE - # \ / - # \ / - # \ / - # \ / - # \ / - # \ / - # MATMUL - branch_matmul_nodes = reshape_squeeze_aliases + gather_aliases + transpose_aliases - softmax = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SOFTMAX", GraphPattern.METATYPE_ATTR: "softmax"}) - dropout = pattern.add_node(**{GraphPattern.LABEL_ATTR: "DROPOUT", GraphPattern.METATYPE_ATTR: "dropout"}) - matmul = pattern.add_node(**{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: matmul_aliases}) - matmul_branch_nodes = pattern.add_node( - **{GraphPattern.LABEL_ATTR: "NON_PATTERN", GraphPattern.METATYPE_ATTR: branch_matmul_nodes} - ) - pattern.add_edge(softmax, dropout) - pattern.add_edge(dropout, matmul) - pattern.add_edge(matmul_branch_nodes, matmul) - - def _add_softmax_reshape_matmul( pattern: GraphPattern, matmul_aliases, reshape_squeeze_aliases, gather_aliases, transpose_aliases ) -> None: @@ -114,13 +87,6 @@ def create_multihead_attention_output() -> GraphPattern: gather_aliases=gather_aliases, transpose_aliases=transpose_aliases, ) - _add_softmax_dropout_matmul( - pattern, - matmul_aliases=matmul_aliases, - reshape_squeeze_aliases=reshape_squeeze_aliases, - gather_aliases=gather_aliases, - transpose_aliases=transpose_aliases, - ) _add_softmax_reshape_matmul( pattern, matmul_aliases=matmul_aliases, diff --git a/tests/common/quantization/test_passes.py b/tests/common/quantization/test_passes.py new file mode 100644 index 00000000000..ed352cdfdfc --- /dev/null +++ b/tests/common/quantization/test_passes.py @@ -0,0 +1,44 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from enum import Enum + +import pytest + +from nncf.quantization.passes import remove_dropout_nodes_inplace +from tests.post_training.test_templates.models import NNCFGraphDropoutRemovingCase +from tests.torch.test_compressed_graph import check_graph + +REF_DIR = "passes/dropout_removed" + + +class TestModes(Enum): + VALID = "valid" + WRONG_TENSOR_SHAPE = "wrong_dropout_node" + WRONG_PARALLEL_EDGES = "wrong_parallel_edges" + + +@pytest.mark.parametrize("mode", [TestModes.VALID, TestModes.WRONG_TENSOR_SHAPE, TestModes.WRONG_PARALLEL_EDGES]) +def test_remove_dropout_nodes_inplace(mode: TestModes): + dot_reference_path = "dropout_synthetic_model.dot" + dropout_metatype = "DROPOUT_METATYPE" + kwargs = {} + if mode != TestModes.VALID: + kwargs.update({mode.value: True}) + + nncf_graph = NNCFGraphDropoutRemovingCase(dropout_metatype, **kwargs).nncf_graph + if mode != TestModes.VALID: + with pytest.raises(AssertionError): + remove_dropout_nodes_inplace(nncf_graph, [dropout_metatype]) + return + + remove_dropout_nodes_inplace(nncf_graph, [dropout_metatype]) + check_graph(nncf_graph, dot_reference_path, REF_DIR) diff --git a/tests/common/quantization/test_quantizer_removal.py b/tests/common/quantization/test_quantizer_removal.py index 159095fd0ae..bb46b6585e0 100644 --- a/tests/common/quantization/test_quantizer_removal.py +++ b/tests/common/quantization/test_quantizer_removal.py @@ -18,7 +18,7 @@ from nncf.common.graph import NNCFGraph from nncf.common.graph.layer_attributes import Dtype from nncf.common.quantization.quantizer_removal import find_quantizer_nodes_to_cut -from nncf.quantization.passes import remove_shapeof_subgraphs +from nncf.quantization.passes import remove_shapeof_subgraphs_inplace from tests.common.quantization.metatypes import CONSTANT_METATYPES from tests.common.quantization.metatypes import METATYPES_FOR_TEST from tests.common.quantization.metatypes import QUANTIZABLE_METATYPES @@ -226,7 +226,8 @@ def create_test_params(): @pytest.mark.parametrize("nncf_graph,test_case", create_test_params()) def test_find_quantizer_nodes_to_cut(nncf_graph: NNCFGraph, test_case: TestCase): quantizer_node = nncf_graph.get_node_by_name(test_case.node_name) - nncf_graph_without_shapeof = remove_shapeof_subgraphs(deepcopy(nncf_graph), SHAPEOF_METATYPES) + nncf_graph_without_shapeof = deepcopy(nncf_graph) + remove_shapeof_subgraphs_inplace(nncf_graph_without_shapeof, SHAPEOF_METATYPES) nodes, ops = find_quantizer_nodes_to_cut( nncf_graph_without_shapeof, quantizer_node, diff --git a/tests/post_training/test_templates/models.py b/tests/post_training/test_templates/models.py index 546a4104318..e22a012c34a 100644 --- a/tests/post_training/test_templates/models.py +++ b/tests/post_training/test_templates/models.py @@ -10,6 +10,7 @@ # limitations under the License. 
from nncf.common.graph import NNCFGraph +from nncf.common.graph.layer_attributes import Dtype from nncf.common.graph.operator_metatypes import InputNoopMetatype from nncf.common.graph.operator_metatypes import OutputNoopMetatype from tests.common.quantization.metatypes import ConstantTestMetatype @@ -218,3 +219,82 @@ def __init__( node_edges.extend([("Conv_2", "Output_1")]) original_mock_graph = create_mock_graph(nodes, node_edges) self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, nncf_graph_cls) + + +class NNCFGraphDropoutRemovingCase: + def __init__( + self, + dropout_metatype, + wrong_dropout_node: bool = False, + wrong_parallel_edges: bool = False, + nncf_graph_cls=NNCFGraph, + ): + nodes = [ + NodeWithType("Input_1", InputNoopMetatype), + NodeWithType("Split_1", None), + NodeWithType( + "Dropout_1", + dropout_metatype, + ), + NodeWithType("Output_1", OutputNoopMetatype), + NodeWithType( + "Dropout_2", + dropout_metatype, + ), + NodeWithType("Output_2_1", OutputNoopMetatype), + NodeWithType("Output_2_2", OutputNoopMetatype), + NodeWithType("Output_2_3", OutputNoopMetatype), + NodeWithType( + "Dropout_3", + dropout_metatype, + ), + NodeWithType("Output_3", OutputNoopMetatype), + ] + node_edges = [ + ("Input_1", "Split_1"), + ("Split_1", "Dropout_1"), + ("Dropout_1", "Output_1"), + ("Split_1", "Dropout_2"), + ("Dropout_2", "Output_2_1"), + ("Dropout_2", "Output_2_2"), + ("Dropout_2", "Output_2_3"), + ("Split_1", "Dropout_3"), + ("Dropout_3", "Output_3"), + ] + original_mock_graph = create_mock_graph(nodes, node_edges) + self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, nncf_graph_cls) + + dropout_2 = self.nncf_graph.get_node_by_key("3 /Dropout_2_0") + output = self.nncf_graph.add_nncf_node("/Output_2_4_0", "output", OutputNoopMetatype) + tensor_shape = [1, 2, 1, 1] if wrong_dropout_node else [1, 1, 1, 1] + self.nncf_graph.add_edge_between_nncf_nodes( + dropout_2.node_id, + output.node_id, + tensor_shape=tensor_shape, + input_port_id=15, + output_port_id=1, + dtype=Dtype.FLOAT, + ) + + dropout_2 = self.nncf_graph.get_node_by_key("4 /Dropout_3_0") + output = self.nncf_graph.add_nncf_node("/Output_3_1_0", "output", OutputNoopMetatype) + self.nncf_graph.add_edge_between_nncf_nodes( + dropout_2.node_id, + output.node_id, + tensor_shape=tensor_shape, + input_port_id=1, + output_port_id=1, + dtype=Dtype.FLOAT, + parallel_input_port_ids=list(range(2, 10)), + ) + if wrong_parallel_edges: + dropout_4 = self.nncf_graph.add_nncf_node("100 /dropoiut", "dropout", dropout_metatype) + self.nncf_graph.add_edge_between_nncf_nodes( + self.nncf_graph.get_node_by_key("0 /Input_1_0").node_id, + dropout_4.node_id, + tensor_shape=[1, 1, 1, 1], + input_port_id=0, + output_port_id=0, + dtype=Dtype.FLOAT, + parallel_input_port_ids=list(range(1, 10)), + ) diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index a2ec340c2ff..84c1332baed 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -10,7 +10,6 @@ # limitations under the License. 
from abc import abstractmethod from collections import Counter -from copy import deepcopy from typing import Dict import pytest @@ -173,8 +172,9 @@ def test_quantize_outputs(self, test_params, quantize_outputs): hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] inference_nncf_graph = transform_to_inference_graph( - deepcopy(nncf_graph), + nncf_graph, min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) @@ -198,8 +198,9 @@ def test_ignored_scopes(self, test_params, ignored_scopes_data): hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] inference_nncf_graph = transform_to_inference_graph( - deepcopy(nncf_graph), + nncf_graph, min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) @@ -223,8 +224,9 @@ def test_model_type_pass(self, test_params, model_type): hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] inference_nncf_graph = transform_to_inference_graph( - deepcopy(nncf_graph), + nncf_graph, min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) @@ -288,7 +290,7 @@ def test_quantization_points_overflow_fix(self, overflow_fix, affected_target_po @pytest.mark.parametrize("validate_scopes", (True, False)) def test_validate_scope(self, test_params, validate_scopes): nncf_graph = test_params["test_model_type_pass"]["nncf_graph"] - inference_nncf_graph = transform_to_inference_graph(deepcopy(nncf_graph), []) + inference_nncf_graph = transform_to_inference_graph(nncf_graph, [], []) ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] algo = MinMaxQuantization( ignored_scope=IgnoredScope(names=["some_node"], validate=validate_scopes), diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 72da2111a36..2f73dc3df78 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -10,7 +10,6 @@ # limitations under the License. 
from abc import abstractmethod -from copy import deepcopy from dataclasses import dataclass from typing import List @@ -91,8 +90,9 @@ def test_default_quantizer_config(self, single_conv_nncf_graph): min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph inference_nncf_graph = transform_to_inference_graph( - deepcopy(nncf_graph), + nncf_graph, min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) q_setup = min_max_algo._get_quantizer_setup( @@ -147,8 +147,9 @@ def test_quantizer_config_from_ptq_params_for_CPU( min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph inference_nncf_graph = transform_to_inference_graph( - deepcopy(nncf_graph), + nncf_graph, min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) if signed_weights is False or signed_activations in [True, False]: # Incompatible with HW CPU config @@ -189,8 +190,9 @@ def test_depthwise_conv_default_quantizer_config(self, depthwise_conv_nncf_graph min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = depthwise_conv_nncf_graph.nncf_graph inference_nncf_graph = transform_to_inference_graph( - deepcopy(nncf_graph), + nncf_graph, min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) q_setup = min_max_algo._get_quantizer_setup( diff --git a/tests/torch/data/reference_graphs/passes/dropout_removed/dropout_synthetic_model.dot b/tests/torch/data/reference_graphs/passes/dropout_removed/dropout_synthetic_model.dot new file mode 100644 index 00000000000..abbe8c0b642 --- /dev/null +++ b/tests/torch/data/reference_graphs/passes/dropout_removed/dropout_synthetic_model.dot @@ -0,0 +1,19 @@ +strict digraph { +"0 /Input_1_0" [id=0, type=Input_1]; +"1 /Split_1_0" [id=1, type=Split_1]; +"5 /Output_1_0" [id=5, type=Output_1]; +"6 /Output_2_1_0" [id=6, type=Output_2_1]; +"7 /Output_2_2_0" [id=7, type=Output_2_2]; +"8 /Output_2_3_0" [id=8, type=Output_2_3]; +"9 /Output_3_0" [id=9, type=Output_3]; +"10 /Output_2_4_0" [id=10, type=output]; +"11 /Output_3_1_0" [id=11, type=output]; +"0 /Input_1_0" -> "1 /Split_1_0"; +"1 /Split_1_0" -> "5 /Output_1_0"; +"1 /Split_1_0" -> "6 /Output_2_1_0"; +"1 /Split_1_0" -> "7 /Output_2_2_0"; +"1 /Split_1_0" -> "8 /Output_2_3_0"; +"1 /Split_1_0" -> "10 /Output_2_4_0"; +"1 /Split_1_0" -> "9 /Output_3_0"; +"1 /Split_1_0" -> "11 /Output_3_1_0" [label="parallel_input_port_ids:[2, 3, 4, 5, 6, 7, 8, 9]"]; +} From 48da94eca5d47f517780d24e79ba6e3279623bef Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Mon, 9 Oct 2023 12:26:04 +0200 Subject: [PATCH 06/18] metatypes_to_ignore quantization propagation solver test --- .../test_quantizer_propagation_solver.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/common/quantization/test_quantizer_propagation_solver.py b/tests/common/quantization/test_quantizer_propagation_solver.py index dc065a2e7e2..1f7495bd1c4 100644 --- a/tests/common/quantization/test_quantizer_propagation_solver.py +++ b/tests/common/quantization/test_quantizer_propagation_solver.py @@ -1839,3 +1839,28 @@ def test_quantizers_are_not_set_up_for_integer_inputs(self, ip_graph_with_int_ed assert double_input_pq.current_location_node_key == InsertionPointGraph.get_pre_hook_node_key( "5 /E_0", 
input_port_id=1 ) + + +def test_metatypes_to_ignore(mocker): + # pylint: disable=protected-access + NOT_IGNORED_METATYHPE = "not_ignored_metatype" + IGNORED_METATYPE = "target_metatype" + + nncf_graph = NNCFGraph() + nodes = [] + for node_name, node_metatype in zip("ABC", [NOT_IGNORED_METATYHPE, IGNORED_METATYPE, NOT_IGNORED_METATYHPE]): + nodes.append(nncf_graph.add_nncf_node(node_name, node_name, node_metatype=node_metatype)) + for idx in range(1, len(nodes)): + nncf_graph.add_edge_between_nncf_nodes( + nodes[idx - 1].node_id, nodes[idx].node_id, [1, 1, 1, 1], 0, 0, Dtype.FLOAT + ) + ip_graph = InsertionPointGraph(nncf_graph=nncf_graph, weight_modifiable_node_names=["A", "B", "C"]) + + solver = QuantizerPropagationSolver( + metatypes_to_ignore=[IGNORED_METATYPE], + ) + solver._add_node_to_ignored = mocker.MagicMock() + solver.run_on_ip_graph(ip_graph) + + solver._add_node_to_ignored.assert_called_once() + assert "1 B" in solver._add_node_to_ignored.call_args[0] From 42fa63b3ac0ab7ffa1c80b4ef351aa251ecb9e25 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Mon, 9 Oct 2023 15:56:07 +0200 Subject: [PATCH 07/18] Fix test_passes --- .../passes}/dropout_synthetic_model.dot | 0 tests/common/quantization/test_passes.py | 13 +++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) rename tests/{torch/data/reference_graphs/passes/dropout_removed => common/data/reference_graphs/passes}/dropout_synthetic_model.dot (100%) diff --git a/tests/torch/data/reference_graphs/passes/dropout_removed/dropout_synthetic_model.dot b/tests/common/data/reference_graphs/passes/dropout_synthetic_model.dot similarity index 100% rename from tests/torch/data/reference_graphs/passes/dropout_removed/dropout_synthetic_model.dot rename to tests/common/data/reference_graphs/passes/dropout_synthetic_model.dot diff --git a/tests/common/quantization/test_passes.py b/tests/common/quantization/test_passes.py index ed352cdfdfc..76014ef0911 100644 --- a/tests/common/quantization/test_passes.py +++ b/tests/common/quantization/test_passes.py @@ -10,14 +10,16 @@ # limitations under the License. 
from enum import Enum +from pathlib import Path import pytest from nncf.quantization.passes import remove_dropout_nodes_inplace from tests.post_training.test_templates.models import NNCFGraphDropoutRemovingCase -from tests.torch.test_compressed_graph import check_graph +from tests.shared.nx_graph import compare_nx_graph_with_reference +from tests.shared.paths import TEST_ROOT -REF_DIR = "passes/dropout_removed" +DATA_ROOT = TEST_ROOT / "common" / "data" / "reference_graphs" class TestModes(Enum): @@ -28,7 +30,7 @@ class TestModes(Enum): @pytest.mark.parametrize("mode", [TestModes.VALID, TestModes.WRONG_TENSOR_SHAPE, TestModes.WRONG_PARALLEL_EDGES]) def test_remove_dropout_nodes_inplace(mode: TestModes): - dot_reference_path = "dropout_synthetic_model.dot" + dot_reference_path = Path("passes") / "dropout_synthetic_model.dot" dropout_metatype = "DROPOUT_METATYPE" kwargs = {} if mode != TestModes.VALID: @@ -41,4 +43,7 @@ def test_remove_dropout_nodes_inplace(mode: TestModes): return remove_dropout_nodes_inplace(nncf_graph, [dropout_metatype]) - check_graph(nncf_graph, dot_reference_path, REF_DIR) + + nx_graph = nncf_graph.get_graph_for_structure_analysis() + path_to_dot = DATA_ROOT / dot_reference_path + compare_nx_graph_with_reference(nx_graph, path_to_dot, check_edge_attrs=True) From 8abc6ea46f891ceb57b80c314b79489c7d3bf564 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 11 Oct 2023 18:44:41 +0200 Subject: [PATCH 08/18] get_inference_graph method is refactored --- .../algorithms/min_max/algorithm.py | 8 +----- .../algorithms/min_max/backend.py | 28 +++++-------------- .../algorithms/min_max/onnx_backend.py | 22 +++++++-------- .../algorithms/min_max/openvino_backend.py | 27 ++++++++++-------- .../algorithms/min_max/torch_backend.py | 20 ++++++------- nncf/quantization/passes.py | 24 ---------------- .../test_templates/test_ptq_params.py | 25 ++++------------- .../test_templates/test_quantizer_config.py | 22 ++------------- 8 files changed, 49 insertions(+), 127 deletions(-) diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 10342cdd198..43796076bac 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -54,7 +54,6 @@ from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS from nncf.quantization.fake_quantize import calculate_quantizer_parameters from nncf.quantization.fake_quantize import get_quantizer_narrow_range -from nncf.quantization.passes import transform_to_inference_graph from nncf.quantization.range_estimator import RangeEstimatorParameters from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope @@ -504,12 +503,7 @@ def _get_quantization_target_points( ) hw_patterns = PatternsManager.get_full_hw_pattern_graph(backend=backend, device=device, model_type=model_type) - inference_nncf_graph = transform_to_inference_graph( - nncf_graph, - self._backend_entity.shapeof_metatypes, - self._backend_entity.dropout_metatypes, - self._backend_entity.read_variable_metatypes, - ) + inference_nncf_graph = self._backend_entity.transform_to_inference_graph(nncf_graph) quantizer_setup = self._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) self._apply_model_type_pass(self._model_type, quantizer_setup, nncf_graph) diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index 2e6018ffa7c..8a95a5e83b5 
100644 --- a/nncf/quantization/algorithms/min_max/backend.py +++ b/nncf/quantization/algorithms/min_max/backend.py @@ -49,20 +49,6 @@ def post_processing_metatypes(self) -> List[OperatorMetatype]: Property for the backend-specific post-processing metatypes (NonMaximumSupression, TopK, etc.). """ - @property - @abstractmethod - def shapeof_metatypes(self) -> List[OperatorMetatype]: - """ - Property for the backend-specific ShapeOf metatypes. - """ - - @property - @abstractmethod - def dropout_metatypes(self) -> List[OperatorMetatype]: - """ - Property for the backend-specific Dropout metatypes. - """ - @property @abstractmethod def conv_metatypes(self) -> List[OperatorMetatype]: @@ -77,13 +63,6 @@ def overflow_fix_metatypes(self) -> List[OperatorMetatype]: Property for the backend-specific metatypes for which overflow_fix is applicable. """ - @property - @abstractmethod - def read_variable_metatypes(self) -> List[OperatorMetatype]: - """ - Property for the backend-specific metatypes that also can be interpreted as inputs (ReadValue). - """ - @property @abstractmethod def add_metatypes(self) -> List[OperatorMetatype]: @@ -181,6 +160,13 @@ def get_statistic_collector( :return: Backend-specific TensorStatisticCollectorBase for the statistics calculation. """ + @staticmethod + @abstractmethod + def transform_to_inference_graph(graph: NNCFGraph) -> NNCFGraph: + """ + Returns inference NNCFGraph without constant flows and training time operations. + """ + @staticmethod @abstractmethod def get_weight_tensor_port_ids(node: NNCFNode) -> List[Optional[int]]: diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 6be88004ee0..57e667ff335 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -9,6 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from copy import deepcopy from typing import Dict, List, Optional, Set, Union import numpy as np @@ -42,6 +43,7 @@ from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend from nncf.quantization.fake_quantize import FakeQuantizeParameters +from nncf.quantization.passes import remove_shapeof_subgraphs_inplace from nncf.quantization.range_estimator import RangeEstimatorParameters @@ -56,14 +58,6 @@ def mat_mul_metatypes(self) -> List[OperatorMetatype]: def post_processing_metatypes(self) -> List[OperatorMetatype]: return [om.ONNXTopKMetatype, om.ONNXNonMaxSuppressionMetatype] - @property - def shapeof_metatypes(self) -> List[OperatorMetatype]: - return [om.ONNXShapeMetatype] - - @property - def dropout_metatypes(self) -> List[OperatorMetatype]: - return [] - @property def conv_metatypes(self) -> List[OperatorMetatype]: return [om.ONNXConvolutionMetatype] @@ -72,10 +66,6 @@ def conv_metatypes(self) -> List[OperatorMetatype]: def overflow_fix_metatypes(self) -> List[OperatorMetatype]: return [om.ONNXConvolutionMetatype, om.ONNXConvolutionTransposeMetatype, *MATMUL_METATYPES] - @property - def read_variable_metatypes(self) -> List[OperatorMetatype]: - return [] - @property def add_metatypes(self) -> List[OperatorMetatype]: return [om.ONNXAddLayerMetatype] @@ -174,6 +164,14 @@ def get_statistic_collector( f"{str(range_estimator_params)}" ) + @staticmethod + def transform_to_inference_graph(graph: NNCFGraph) -> NNCFGraph: + inference_graph = deepcopy(graph) + remove_shapeof_subgraphs_inplace( + nncf_graph=inference_graph, shapeof_metatypes=[om.ONNXShapeMetatype], read_variable_metatypes=[] + ) + return inference_graph + @staticmethod def get_weight_tensor_port_ids(node: NNCFNode) -> List[Optional[int]]: return list(node.layer_attributes.weight_attrs.keys()) diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 991e832155e..f1d48bd0c55 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -9,6 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from copy import deepcopy from typing import Dict, List, Optional, Set, Tuple import numpy as np @@ -43,6 +44,8 @@ from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend from nncf.quantization.fake_quantize import FakeQuantizeParameters +from nncf.quantization.passes import filter_constant_nodes_inplace +from nncf.quantization.passes import remove_shapeof_subgraphs_inplace # pylint:disable=too-many-public-methods @@ -56,14 +59,6 @@ def mat_mul_metatypes(self) -> List[OperatorMetatype]: def post_processing_metatypes(self) -> List[OperatorMetatype]: return [om.OVTopKMetatype, om.OVNonMaxSuppressionMetatype] - @property - def shapeof_metatypes(self) -> List[OperatorMetatype]: - return [om.OVShapeOfMetatype] - - @property - def dropout_metatypes(self) -> List[OperatorMetatype]: - return [] - @property def conv_metatypes(self) -> List[OperatorMetatype]: return [om.OVConvolutionMetatype] @@ -78,10 +73,6 @@ def overflow_fix_metatypes(self) -> List[OperatorMetatype]: om.OVMatMulMetatype, ] - @property - def read_variable_metatypes(self) -> List[OperatorMetatype]: - return [om.OVReadValueMetatype] - @property def add_metatypes(self) -> List[OperatorMetatype]: return [om.OVAddMetatype] @@ -204,6 +195,18 @@ def get_statistic_collector( collector.register_statistic_branch(container_key, reducer, aggregator) return collector + @staticmethod + def transform_to_inference_graph(graph: NNCFGraph) -> NNCFGraph: + inference_graph = deepcopy(graph) + read_variable_metatypes = [om.OVReadValueMetatype] + remove_shapeof_subgraphs_inplace( + nncf_graph=inference_graph, + shapeof_metatypes=[om.OVShapeOfMetatype], + read_variable_metatypes=read_variable_metatypes, + ) + filter_constant_nodes_inplace(nncf_graph=inference_graph, read_variable_metatypes=read_variable_metatypes) + return inference_graph + @staticmethod def get_weight_tensor_port_ids(node: NNCFNode) -> List[Optional[int]]: return node.layer_attributes.get_const_port_ids() diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index f1d33bfeaae..a78cfde6686 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -9,6 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from copy import deepcopy from typing import Dict, List, Optional, Set, Tuple import numpy as np @@ -34,6 +35,7 @@ from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend from nncf.quantization.fake_quantize import FakeQuantizeParameters +from nncf.quantization.passes import remove_dropout_nodes_inplace from nncf.quantization.range_estimator import RangeEstimatorParameters from nncf.torch.graph.graph import PTTargetPoint from nncf.torch.graph.transformations.commands import PTQuantizerInsertionCommand @@ -68,14 +70,6 @@ def mat_mul_metatypes(self) -> List[OperatorMetatype]: def post_processing_metatypes(self) -> List[OperatorMetatype]: return [] - @property - def shapeof_metatypes(self) -> List[OperatorMetatype]: - return [] - - @property - def dropout_metatypes(self) -> List[OperatorMetatype]: - return [om.PTDropoutMetatype] - @property def conv_metatypes(self) -> List[OperatorMetatype]: return [om.PTModuleConv1dMetatype, om.PTModuleConv2dMetatype, om.PTModuleConv3dMetatype] @@ -92,10 +86,6 @@ def overflow_fix_metatypes(self) -> List[OperatorMetatype]: om.PTModuleConvTranspose3dMetatype, ] - @property - def read_variable_metatypes(self) -> List[OperatorMetatype]: - return [] - @property def add_metatypes(self) -> List[OperatorMetatype]: return [om.PTAddMetatype] @@ -199,6 +189,12 @@ def get_statistic_collector( collector.register_statistic_branch(container_key, reducer, aggregator) return collector + @staticmethod + def transform_to_inference_graph(graph: NNCFGraph) -> NNCFGraph: + inference_graph = deepcopy(graph) + remove_dropout_nodes_inplace(nncf_graph=inference_graph, dropout_metatypes=[om.PTDropoutMetatype]) + return inference_graph + @staticmethod def get_weight_tensor_port_ids(node: NNCFNode) -> List[Optional[int]]: return [None] diff --git a/nncf/quantization/passes.py b/nncf/quantization/passes.py index 833e6c19f62..2d54bebd338 100644 --- a/nncf/quantization/passes.py +++ b/nncf/quantization/passes.py @@ -10,7 +10,6 @@ # limitations under the License. import collections -from copy import deepcopy from typing import List, Optional, TypeVar from nncf.common.graph.graph import NNCFGraph @@ -21,29 +20,6 @@ TModel = TypeVar("TModel") -def transform_to_inference_graph( - nncf_graph: NNCFGraph, - shapeof_metatypes: List[OperatorMetatype], - dropout_metatypes: List[OperatorMetatype], - read_variable_metatypes: Optional[List[OperatorMetatype]] = None, -) -> NNCFGraph: - """ - This method contains pipeline of the passes that uses to provide inference graph without constant flows. - - :param nncf_graph: NNCFGraph instance for the transformation. - :param shapeof_metatypes: List of backend-specific ShapeOf metatypes. - :param dropout_metatypes: List of backend-specific Dropout metatypes. - :param read_variable_metatypes: List of backend-specific metatypes - that also can be interpreted as inputs (ReadValue). - :return: NNCFGraph in the inference style. 
- """ - inference_graph = deepcopy(nncf_graph) - remove_shapeof_subgraphs_inplace(inference_graph, shapeof_metatypes, read_variable_metatypes) - remove_dropout_nodes_inplace(inference_graph, dropout_metatypes) - filter_constant_nodes_inplace(inference_graph, read_variable_metatypes) - return inference_graph - - def remove_shapeof_subgraphs_inplace( nncf_graph: NNCFGraph, shapeof_metatypes: List[OperatorMetatype], diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 84c1332baed..58e8aa1f7f4 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -10,6 +10,7 @@ # limitations under the License. from abc import abstractmethod from collections import Counter +from copy import deepcopy from typing import Dict import pytest @@ -30,7 +31,6 @@ from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization -from nncf.quantization.passes import transform_to_inference_graph from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope from tests.common.quantization.metatypes import Conv2dTestMetatype @@ -171,12 +171,7 @@ def test_quantize_outputs(self, test_params, quantize_outputs): assert min_max_algo._quantize_outputs == quantize_outputs hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] - inference_nncf_graph = transform_to_inference_graph( - nncf_graph, - min_max_algo._backend_entity.shapeof_metatypes, - min_max_algo._backend_entity.dropout_metatypes, - min_max_algo._backend_entity.read_variable_metatypes, - ) + inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) act_num_q, weight_num_q = 0, 0 for quantization_point in q_setup.quantization_points.values(): @@ -197,12 +192,7 @@ def test_ignored_scopes(self, test_params, ignored_scopes_data): nncf_graph = test_params["test_ignored_scopes"]["nncf_graph"] hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] - inference_nncf_graph = transform_to_inference_graph( - nncf_graph, - min_max_algo._backend_entity.shapeof_metatypes, - min_max_algo._backend_entity.dropout_metatypes, - min_max_algo._backend_entity.read_variable_metatypes, - ) + inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) act_num_q, weight_num_q = 0, 0 for quantization_point in q_setup.quantization_points.values(): @@ -223,12 +213,7 @@ def test_model_type_pass(self, test_params, model_type): nncf_graph = test_params["test_model_type_pass"]["nncf_graph"] hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] - inference_nncf_graph = transform_to_inference_graph( - nncf_graph, - min_max_algo._backend_entity.shapeof_metatypes, - min_max_algo._backend_entity.dropout_metatypes, - min_max_algo._backend_entity.read_variable_metatypes, - ) + inference_nncf_graph = 
min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) for quantization_point in q_setup.quantization_points.values(): if quantization_point.is_activation_quantization_point(): @@ -290,7 +275,7 @@ def test_quantization_points_overflow_fix(self, overflow_fix, affected_target_po @pytest.mark.parametrize("validate_scopes", (True, False)) def test_validate_scope(self, test_params, validate_scopes): nncf_graph = test_params["test_model_type_pass"]["nncf_graph"] - inference_nncf_graph = transform_to_inference_graph(nncf_graph, [], []) + inference_nncf_graph = deepcopy(nncf_graph) ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] algo = MinMaxQuantization( ignored_scope=IgnoredScope(names=["some_node"], validate=validate_scopes), diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 2f73dc3df78..8fa55c9c1d0 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -32,7 +32,6 @@ from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import QuantizationParameters from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization -from nncf.quantization.passes import transform_to_inference_graph from nncf.quantization.range_estimator import RangeEstimatorParametersSet from tests.post_training.test_templates.models import NNCFGraphToTest from tests.post_training.test_templates.models import NNCFGraphToTestDepthwiseConv @@ -89,12 +88,7 @@ def test_default_quantizer_config(self, single_conv_nncf_graph): min_max_algo = algo.algorithms[0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph - inference_nncf_graph = transform_to_inference_graph( - nncf_graph, - min_max_algo._backend_entity.shapeof_metatypes, - min_max_algo._backend_entity.dropout_metatypes, - min_max_algo._backend_entity.read_variable_metatypes, - ) + inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) q_setup = min_max_algo._get_quantizer_setup( nncf_graph, inference_nncf_graph, hw_patterns=GraphPattern(), ignored_patterns=GraphPattern() ) @@ -146,12 +140,7 @@ def test_quantizer_config_from_ptq_params_for_CPU( min_max_algo = algo.algorithms[0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph - inference_nncf_graph = transform_to_inference_graph( - nncf_graph, - min_max_algo._backend_entity.shapeof_metatypes, - min_max_algo._backend_entity.dropout_metatypes, - min_max_algo._backend_entity.read_variable_metatypes, - ) + inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) if signed_weights is False or signed_activations in [True, False]: # Incompatible with HW CPU config with pytest.raises( ValueError, @@ -189,12 +178,7 @@ def test_depthwise_conv_default_quantizer_config(self, depthwise_conv_nncf_graph min_max_algo = algo.algorithms[0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = depthwise_conv_nncf_graph.nncf_graph - inference_nncf_graph = transform_to_inference_graph( - nncf_graph, - min_max_algo._backend_entity.shapeof_metatypes, - min_max_algo._backend_entity.dropout_metatypes, - 
min_max_algo._backend_entity.read_variable_metatypes, - ) + inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) q_setup = min_max_algo._get_quantizer_setup( nncf_graph, inference_nncf_graph, hw_patterns=GraphPattern(), ignored_patterns=GraphPattern() ) From bb90e814c263215a1683661b08c3ce0dba545e7d Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 11 Oct 2023 19:09:24 +0200 Subject: [PATCH 09/18] Metrics update --- tests/post_training/reference_data.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/post_training/reference_data.yaml b/tests/post_training/reference_data.yaml index 250bfa3546b..2ea64d6cf1a 100644 --- a/tests/post_training/reference_data.yaml +++ b/tests/post_training/reference_data.yaml @@ -24,7 +24,7 @@ hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM_backend_OPTIMUM: # Timm timm/crossvit_9_240_backend_TORCH: - metric_value: 0.68136 + metric_value: 0.689 metric_value_fp32: 0.69966 timm/crossvit_9_240_backend_ONNX: metric_value: 0.68906 @@ -56,7 +56,7 @@ timm/deit3_small_patch16_224_backend_OLD_TORCH: metric_value: 0.76514 metric_value_fp32: 0.76974 timm/deit3_small_patch16_224_backend_TORCH: - metric_value: 0.7621 + metric_value: 0.76816 metric_value_fp32: 0.76974 timm/deit3_small_patch16_224_backend_ONNX: metric_value: 0.76806 @@ -276,7 +276,7 @@ timm/swin_base_patch4_window7_224_backend_OLD_TORCH: metric_value: 0.81376 metric_value_fp32: 0.81462 timm/swin_base_patch4_window7_224_backend_TORCH: - metric_value: 0.80696 + metric_value: 0.8131 metric_value_fp32: 0.81462 timm/swin_base_patch4_window7_224_backend_ONNX: metric_value: 0.81294 From b4bb2430ce3ab1b9518d225afa473bd29a5778fe Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 11 Oct 2023 19:56:13 +0200 Subject: [PATCH 10/18] get_inference_graph fix --- nncf/quantization/algorithms/min_max/onnx_backend.py | 2 ++ nncf/quantization/algorithms/min_max/torch_backend.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 57e667ff335..9c4e56ee318 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -43,6 +43,7 @@ from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend from nncf.quantization.fake_quantize import FakeQuantizeParameters +from nncf.quantization.passes import filter_constant_nodes_inplace from nncf.quantization.passes import remove_shapeof_subgraphs_inplace from nncf.quantization.range_estimator import RangeEstimatorParameters @@ -170,6 +171,7 @@ def transform_to_inference_graph(graph: NNCFGraph) -> NNCFGraph: remove_shapeof_subgraphs_inplace( nncf_graph=inference_graph, shapeof_metatypes=[om.ONNXShapeMetatype], read_variable_metatypes=[] ) + filter_constant_nodes_inplace(nncf_graph=inference_graph) return inference_graph @staticmethod diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index a78cfde6686..cd51d23c76b 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -35,6 +35,7 @@ from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend from nncf.quantization.fake_quantize import FakeQuantizeParameters +from 
nncf.quantization.passes import filter_constant_nodes_inplace from nncf.quantization.passes import remove_dropout_nodes_inplace from nncf.quantization.range_estimator import RangeEstimatorParameters from nncf.torch.graph.graph import PTTargetPoint @@ -193,6 +194,7 @@ def get_statistic_collector( def transform_to_inference_graph(graph: NNCFGraph) -> NNCFGraph: inference_graph = deepcopy(graph) remove_dropout_nodes_inplace(nncf_graph=inference_graph, dropout_metatypes=[om.PTDropoutMetatype]) + filter_constant_nodes_inplace(nncf_graph=inference_graph) return inference_graph @staticmethod From 9837d2b524996427bf1ffc48de09b71fa7711edb Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 11 Oct 2023 20:44:25 +0200 Subject: [PATCH 11/18] ptq_params test microfix --- tests/post_training/test_templates/test_ptq_params.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 58e8aa1f7f4..44824876193 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -31,6 +31,7 @@ from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.passes import filter_constant_nodes_inplace from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope from tests.common.quantization.metatypes import Conv2dTestMetatype @@ -276,6 +277,7 @@ def test_quantization_points_overflow_fix(self, overflow_fix, affected_target_po def test_validate_scope(self, test_params, validate_scopes): nncf_graph = test_params["test_model_type_pass"]["nncf_graph"] inference_nncf_graph = deepcopy(nncf_graph) + filter_constant_nodes_inplace(inference_nncf_graph) ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] algo = MinMaxQuantization( ignored_scope=IgnoredScope(names=["some_node"], validate=validate_scopes), From d478e2b26cc79b3c446ecfa26b4086e86787035f Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 17 Oct 2023 15:17:49 +0200 Subject: [PATCH 12/18] Revert transform_to_inference_graph function --- .../algorithms/accuracy_control/ranker.py | 6 ++-- .../algorithms/min_max/algorithm.py | 8 ++++- .../algorithms/min_max/backend.py | 28 ++++++++++++---- .../algorithms/min_max/onnx_backend.py | 24 +++++++------- .../algorithms/min_max/openvino_backend.py | 27 +++++++--------- .../algorithms/min_max/torch_backend.py | 22 +++++++------ nncf/quantization/passes.py | 32 ++++++++++++++++--- tests/common/quantization/test_passes.py | 6 ++-- .../quantization/test_quantizer_removal.py | 4 +-- tests/post_training/reference_data.yaml | 8 ++--- .../test_templates/test_ptq_params.py | 4 +-- 11 files changed, 105 insertions(+), 64 deletions(-) diff --git a/nncf/quantization/algorithms/accuracy_control/ranker.py b/nncf/quantization/algorithms/accuracy_control/ranker.py index 9449aec74b3..51b38eb8d0f 100644 --- a/nncf/quantization/algorithms/accuracy_control/ranker.py +++ b/nncf/quantization/algorithms/accuracy_control/ranker.py @@ -28,7 +28,7 @@ from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator from nncf.quantization.algorithms.accuracy_control.rank_functions import create_normalized_mse_func from nncf.quantization.algorithms.accuracy_control.subset_selection import select_subset -from 
nncf.quantization.passes import remove_shapeof_subgraphs_inplace +from nncf.quantization.passes import remove_shapeof_subgraphs TModel = TypeVar("TModel") TPModel = TypeVar("TPModel") @@ -99,9 +99,7 @@ def find_groups_of_quantizers_to_rank(self, quantized_model_graph: NNCFGraph) -> ] quantized_model_graph_without_shapeof = deepcopy(quantized_model_graph) - remove_shapeof_subgraphs_inplace( - quantized_model_graph_without_shapeof, self._algo_backend.get_shapeof_metatypes() - ) + remove_shapeof_subgraphs(quantized_model_graph_without_shapeof, self._algo_backend.get_shapeof_metatypes()) for quantizer_node in reversed(quantizers): if processed.get(quantizer_node.node_name, False): diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 2fe6be7968e..4b1c0547f86 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -54,6 +54,7 @@ from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS from nncf.quantization.fake_quantize import calculate_quantizer_parameters from nncf.quantization.fake_quantize import get_quantizer_narrow_range +from nncf.quantization.passes import transform_to_inference_graph from nncf.quantization.range_estimator import RangeEstimatorParameters from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope @@ -503,7 +504,12 @@ def _get_quantization_target_points( ) hw_patterns = PatternsManager.get_full_hw_pattern_graph(backend=backend, device=device, model_type=model_type) - inference_nncf_graph = self._backend_entity.transform_to_inference_graph(nncf_graph) + inference_nncf_graph = transform_to_inference_graph( + nncf_graph, + self._backend_entity.shapeof_metatypes, + self._backend_entity.dropout_metatypes, + self._backend_entity.read_variable_metatypes, + ) quantizer_setup = self._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) self._apply_model_type_pass(self._model_type, quantizer_setup, nncf_graph) diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index 8a95a5e83b5..0764885a945 100644 --- a/nncf/quantization/algorithms/min_max/backend.py +++ b/nncf/quantization/algorithms/min_max/backend.py @@ -56,6 +56,27 @@ def conv_metatypes(self) -> List[OperatorMetatype]: Property for the backend-specific Convolution metatypes. """ + @property + @abstractmethod + def shapeof_metatypes(self) -> List[OperatorMetatype]: + """ + Property for the backend-specific ShapeOf metatypes. + """ + + @property + @abstractmethod + def dropout_metatypes(self) -> List[OperatorMetatype]: + """ + Property for the backend-specific Dropout metatypes. + """ + + @property + @abstractmethod + def read_variable_metatypes(self) -> List[OperatorMetatype]: + """ + Property for the backend-specific metatypes that also can be interpreted as inputs (ReadValue). + """ + @property @abstractmethod def overflow_fix_metatypes(self) -> List[OperatorMetatype]: @@ -160,13 +181,6 @@ def get_statistic_collector( :return: Backend-specific TensorStatisticCollectorBase for the statistics calculation. """ - @staticmethod - @abstractmethod - def transform_to_inference_graph(graph: NNCFGraph) -> NNCFGraph: - """ - Returns inference NNCFGraph without constant flows and training time operations. 
- """ - @staticmethod @abstractmethod def get_weight_tensor_port_ids(node: NNCFNode) -> List[Optional[int]]: diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 404bc45435f..e202252bf59 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -9,7 +9,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from copy import deepcopy from typing import Dict, List, Optional, Set, Union import numpy as np @@ -43,8 +42,6 @@ from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend from nncf.quantization.fake_quantize import FakeQuantizeParameters -from nncf.quantization.passes import filter_constant_nodes_inplace -from nncf.quantization.passes import remove_shapeof_subgraphs_inplace from nncf.quantization.range_estimator import RangeEstimatorParameters @@ -75,6 +72,18 @@ def add_metatypes(self) -> List[OperatorMetatype]: def group_conv_metatypes(self) -> List[OperatorMetatype]: return self.conv_metatypes + @property + def shapeof_metatypes(self) -> List[OperatorMetatype]: + return [om.ONNXShapeMetatype] + + @property + def dropout_metatypes(self) -> List[OperatorMetatype]: + return [] + + @property + def read_variable_metatypes(self) -> List[OperatorMetatype]: + return [] + @property def scales_unification_map(self) -> Dict[OperatorMetatype, OperatorMetatype]: return {om.ONNXConcatMetatype: self.overflow_fix_metatypes} @@ -165,15 +174,6 @@ def get_statistic_collector( f"{str(range_estimator_params)}" ) - @staticmethod - def transform_to_inference_graph(graph: NNCFGraph) -> NNCFGraph: - inference_graph = deepcopy(graph) - remove_shapeof_subgraphs_inplace( - nncf_graph=inference_graph, shapeof_metatypes=[om.ONNXShapeMetatype], read_variable_metatypes=[] - ) - filter_constant_nodes_inplace(nncf_graph=inference_graph) - return inference_graph - @staticmethod def get_weight_tensor_port_ids(node: NNCFNode) -> List[Optional[int]]: return list(node.layer_attributes.weight_attrs.keys()) diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index e49fbd4d48f..103969142c2 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -9,7 +9,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from copy import deepcopy from typing import Dict, List, Optional, Set, Tuple import numpy as np @@ -44,8 +43,6 @@ from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend from nncf.quantization.fake_quantize import FakeQuantizeParameters -from nncf.quantization.passes import filter_constant_nodes_inplace -from nncf.quantization.passes import remove_shapeof_subgraphs_inplace # pylint:disable=too-many-public-methods @@ -81,6 +78,18 @@ def add_metatypes(self) -> List[OperatorMetatype]: def group_conv_metatypes(self) -> List[OperatorMetatype]: return [om.OVGroupConvolutionMetatype] + @property + def shapeof_metatypes(self) -> List[OperatorMetatype]: + return [om.OVShapeOfMetatype] + + @property + def dropout_metatypes(self) -> List[OperatorMetatype]: + return [] + + @property + def read_variable_metatypes(self) -> List[OperatorMetatype]: + return [om.OVReadValueMetatype] + @property def scales_unification_map(self) -> Dict[OperatorMetatype, OperatorMetatype]: return {om.OVConcatMetatype: self.overflow_fix_metatypes} @@ -195,18 +204,6 @@ def get_statistic_collector( collector.register_statistic_branch(container_key, reducer, aggregator) return collector - @staticmethod - def transform_to_inference_graph(graph: NNCFGraph) -> NNCFGraph: - inference_graph = deepcopy(graph) - read_variable_metatypes = [om.OVReadValueMetatype] - remove_shapeof_subgraphs_inplace( - nncf_graph=inference_graph, - shapeof_metatypes=[om.OVShapeOfMetatype], - read_variable_metatypes=read_variable_metatypes, - ) - filter_constant_nodes_inplace(nncf_graph=inference_graph, read_variable_metatypes=read_variable_metatypes) - return inference_graph - @staticmethod def get_weight_tensor_port_ids(node: NNCFNode) -> List[Optional[int]]: return node.layer_attributes.get_const_port_ids() diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index 053639456a1..cae2c4fa6c2 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -9,7 +9,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
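
As an illustrative sketch (not part of the diff): under this reverted design a MinMax backend only has to expose three metatype lists instead of its own `transform_to_inference_graph`. The class below is hypothetical, the `OperatorMetatype` import path is assumed, and the commented values only echo the backend-specific metatypes visible in the hunks above.

```python
from typing import List

from nncf.common.graph.operator_metatypes import OperatorMetatype  # import path assumed


class ExampleAlgoBackend:
    """Hypothetical backend showing the three properties consumed by the shared passes."""

    @property
    def shapeof_metatypes(self) -> List[OperatorMetatype]:
        # ShapeOf-like operations, e.g. [om.ONNXShapeMetatype] for ONNX, [] for Torch
        return []

    @property
    def dropout_metatypes(self) -> List[OperatorMetatype]:
        # Dropout operations, e.g. [om.PTDropoutMetatype] for Torch
        return []

    @property
    def read_variable_metatypes(self) -> List[OperatorMetatype]:
        # Operations that behave like extra inputs, e.g. [om.OVReadValueMetatype]
        return []
```
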
-from copy import deepcopy from typing import Dict, List, Optional, Set, Tuple import torch @@ -33,8 +32,6 @@ from nncf.quantization.algorithms.min_max.backend import ALGO_BACKENDS from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend from nncf.quantization.fake_quantize import FakeQuantizeParameters -from nncf.quantization.passes import filter_constant_nodes_inplace -from nncf.quantization.passes import remove_dropout_nodes_inplace from nncf.quantization.range_estimator import RangeEstimatorParameters from nncf.torch.graph.graph import PTTargetPoint from nncf.torch.graph.transformations.commands import PTQuantizerInsertionCommand @@ -68,6 +65,18 @@ def mat_mul_metatypes(self) -> List[OperatorMetatype]: def post_processing_metatypes(self) -> List[OperatorMetatype]: return [] + @property + def shapeof_metatypes(self) -> List[OperatorMetatype]: + return [] + + @property + def dropout_metatypes(self) -> List[OperatorMetatype]: + return [om.PTDropoutMetatype] + + @property + def read_variable_metatypes(self) -> List[OperatorMetatype]: + return [] + @property def conv_metatypes(self) -> List[OperatorMetatype]: return [om.PTModuleConv1dMetatype, om.PTModuleConv2dMetatype, om.PTModuleConv3dMetatype] @@ -183,13 +192,6 @@ def get_statistic_collector( collector.register_statistic_branch(container_key, reducer, aggregator) return collector - @staticmethod - def transform_to_inference_graph(graph: NNCFGraph) -> NNCFGraph: - inference_graph = deepcopy(graph) - remove_dropout_nodes_inplace(nncf_graph=inference_graph, dropout_metatypes=[om.PTDropoutMetatype]) - filter_constant_nodes_inplace(nncf_graph=inference_graph) - return inference_graph - @staticmethod def get_weight_tensor_port_ids(node: NNCFNode) -> List[Optional[int]]: return [None] diff --git a/nncf/quantization/passes.py b/nncf/quantization/passes.py index 2d54bebd338..32ad5fc3ebf 100644 --- a/nncf/quantization/passes.py +++ b/nncf/quantization/passes.py @@ -10,6 +10,7 @@ # limitations under the License. import collections +from copy import deepcopy from typing import List, Optional, TypeVar from nncf.common.graph.graph import NNCFGraph @@ -20,7 +21,30 @@ TModel = TypeVar("TModel") -def remove_shapeof_subgraphs_inplace( +def transform_to_inference_graph( + nncf_graph: NNCFGraph, + shapeof_metatypes: List[OperatorMetatype], + dropout_metatypes: List[OperatorMetatype], + read_variable_metatypes: Optional[List[OperatorMetatype]] = None, +) -> NNCFGraph: + """ + This method contains pipeline of the passes that uses to provide inference graph without constant flows. + + :param nncf_graph: NNCFGraph instance for the transformation. + :param shapeof_metatypes: List of backend-specific ShapeOf metatypes. + :param dropout_metatypes: List of backend-specific Dropout metatypes. + :param read_variable_metatypes: List of backend-specific metatypes + that also can be interpreted as inputs (ReadValue). + :return: NNCFGraph in the inference style. 
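
To make the flow described by this docstring concrete, here is a hedged usage sketch (not part of the patch; `min_max_algo` and `nncf_graph` stand in for the objects used in the tests later in this series):

```python
from nncf.quantization.passes import transform_to_inference_graph

# The backend entity only supplies metatype lists; the pass pipeline itself is
# shared across the ONNX, OpenVINO and Torch backends.
backend = min_max_algo._backend_entity
inference_nncf_graph = transform_to_inference_graph(
    nncf_graph,                        # copied internally at this point in the series
    backend.shapeof_metatypes,         # ShapeOf subgraphs are stripped first
    backend.dropout_metatypes,         # then Dropout nodes are cut out and reconnected
    backend.read_variable_metatypes,   # ReadValue-like ops are treated as inputs
)
# The resulting graph feeds the quantizer setup, e.g.
# min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns)
```
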
+ """ + inference_graph = deepcopy(nncf_graph) + remove_shapeof_subgraphs(inference_graph, shapeof_metatypes, read_variable_metatypes) + remove_dropout_nodes(inference_graph, dropout_metatypes) + filter_constant_nodes(inference_graph, read_variable_metatypes) + return inference_graph + + +def remove_shapeof_subgraphs( nncf_graph: NNCFGraph, shapeof_metatypes: List[OperatorMetatype], read_variable_metatypes: Optional[List[OperatorMetatype]] = None, @@ -67,7 +91,7 @@ def remove_shapeof_subgraphs_inplace( nncf_graph.remove_nodes_from(nodes_to_drop) -def remove_dropout_nodes_inplace( +def remove_dropout_nodes( nncf_graph: NNCFGraph, dropout_metatypes: List[OperatorMetatype], ) -> None: @@ -83,7 +107,7 @@ def remove_dropout_nodes_inplace( return nodes_to_drop = [] - for node in nncf_graph.get_all_nodes(): + for node in nncf_graph.get_nodes_by_metatypes(dropout_metatypes): if node.metatype in dropout_metatypes: nodes_to_drop.append(node) @@ -114,7 +138,7 @@ def remove_dropout_nodes_inplace( nncf_graph.remove_nodes_from(nodes_to_drop) -def filter_constant_nodes_inplace( +def filter_constant_nodes( nncf_graph: NNCFGraph, read_variable_metatypes: Optional[List[OperatorMetatype]] = None ) -> None: """ diff --git a/tests/common/quantization/test_passes.py b/tests/common/quantization/test_passes.py index 76014ef0911..a6d822bc649 100644 --- a/tests/common/quantization/test_passes.py +++ b/tests/common/quantization/test_passes.py @@ -14,7 +14,7 @@ import pytest -from nncf.quantization.passes import remove_dropout_nodes_inplace +from nncf.quantization.passes import remove_dropout_nodes from tests.post_training.test_templates.models import NNCFGraphDropoutRemovingCase from tests.shared.nx_graph import compare_nx_graph_with_reference from tests.shared.paths import TEST_ROOT @@ -39,10 +39,10 @@ def test_remove_dropout_nodes_inplace(mode: TestModes): nncf_graph = NNCFGraphDropoutRemovingCase(dropout_metatype, **kwargs).nncf_graph if mode != TestModes.VALID: with pytest.raises(AssertionError): - remove_dropout_nodes_inplace(nncf_graph, [dropout_metatype]) + remove_dropout_nodes(nncf_graph, [dropout_metatype]) return - remove_dropout_nodes_inplace(nncf_graph, [dropout_metatype]) + remove_dropout_nodes(nncf_graph, [dropout_metatype]) nx_graph = nncf_graph.get_graph_for_structure_analysis() path_to_dot = DATA_ROOT / dot_reference_path diff --git a/tests/common/quantization/test_quantizer_removal.py b/tests/common/quantization/test_quantizer_removal.py index bb46b6585e0..911056f0d64 100644 --- a/tests/common/quantization/test_quantizer_removal.py +++ b/tests/common/quantization/test_quantizer_removal.py @@ -18,7 +18,7 @@ from nncf.common.graph import NNCFGraph from nncf.common.graph.layer_attributes import Dtype from nncf.common.quantization.quantizer_removal import find_quantizer_nodes_to_cut -from nncf.quantization.passes import remove_shapeof_subgraphs_inplace +from nncf.quantization.passes import remove_shapeof_subgraphs from tests.common.quantization.metatypes import CONSTANT_METATYPES from tests.common.quantization.metatypes import METATYPES_FOR_TEST from tests.common.quantization.metatypes import QUANTIZABLE_METATYPES @@ -227,7 +227,7 @@ def create_test_params(): def test_find_quantizer_nodes_to_cut(nncf_graph: NNCFGraph, test_case: TestCase): quantizer_node = nncf_graph.get_node_by_name(test_case.node_name) nncf_graph_without_shapeof = deepcopy(nncf_graph) - remove_shapeof_subgraphs_inplace(nncf_graph_without_shapeof, SHAPEOF_METATYPES) + remove_shapeof_subgraphs(nncf_graph_without_shapeof, 
SHAPEOF_METATYPES) nodes, ops = find_quantizer_nodes_to_cut( nncf_graph_without_shapeof, quantizer_node, diff --git a/tests/post_training/reference_data.yaml b/tests/post_training/reference_data.yaml index 2ea64d6cf1a..39e231d592c 100644 --- a/tests/post_training/reference_data.yaml +++ b/tests/post_training/reference_data.yaml @@ -24,7 +24,7 @@ hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM_backend_OPTIMUM: # Timm timm/crossvit_9_240_backend_TORCH: - metric_value: 0.689 + metric_value: 0.68136 metric_value_fp32: 0.69966 timm/crossvit_9_240_backend_ONNX: metric_value: 0.68906 @@ -56,7 +56,7 @@ timm/deit3_small_patch16_224_backend_OLD_TORCH: metric_value: 0.76514 metric_value_fp32: 0.76974 timm/deit3_small_patch16_224_backend_TORCH: - metric_value: 0.76816 + metric_value: 0.7621 metric_value_fp32: 0.76974 timm/deit3_small_patch16_224_backend_ONNX: metric_value: 0.76806 @@ -104,7 +104,7 @@ timm/efficientnet_b0_backend_OLD_TORCH: metric_value: 0.75538 metric_value_fp32: 0.77292 timm/efficientnet_b0_backend_TORCH: - metric_value: 0.76476 + metric_value: 0.76798 metric_value_fp32: 0.77292 timm/efficientnet_b0_backend_ONNX: metric_value: 0.76582 @@ -276,7 +276,7 @@ timm/swin_base_patch4_window7_224_backend_OLD_TORCH: metric_value: 0.81376 metric_value_fp32: 0.81462 timm/swin_base_patch4_window7_224_backend_TORCH: - metric_value: 0.8131 + metric_value: 0.80696 metric_value_fp32: 0.81462 timm/swin_base_patch4_window7_224_backend_ONNX: metric_value: 0.81294 diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 44824876193..7254533c5c9 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -31,7 +31,7 @@ from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization -from nncf.quantization.passes import filter_constant_nodes_inplace +from nncf.quantization.passes import filter_constant_nodes from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope from tests.common.quantization.metatypes import Conv2dTestMetatype @@ -277,7 +277,7 @@ def test_quantization_points_overflow_fix(self, overflow_fix, affected_target_po def test_validate_scope(self, test_params, validate_scopes): nncf_graph = test_params["test_model_type_pass"]["nncf_graph"] inference_nncf_graph = deepcopy(nncf_graph) - filter_constant_nodes_inplace(inference_nncf_graph) + filter_constant_nodes(inference_nncf_graph) ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] algo = MinMaxQuantization( ignored_scope=IgnoredScope(names=["some_node"], validate=validate_scopes), From 237ba5014cd64d5aa6f9ec538d398cd142a5975e Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 17 Oct 2023 15:51:20 +0200 Subject: [PATCH 13/18] Fix tests --- .../test_templates/test_ptq_params.py | 22 ++++++++++++++++--- .../test_templates/test_quantizer_config.py | 22 ++++++++++++++++--- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 7254533c5c9..53c00a9e4a9 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -32,6 +32,7 @@ from nncf.quantization.algorithms.min_max.algorithm 
import MinMaxQuantization from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.quantization.passes import filter_constant_nodes +from nncf.quantization.passes import transform_to_inference_graph from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope from tests.common.quantization.metatypes import Conv2dTestMetatype @@ -172,7 +173,12 @@ def test_quantize_outputs(self, test_params, quantize_outputs): assert min_max_algo._quantize_outputs == quantize_outputs hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] - inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) + inference_nncf_graph = transform_to_inference_graph( + nncf_graph, + min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, + min_max_algo._backend_entity.read_variable_metatypes, + ) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) act_num_q, weight_num_q = 0, 0 for quantization_point in q_setup.quantization_points.values(): @@ -193,7 +199,12 @@ def test_ignored_scopes(self, test_params, ignored_scopes_data): nncf_graph = test_params["test_ignored_scopes"]["nncf_graph"] hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] - inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) + inference_nncf_graph = transform_to_inference_graph( + nncf_graph, + min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, + min_max_algo._backend_entity.read_variable_metatypes, + ) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) act_num_q, weight_num_q = 0, 0 for quantization_point in q_setup.quantization_points.values(): @@ -214,7 +225,12 @@ def test_model_type_pass(self, test_params, model_type): nncf_graph = test_params["test_model_type_pass"]["nncf_graph"] hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] - inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) + inference_nncf_graph = transform_to_inference_graph( + nncf_graph, + min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, + min_max_algo._backend_entity.read_variable_metatypes, + ) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) for quantization_point in q_setup.quantization_points.values(): if quantization_point.is_activation_quantization_point(): diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 8fa55c9c1d0..2f73dc3df78 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -32,6 +32,7 @@ from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import QuantizationParameters from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.passes import transform_to_inference_graph from 
nncf.quantization.range_estimator import RangeEstimatorParametersSet from tests.post_training.test_templates.models import NNCFGraphToTest from tests.post_training.test_templates.models import NNCFGraphToTestDepthwiseConv @@ -88,7 +89,12 @@ def test_default_quantizer_config(self, single_conv_nncf_graph): min_max_algo = algo.algorithms[0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph - inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) + inference_nncf_graph = transform_to_inference_graph( + nncf_graph, + min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, + min_max_algo._backend_entity.read_variable_metatypes, + ) q_setup = min_max_algo._get_quantizer_setup( nncf_graph, inference_nncf_graph, hw_patterns=GraphPattern(), ignored_patterns=GraphPattern() ) @@ -140,7 +146,12 @@ def test_quantizer_config_from_ptq_params_for_CPU( min_max_algo = algo.algorithms[0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph - inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) + inference_nncf_graph = transform_to_inference_graph( + nncf_graph, + min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, + min_max_algo._backend_entity.read_variable_metatypes, + ) if signed_weights is False or signed_activations in [True, False]: # Incompatible with HW CPU config with pytest.raises( ValueError, @@ -178,7 +189,12 @@ def test_depthwise_conv_default_quantizer_config(self, depthwise_conv_nncf_graph min_max_algo = algo.algorithms[0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = depthwise_conv_nncf_graph.nncf_graph - inference_nncf_graph = min_max_algo._backend_entity.transform_to_inference_graph(nncf_graph) + inference_nncf_graph = transform_to_inference_graph( + nncf_graph, + min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, + min_max_algo._backend_entity.read_variable_metatypes, + ) q_setup = min_max_algo._get_quantizer_setup( nncf_graph, inference_nncf_graph, hw_patterns=GraphPattern(), ignored_patterns=GraphPattern() ) From 2ecb439d0146826f4a87a5288c1c2462ff871b5b Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 18 Oct 2023 09:24:17 +0200 Subject: [PATCH 14/18] Metrics update --- tests/post_training/reference_data.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/post_training/reference_data.yaml b/tests/post_training/reference_data.yaml index 39e231d592c..2ea64d6cf1a 100644 --- a/tests/post_training/reference_data.yaml +++ b/tests/post_training/reference_data.yaml @@ -24,7 +24,7 @@ hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM_backend_OPTIMUM: # Timm timm/crossvit_9_240_backend_TORCH: - metric_value: 0.68136 + metric_value: 0.689 metric_value_fp32: 0.69966 timm/crossvit_9_240_backend_ONNX: metric_value: 0.68906 @@ -56,7 +56,7 @@ timm/deit3_small_patch16_224_backend_OLD_TORCH: metric_value: 0.76514 metric_value_fp32: 0.76974 timm/deit3_small_patch16_224_backend_TORCH: - metric_value: 0.7621 + metric_value: 0.76816 metric_value_fp32: 0.76974 timm/deit3_small_patch16_224_backend_ONNX: metric_value: 0.76806 @@ -104,7 +104,7 @@ timm/efficientnet_b0_backend_OLD_TORCH: metric_value: 0.75538 metric_value_fp32: 0.77292 timm/efficientnet_b0_backend_TORCH: - metric_value: 0.76798 + metric_value: 0.76476 metric_value_fp32: 0.77292 
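
For readers unfamiliar with this reference file: each key pairs the quantized metric with its FP32 baseline for one model/backend combination. A hypothetical consumer could read it as sketched below; the actual conformance harness is not part of this patch, and the tolerance values are made up for illustration.

```python
import yaml  # PyYAML

with open("tests/post_training/reference_data.yaml") as f:
    reference = yaml.safe_load(f)

entry = reference["timm/efficientnet_b0_backend_TORCH"]
int8_ref = entry["metric_value"]       # expected top-1 accuracy after PTQ
fp32_ref = entry["metric_value_fp32"]  # top-1 accuracy of the original model

measured = 0.7648  # placeholder for a freshly measured value
assert abs(measured - int8_ref) < 0.005, "metric drifted from the reference"
assert fp32_ref - measured < 0.01, "accuracy drop vs FP32 is too large"
```
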
timm/efficientnet_b0_backend_ONNX: metric_value: 0.76582 @@ -276,7 +276,7 @@ timm/swin_base_patch4_window7_224_backend_OLD_TORCH: metric_value: 0.81376 metric_value_fp32: 0.81462 timm/swin_base_patch4_window7_224_backend_TORCH: - metric_value: 0.80696 + metric_value: 0.8131 metric_value_fp32: 0.81462 timm/swin_base_patch4_window7_224_backend_ONNX: metric_value: 0.81294 From a662180f2037c4cdf85933fe920ff041ea053e9b Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 18 Oct 2023 10:20:39 +0200 Subject: [PATCH 15/18] Dropout removal original graph reference --- ....dot => dropout_synthetic_model_after.dot} | 2 +- .../passes/dropout_synthetic_model_before.dot | 25 +++++++++++++++++++ tests/common/quantization/test_passes.py | 15 +++++++---- tests/post_training/test_templates/models.py | 2 +- 4 files changed, 37 insertions(+), 7 deletions(-) rename tests/common/data/reference_graphs/passes/{dropout_synthetic_model.dot => dropout_synthetic_model_after.dot} (100%) create mode 100644 tests/common/data/reference_graphs/passes/dropout_synthetic_model_before.dot diff --git a/tests/common/data/reference_graphs/passes/dropout_synthetic_model.dot b/tests/common/data/reference_graphs/passes/dropout_synthetic_model_after.dot similarity index 100% rename from tests/common/data/reference_graphs/passes/dropout_synthetic_model.dot rename to tests/common/data/reference_graphs/passes/dropout_synthetic_model_after.dot index abbe8c0b642..bb1b1c72b51 100644 --- a/tests/common/data/reference_graphs/passes/dropout_synthetic_model.dot +++ b/tests/common/data/reference_graphs/passes/dropout_synthetic_model_after.dot @@ -13,7 +13,7 @@ strict digraph { "1 /Split_1_0" -> "6 /Output_2_1_0"; "1 /Split_1_0" -> "7 /Output_2_2_0"; "1 /Split_1_0" -> "8 /Output_2_3_0"; -"1 /Split_1_0" -> "10 /Output_2_4_0"; "1 /Split_1_0" -> "9 /Output_3_0"; +"1 /Split_1_0" -> "10 /Output_2_4_0"; "1 /Split_1_0" -> "11 /Output_3_1_0" [label="parallel_input_port_ids:[2, 3, 4, 5, 6, 7, 8, 9]"]; } diff --git a/tests/common/data/reference_graphs/passes/dropout_synthetic_model_before.dot b/tests/common/data/reference_graphs/passes/dropout_synthetic_model_before.dot new file mode 100644 index 00000000000..f8c29563d5b --- /dev/null +++ b/tests/common/data/reference_graphs/passes/dropout_synthetic_model_before.dot @@ -0,0 +1,25 @@ +strict digraph { +"0 /Input_1_0" [id=0, type=Input_1]; +"1 /Split_1_0" [id=1, type=Split_1]; +"2 /Dropout_1_0" [id=2, type=Dropout_1]; +"3 /Dropout_2_0" [id=3, type=Dropout_2]; +"4 /Dropout_3_0" [id=4, type=Dropout_3]; +"5 /Output_1_0" [id=5, type=Output_1]; +"6 /Output_2_1_0" [id=6, type=Output_2_1]; +"7 /Output_2_2_0" [id=7, type=Output_2_2]; +"8 /Output_2_3_0" [id=8, type=Output_2_3]; +"9 /Output_3_0" [id=9, type=Output_3]; +"10 /Output_2_4_0" [id=10, type=output]; +"11 /Output_3_1_0" [id=11, type=output]; +"0 /Input_1_0" -> "1 /Split_1_0"; +"1 /Split_1_0" -> "2 /Dropout_1_0"; +"1 /Split_1_0" -> "3 /Dropout_2_0"; +"1 /Split_1_0" -> "4 /Dropout_3_0"; +"2 /Dropout_1_0" -> "5 /Output_1_0"; +"3 /Dropout_2_0" -> "6 /Output_2_1_0"; +"3 /Dropout_2_0" -> "7 /Output_2_2_0"; +"3 /Dropout_2_0" -> "8 /Output_2_3_0"; +"3 /Dropout_2_0" -> "10 /Output_2_4_0"; +"4 /Dropout_3_0" -> "9 /Output_3_0"; +"4 /Dropout_3_0" -> "11 /Output_3_1_0" [label="parallel_input_port_ids:[2, 3, 4, 5, 6, 7, 8, 9]"]; +} diff --git a/tests/common/quantization/test_passes.py b/tests/common/quantization/test_passes.py index a6d822bc649..991affc4d6e 100644 --- a/tests/common/quantization/test_passes.py +++ b/tests/common/quantization/test_passes.py @@ -30,20 +30,25 
@@ class TestModes(Enum): @pytest.mark.parametrize("mode", [TestModes.VALID, TestModes.WRONG_TENSOR_SHAPE, TestModes.WRONG_PARALLEL_EDGES]) def test_remove_dropout_nodes_inplace(mode: TestModes): - dot_reference_path = Path("passes") / "dropout_synthetic_model.dot" + def _check_graphs(dot_file_name, nncf_graph) -> None: + nx_graph = nncf_graph.get_graph_for_structure_analysis() + path_to_dot = DATA_ROOT / dot_file_name + compare_nx_graph_with_reference(nx_graph, path_to_dot, check_edge_attrs=True) + + dot_reference_path_before = Path("passes") / "dropout_synthetic_model_before.dot" + dot_reference_path_after = Path("passes") / "dropout_synthetic_model_after.dot" dropout_metatype = "DROPOUT_METATYPE" kwargs = {} if mode != TestModes.VALID: kwargs.update({mode.value: True}) nncf_graph = NNCFGraphDropoutRemovingCase(dropout_metatype, **kwargs).nncf_graph + if mode != TestModes.VALID: with pytest.raises(AssertionError): remove_dropout_nodes(nncf_graph, [dropout_metatype]) return + _check_graphs(dot_reference_path_before, nncf_graph) remove_dropout_nodes(nncf_graph, [dropout_metatype]) - - nx_graph = nncf_graph.get_graph_for_structure_analysis() - path_to_dot = DATA_ROOT / dot_reference_path - compare_nx_graph_with_reference(nx_graph, path_to_dot, check_edge_attrs=True) + _check_graphs(dot_reference_path_after, nncf_graph) diff --git a/tests/post_training/test_templates/models.py b/tests/post_training/test_templates/models.py index efd13cb4a0c..24203fbeb21 100644 --- a/tests/post_training/test_templates/models.py +++ b/tests/post_training/test_templates/models.py @@ -288,7 +288,7 @@ def __init__( parallel_input_port_ids=list(range(2, 10)), ) if wrong_parallel_edges: - dropout_4 = self.nncf_graph.add_nncf_node("100 /dropoiut", "dropout", dropout_metatype) + dropout_4 = self.nncf_graph.add_nncf_node("100 /dropout", "dropout", dropout_metatype) self.nncf_graph.add_edge_between_nncf_nodes( self.nncf_graph.get_node_by_key("0 /Input_1_0").node_id, dropout_4.node_id, From a9d99d05b9612453c198696778152fa2d2397872 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 18 Oct 2023 10:25:16 +0200 Subject: [PATCH 16/18] Clean --- nncf/common/graph/graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nncf/common/graph/graph.py b/nncf/common/graph/graph.py index 4f640ffe9bc..756f3d3718e 100644 --- a/nncf/common/graph/graph.py +++ b/nncf/common/graph/graph.py @@ -609,8 +609,8 @@ def get_graph_for_structure_analysis(self, extended: bool = False) -> nx.DiGraph label["shape"] = edge[NNCFGraph.ACTIVATION_SHAPE_EDGE_ATTR] if label: - if len(label) == 1 and extended: - attrs_edge["label"] = label.popitem()[1] + if "shape" in label and len(label) == 1: + attrs_edge["label"] = label["shape"] else: attrs_edge["label"] = ", ".join((f"{k}:{v}" for k, v in label.items())) out_graph.add_edge(u, v, **attrs_edge) From 03c2b5e0cb9ec0329e271c406bdd364f34a63a78 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Thu, 19 Oct 2023 17:03:13 +0200 Subject: [PATCH 17/18] function remove_dropout_node is refactored --- nncf/quantization/passes.py | 23 ++++++++++++----------- tests/common/quantization/test_passes.py | 8 ++++---- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/nncf/quantization/passes.py b/nncf/quantization/passes.py index 32ad5fc3ebf..f31a38281bf 100644 --- a/nncf/quantization/passes.py +++ b/nncf/quantization/passes.py @@ -39,7 +39,7 @@ def transform_to_inference_graph( """ inference_graph = deepcopy(nncf_graph) remove_shapeof_subgraphs(inference_graph, shapeof_metatypes, 
read_variable_metatypes) - remove_dropout_nodes(inference_graph, dropout_metatypes) + remove_nodes_and_reconnect_graph(inference_graph, dropout_metatypes) filter_constant_nodes(inference_graph, read_variable_metatypes) return inference_graph @@ -91,24 +91,25 @@ def remove_shapeof_subgraphs( nncf_graph.remove_nodes_from(nodes_to_drop) -def remove_dropout_nodes( +def remove_nodes_and_reconnect_graph( nncf_graph: NNCFGraph, - dropout_metatypes: List[OperatorMetatype], + metatypes: List[OperatorMetatype], ) -> None: """ - Removes the Dropout nodes from the provided NNCFGraph instance and - connects droput previous node with dropout next nodes inplace - for each dropout node. + Removes nodes with metatypes specified by `metatypes` parameter from + the provided NNCFGraph instance and connects previous node of a matched node + with next nodes of a matched node inplace for each matched node. + Matched nodes should have only one input node and only one output port. :param nncf_graph: NNCFGraph instance for the transformation. - :param dropout_metatypes: List of backend-specific Dropout metatypes. + :param metatypes: List of backend-specific metatypes. """ - if not dropout_metatypes: + if not metatypes: return nodes_to_drop = [] - for node in nncf_graph.get_nodes_by_metatypes(dropout_metatypes): - if node.metatype in dropout_metatypes: + for node in nncf_graph.get_nodes_by_metatypes(metatypes): + if node.metatype in metatypes: nodes_to_drop.append(node) prev_nodes = nncf_graph.get_previous_nodes(node) @@ -122,7 +123,7 @@ def remove_dropout_nodes( # parallel_input_port_ids for output_node in nncf_graph.get_next_nodes(node): output_edge = nncf_graph.get_edge(node, output_node) - # Connects dropout previous node with all next nodes + # Connects previous node with all next nodes # to keep NNCFGraph connected. 
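
The reconnection behaviour described above is easiest to see on a toy graph. The sketch below uses plain networkx rather than NNCFGraph, so it illustrates the idea only, not NNCF's API.

```python
import networkx as nx


def drop_and_reconnect(graph: nx.DiGraph, node: str) -> None:
    """Remove `node`, wiring its single producer to every consumer."""
    producers = list(graph.predecessors(node))
    assert len(producers) == 1, "only single-input nodes can be dropped this way"
    (producer,) = producers
    for consumer in graph.successors(node):
        graph.add_edge(producer, consumer)
    graph.remove_node(node)


g = nx.DiGraph()
g.add_edges_from([("input", "dropout"), ("dropout", "conv_a"), ("dropout", "conv_b")])
drop_and_reconnect(g, "dropout")
assert set(g.edges) == {("input", "conv_a"), ("input", "conv_b")}
```
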
assert input_edge.dtype == output_edge.dtype assert input_edge.tensor_shape == output_edge.tensor_shape diff --git a/tests/common/quantization/test_passes.py b/tests/common/quantization/test_passes.py index 991affc4d6e..c744c2297a4 100644 --- a/tests/common/quantization/test_passes.py +++ b/tests/common/quantization/test_passes.py @@ -14,7 +14,7 @@ import pytest -from nncf.quantization.passes import remove_dropout_nodes +from nncf.quantization.passes import remove_nodes_and_reconnect_graph from tests.post_training.test_templates.models import NNCFGraphDropoutRemovingCase from tests.shared.nx_graph import compare_nx_graph_with_reference from tests.shared.paths import TEST_ROOT @@ -29,7 +29,7 @@ class TestModes(Enum): @pytest.mark.parametrize("mode", [TestModes.VALID, TestModes.WRONG_TENSOR_SHAPE, TestModes.WRONG_PARALLEL_EDGES]) -def test_remove_dropout_nodes_inplace(mode: TestModes): +def test_remove_nodes_and_reconnect_graph(mode: TestModes): def _check_graphs(dot_file_name, nncf_graph) -> None: nx_graph = nncf_graph.get_graph_for_structure_analysis() path_to_dot = DATA_ROOT / dot_file_name @@ -46,9 +46,9 @@ def _check_graphs(dot_file_name, nncf_graph) -> None: if mode != TestModes.VALID: with pytest.raises(AssertionError): - remove_dropout_nodes(nncf_graph, [dropout_metatype]) + remove_nodes_and_reconnect_graph(nncf_graph, [dropout_metatype]) return _check_graphs(dot_reference_path_before, nncf_graph) - remove_dropout_nodes(nncf_graph, [dropout_metatype]) + remove_nodes_and_reconnect_graph(nncf_graph, [dropout_metatype]) _check_graphs(dot_reference_path_after, nncf_graph) From ce141758ad79fe4e158294ff074805ec587db5d5 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Thu, 19 Oct 2023 17:56:45 +0200 Subject: [PATCH 18/18] Make passes return values --- .../algorithms/accuracy_control/ranker.py | 5 ++-- .../algorithms/min_max/algorithm.py | 2 +- nncf/quantization/passes.py | 26 +++++++++++-------- .../quantization/test_quantizer_removal.py | 3 +-- .../test_templates/test_ptq_params.py | 9 ++++--- .../test_templates/test_quantizer_config.py | 7 ++--- 6 files changed, 29 insertions(+), 23 deletions(-) diff --git a/nncf/quantization/algorithms/accuracy_control/ranker.py b/nncf/quantization/algorithms/accuracy_control/ranker.py index 87a2bb485cf..fc946b00140 100644 --- a/nncf/quantization/algorithms/accuracy_control/ranker.py +++ b/nncf/quantization/algorithms/accuracy_control/ranker.py @@ -98,8 +98,9 @@ def find_groups_of_quantizers_to_rank(self, quantized_model_graph: NNCFGraph) -> if x.metatype in self._algo_backend.get_quantizer_metatypes() ] - quantized_model_graph_without_shapeof = deepcopy(quantized_model_graph) - remove_shapeof_subgraphs(quantized_model_graph_without_shapeof, self._algo_backend.get_shapeof_metatypes()) + quantized_model_graph_without_shapeof = remove_shapeof_subgraphs( + deepcopy(quantized_model_graph), self._algo_backend.get_shapeof_metatypes() + ) for quantizer_node in reversed(quantizers): if processed.get(quantizer_node.node_name, False): diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 4b1c0547f86..ddaa6755cd4 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -505,7 +505,7 @@ def _get_quantization_target_points( hw_patterns = PatternsManager.get_full_hw_pattern_graph(backend=backend, device=device, model_type=model_type) inference_nncf_graph = transform_to_inference_graph( - nncf_graph, + deepcopy(nncf_graph), 
self._backend_entity.shapeof_metatypes, self._backend_entity.dropout_metatypes, self._backend_entity.read_variable_metatypes, diff --git a/nncf/quantization/passes.py b/nncf/quantization/passes.py index f31a38281bf..881b7f0a8c9 100644 --- a/nncf/quantization/passes.py +++ b/nncf/quantization/passes.py @@ -10,7 +10,6 @@ # limitations under the License. import collections -from copy import deepcopy from typing import List, Optional, TypeVar from nncf.common.graph.graph import NNCFGraph @@ -28,7 +27,7 @@ def transform_to_inference_graph( read_variable_metatypes: Optional[List[OperatorMetatype]] = None, ) -> NNCFGraph: """ - This method contains pipeline of the passes that uses to provide inference graph without constant flows. + This method contains inplace pipeline of the passes that uses to provide inference graph without constant flows. :param nncf_graph: NNCFGraph instance for the transformation. :param shapeof_metatypes: List of backend-specific ShapeOf metatypes. @@ -37,18 +36,17 @@ def transform_to_inference_graph( that also can be interpreted as inputs (ReadValue). :return: NNCFGraph in the inference style. """ - inference_graph = deepcopy(nncf_graph) - remove_shapeof_subgraphs(inference_graph, shapeof_metatypes, read_variable_metatypes) - remove_nodes_and_reconnect_graph(inference_graph, dropout_metatypes) - filter_constant_nodes(inference_graph, read_variable_metatypes) - return inference_graph + remove_shapeof_subgraphs(nncf_graph, shapeof_metatypes, read_variable_metatypes) + remove_nodes_and_reconnect_graph(nncf_graph, dropout_metatypes) + filter_constant_nodes(nncf_graph, read_variable_metatypes) + return nncf_graph def remove_shapeof_subgraphs( nncf_graph: NNCFGraph, shapeof_metatypes: List[OperatorMetatype], read_variable_metatypes: Optional[List[OperatorMetatype]] = None, -) -> None: +) -> NNCFGraph: """ Removes the ShapeOf subgraphs from the provided NNCFGraph instance inplace. @@ -56,6 +54,7 @@ def remove_shapeof_subgraphs( :param shapeof_metatypes: List of backend-specific ShapeOf metatypes. :param read_variable_metatypes: List of backend-specific metatypes that also can be interpreted as inputs (ReadValue). + :return: NNCFGraph without ShapeOf subgraphs. """ read_variable_metatypes = read_variable_metatypes if read_variable_metatypes else [] nodes_to_drop = set() @@ -89,12 +88,13 @@ def remove_shapeof_subgraphs( shape_of_queue.extend(nncf_graph.get_next_nodes(node) + nncf_graph.get_previous_nodes(node)) nncf_graph.remove_nodes_from(nodes_to_drop) + return nncf_graph def remove_nodes_and_reconnect_graph( nncf_graph: NNCFGraph, metatypes: List[OperatorMetatype], -) -> None: +) -> NNCFGraph: """ Removes nodes with metatypes specified by `metatypes` parameter from the provided NNCFGraph instance and connects previous node of a matched node @@ -103,9 +103,10 @@ def remove_nodes_and_reconnect_graph( :param nncf_graph: NNCFGraph instance for the transformation. :param metatypes: List of backend-specific metatypes. + :return: Resulting NNCFGraph. """ if not metatypes: - return + return nncf_graph nodes_to_drop = [] for node in nncf_graph.get_nodes_by_metatypes(metatypes): @@ -137,11 +138,12 @@ def remove_nodes_and_reconnect_graph( parallel_input_port_ids=output_edge.parallel_input_port_ids, ) nncf_graph.remove_nodes_from(nodes_to_drop) + return nncf_graph def filter_constant_nodes( nncf_graph: NNCFGraph, read_variable_metatypes: Optional[List[OperatorMetatype]] = None -) -> None: +) -> NNCFGraph: """ Removes all Constant nodes from NNCFGraph inplace, making it inference graph. 
The traversing starts from the input nodes and nodes with weights. @@ -149,6 +151,7 @@ def filter_constant_nodes( :param nncf_graph: NNCFGraph instance for the transformation. :param read_variable_metatypes: List of backend-specific metatypes that also can be interpreted as inputs (ReadValue). + :return: NNCFGraph without Constant nodes. """ read_variable_metatypes = read_variable_metatypes if read_variable_metatypes else [] input_nodes = nncf_graph.get_input_nodes() @@ -169,6 +172,7 @@ def filter_constant_nodes( nodes_queue.extend(nncf_graph.get_next_nodes(node)) constant_nodes = [node for node in nncf_graph.get_all_nodes() if node not in visited_nodes] nncf_graph.remove_nodes_from(constant_nodes) + return nncf_graph def insert_null_biases_pass(model: TModel, graph: NNCFGraph) -> TModel: diff --git a/tests/common/quantization/test_quantizer_removal.py b/tests/common/quantization/test_quantizer_removal.py index 911056f0d64..159095fd0ae 100644 --- a/tests/common/quantization/test_quantizer_removal.py +++ b/tests/common/quantization/test_quantizer_removal.py @@ -226,8 +226,7 @@ def create_test_params(): @pytest.mark.parametrize("nncf_graph,test_case", create_test_params()) def test_find_quantizer_nodes_to_cut(nncf_graph: NNCFGraph, test_case: TestCase): quantizer_node = nncf_graph.get_node_by_name(test_case.node_name) - nncf_graph_without_shapeof = deepcopy(nncf_graph) - remove_shapeof_subgraphs(nncf_graph_without_shapeof, SHAPEOF_METATYPES) + nncf_graph_without_shapeof = remove_shapeof_subgraphs(deepcopy(nncf_graph), SHAPEOF_METATYPES) nodes, ops = find_quantizer_nodes_to_cut( nncf_graph_without_shapeof, quantizer_node, diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index cea85dced0b..71a14d8ad76 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -10,6 +10,7 @@ # limitations under the License. 
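
The net effect of this last patch on callers is a small convention change: each pass still mutates the graph it is given, but now also returns it, so code that needs to preserve the original copies it explicitly. A short sketch, assuming `nncf_graph` and the metatype lists exist as in the surrounding tests:

```python
from copy import deepcopy

from nncf.quantization.passes import remove_shapeof_subgraphs
from nncf.quantization.passes import transform_to_inference_graph

# Keep the original graph intact: copy first, then let the pass mutate and
# return that copy in a single expression.
graph_without_shapeof = remove_shapeof_subgraphs(deepcopy(nncf_graph), shapeof_metatypes)

inference_graph = transform_to_inference_graph(
    deepcopy(nncf_graph), shapeof_metatypes, dropout_metatypes, read_variable_metatypes
)

# If the original graph is no longer needed, the deepcopy can simply be dropped,
# since each pass works in place on whatever graph it receives.
```
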
from abc import abstractmethod from collections import Counter +from copy import deepcopy from typing import Dict import pytest @@ -162,7 +163,7 @@ def test_quantize_outputs(self, test_params, quantize_outputs): hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] inference_nncf_graph = transform_to_inference_graph( - nncf_graph, + deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, @@ -187,7 +188,7 @@ def test_ignored_scopes(self, test_params, ignored_scopes_data): hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] inference_nncf_graph = transform_to_inference_graph( - nncf_graph, + deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, @@ -212,7 +213,7 @@ def test_model_type_pass(self, test_params, model_type): hw_patterns = test_params["test_model_type_pass"]["hw_patterns"] ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] inference_nncf_graph = transform_to_inference_graph( - nncf_graph, + deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, @@ -278,7 +279,7 @@ def test_quantization_points_overflow_fix(self, overflow_fix, affected_target_po @pytest.mark.parametrize("validate_scopes", (True, False)) def test_validate_scope(self, test_params, validate_scopes): nncf_graph = test_params["test_model_type_pass"]["nncf_graph"] - inference_nncf_graph = transform_to_inference_graph(nncf_graph, [], []) + inference_nncf_graph = transform_to_inference_graph(deepcopy(nncf_graph), [], []) ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] algo = MinMaxQuantization( ignored_scope=IgnoredScope(names=["some_node"], validate=validate_scopes), diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 9fb050b8fe3..48e5b585084 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -10,6 +10,7 @@ # limitations under the License. 
from abc import abstractmethod +from copy import deepcopy from dataclasses import dataclass from typing import List @@ -88,7 +89,7 @@ def test_default_quantizer_config(self, single_conv_nncf_graph): min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph inference_nncf_graph = transform_to_inference_graph( - nncf_graph, + deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, @@ -142,7 +143,7 @@ def test_quantizer_config_from_ptq_params_for_CPU( min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph inference_nncf_graph = transform_to_inference_graph( - nncf_graph, + deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, @@ -184,7 +185,7 @@ def test_depthwise_conv_default_quantizer_config(self, depthwise_conv_nncf_graph min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = depthwise_conv_nncf_graph.nncf_graph inference_nncf_graph = transform_to_inference_graph( - nncf_graph, + deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes,