From d87936140cb16c5436289be1608ac84cf12210dd Mon Sep 17 00:00:00 2001 From: Daniil Lyakhov Date: Tue, 24 Oct 2023 08:11:47 +0200 Subject: [PATCH] [PTQ][Torch][KQV self attention] Align FQ placement between OV and Torch backend (#2166) ### Changes * "unbind" and "__matmul__" ops are added to torch patterns * Dropout removing pass is added to function `transform_to_inference_graph` * LayerNorm and GroupNorm metatypes are added to ignored metatypes in MinMax algorithm ### Reason for changes To align quantization placement between OV and Torch backends for the following models: * timm/crossvit_9_240 * timm/deit3_small_patch16_224 * timm/swin_base_patch4_window7_224 ### Related tickets 121647 ### Tests * test_metatypes_to_ignore for quantization propagation solver --- nncf/common/graph/graph.py | 12 ++- .../algorithms/min_max/algorithm.py | 5 +- .../algorithms/min_max/backend.py | 19 +++-- .../algorithms/min_max/onnx_backend.py | 20 +++-- .../algorithms/min_max/openvino_backend.py | 20 +++-- .../algorithms/min_max/torch_backend.py | 16 +++- nncf/quantization/passes.py | 65 +++++++++++++-- nncf/torch/quantization/ignored_patterns.py | 11 ++- .../passes/dropout_synthetic_model_after.dot | 19 +++++ .../passes/dropout_synthetic_model_before.dot | 25 ++++++ tests/common/quantization/test_passes.py | 54 +++++++++++++ .../test_quantizer_propagation_solver.py | 25 ++++++ tests/post_training/reference_data.yaml | 6 +- tests/post_training/test_templates/models.py | 80 +++++++++++++++++++ .../test_templates/test_ptq_params.py | 5 +- .../test_templates/test_quantizer_config.py | 3 + 16 files changed, 345 insertions(+), 40 deletions(-) create mode 100644 tests/common/data/reference_graphs/passes/dropout_synthetic_model_after.dot create mode 100644 tests/common/data/reference_graphs/passes/dropout_synthetic_model_before.dot create mode 100644 tests/common/quantization/test_passes.py diff --git a/nncf/common/graph/graph.py b/nncf/common/graph/graph.py index 08a1fd587bd..756f3d3718e 
100644 --- a/nncf/common/graph/graph.py +++ b/nncf/common/graph/graph.py @@ -597,12 +597,22 @@ def get_graph_for_structure_analysis(self, extended: bool = False) -> nx.DiGraph attrs_edge = {} u = u.replace(__RESERVED_DOT_CHARACTER, __CHARACTER_REPLACE_TO) v = v.replace(__RESERVED_DOT_CHARACTER, __CHARACTER_REPLACE_TO) + label = {} + if edge[NNCFGraph.PARALLEL_INPUT_PORT_IDS_ATTR]: + label["parallel_input_port_ids"] = edge[NNCFGraph.PARALLEL_INPUT_PORT_IDS_ATTR] + if extended: if edge[NNCFGraph.DTYPE_EDGE_ATTR] is Dtype.INTEGER: attrs_edge["style"] = "dashed" else: attrs_edge["style"] = "solid" - attrs_edge["label"] = edge[NNCFGraph.ACTIVATION_SHAPE_EDGE_ATTR] + label["shape"] = edge[NNCFGraph.ACTIVATION_SHAPE_EDGE_ATTR] + + if label: + if "shape" in label and len(label) == 1: + attrs_edge["label"] = label["shape"] + else: + attrs_edge["label"] = ", ".join((f"{k}:{v}" for k, v in label.items())) out_graph.add_edge(u, v, **attrs_edge) return out_graph diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 1b199409683..ddaa6755cd4 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -505,7 +505,10 @@ def _get_quantization_target_points( hw_patterns = PatternsManager.get_full_hw_pattern_graph(backend=backend, device=device, model_type=model_type) inference_nncf_graph = transform_to_inference_graph( - deepcopy(nncf_graph), self._backend_entity.shapeof_metatypes, self._backend_entity.read_variable_metatypes + deepcopy(nncf_graph), + self._backend_entity.shapeof_metatypes, + self._backend_entity.dropout_metatypes, + self._backend_entity.read_variable_metatypes, ) quantizer_setup = self._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index 254a1c949cf..0764885a945 100644 --- 
a/nncf/quantization/algorithms/min_max/backend.py +++ b/nncf/quantization/algorithms/min_max/backend.py @@ -51,23 +51,23 @@ def post_processing_metatypes(self) -> List[OperatorMetatype]: @property @abstractmethod - def shapeof_metatypes(self) -> List[OperatorMetatype]: + def conv_metatypes(self) -> List[OperatorMetatype]: """ - Property for the backend-specific ShapeOf metatypes. + Property for the backend-specific Convolution metatypes. """ @property @abstractmethod - def conv_metatypes(self) -> List[OperatorMetatype]: + def shapeof_metatypes(self) -> List[OperatorMetatype]: """ - Property for the backend-specific Convolution metatypes. + Property for the backend-specific ShapeOf metatypes. """ @property @abstractmethod - def overflow_fix_metatypes(self) -> List[OperatorMetatype]: + def dropout_metatypes(self) -> List[OperatorMetatype]: """ - Property for the backend-specific metatypes for which overflow_fix is applicable. + Property for the backend-specific Dropout metatypes. """ @property @@ -77,6 +77,13 @@ def read_variable_metatypes(self) -> List[OperatorMetatype]: Property for the backend-specific metatypes that also can be interpreted as inputs (ReadValue). """ + @property + @abstractmethod + def overflow_fix_metatypes(self) -> List[OperatorMetatype]: + """ + Property for the backend-specific metatypes for which overflow_fix is applicable. 
+ """ + @property @abstractmethod def add_metatypes(self) -> List[OperatorMetatype]: diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index d3c1e25d0ae..e202252bf59 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -56,10 +56,6 @@ def mat_mul_metatypes(self) -> List[OperatorMetatype]: def post_processing_metatypes(self) -> List[OperatorMetatype]: return [om.ONNXTopKMetatype, om.ONNXNonMaxSuppressionMetatype] - @property - def shapeof_metatypes(self) -> List[OperatorMetatype]: - return [om.ONNXShapeMetatype] - @property def conv_metatypes(self) -> List[OperatorMetatype]: return [om.ONNXConvolutionMetatype] @@ -68,10 +64,6 @@ def conv_metatypes(self) -> List[OperatorMetatype]: def overflow_fix_metatypes(self) -> List[OperatorMetatype]: return [om.ONNXConvolutionMetatype, om.ONNXConvolutionTransposeMetatype, *MATMUL_METATYPES] - @property - def read_variable_metatypes(self) -> List[OperatorMetatype]: - return [] - @property def add_metatypes(self) -> List[OperatorMetatype]: return [om.ONNXAddLayerMetatype] @@ -80,6 +72,18 @@ def add_metatypes(self) -> List[OperatorMetatype]: def group_conv_metatypes(self) -> List[OperatorMetatype]: return self.conv_metatypes + @property + def shapeof_metatypes(self) -> List[OperatorMetatype]: + return [om.ONNXShapeMetatype] + + @property + def dropout_metatypes(self) -> List[OperatorMetatype]: + return [] + + @property + def read_variable_metatypes(self) -> List[OperatorMetatype]: + return [] + @property def scales_unification_map(self) -> Dict[OperatorMetatype, OperatorMetatype]: return {om.ONNXConcatMetatype: self.overflow_fix_metatypes} diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 4edba0c0a37..103969142c2 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ 
b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -56,10 +56,6 @@ def mat_mul_metatypes(self) -> List[OperatorMetatype]: def post_processing_metatypes(self) -> List[OperatorMetatype]: return [om.OVTopKMetatype, om.OVNonMaxSuppressionMetatype] - @property - def shapeof_metatypes(self) -> List[OperatorMetatype]: - return [om.OVShapeOfMetatype] - @property def conv_metatypes(self) -> List[OperatorMetatype]: return [om.OVConvolutionMetatype] @@ -74,10 +70,6 @@ def overflow_fix_metatypes(self) -> List[OperatorMetatype]: om.OVMatMulMetatype, ] - @property - def read_variable_metatypes(self) -> List[OperatorMetatype]: - return [om.OVReadValueMetatype] - @property def add_metatypes(self) -> List[OperatorMetatype]: return [om.OVAddMetatype] @@ -86,6 +78,18 @@ def add_metatypes(self) -> List[OperatorMetatype]: def group_conv_metatypes(self) -> List[OperatorMetatype]: return [om.OVGroupConvolutionMetatype] + @property + def shapeof_metatypes(self) -> List[OperatorMetatype]: + return [om.OVShapeOfMetatype] + + @property + def dropout_metatypes(self) -> List[OperatorMetatype]: + return [] + + @property + def read_variable_metatypes(self) -> List[OperatorMetatype]: + return [om.OVReadValueMetatype] + @property def scales_unification_map(self) -> Dict[OperatorMetatype, OperatorMetatype]: return {om.OVConcatMetatype: self.overflow_fix_metatypes} diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index e4a765a2d59..cae2c4fa6c2 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -69,6 +69,14 @@ def post_processing_metatypes(self) -> List[OperatorMetatype]: def shapeof_metatypes(self) -> List[OperatorMetatype]: return [] + @property + def dropout_metatypes(self) -> List[OperatorMetatype]: + return [om.PTDropoutMetatype] + + @property + def read_variable_metatypes(self) -> List[OperatorMetatype]: + return [] + @property def 
conv_metatypes(self) -> List[OperatorMetatype]: return [om.PTModuleConv1dMetatype, om.PTModuleConv2dMetatype, om.PTModuleConv3dMetatype] @@ -85,10 +93,6 @@ def overflow_fix_metatypes(self) -> List[OperatorMetatype]: om.PTModuleConvTranspose3dMetatype, ] - @property - def read_variable_metatypes(self) -> List[OperatorMetatype]: - return [] - @property def add_metatypes(self) -> List[OperatorMetatype]: return [om.PTAddMetatype] @@ -307,6 +311,10 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O om.PTDivMetatype, om.PTMaxMetatype, om.PTSqueezeMetatype, + om.PTLayerNormMetatype, + om.PTModuleLayerNormMetatype, + om.PTGroupNormMetatype, + om.PTModuleGroupNormMetatype, ] if device != TargetDevice.CPU_SPR: types.append(om.PTMulMetatype) diff --git a/nncf/quantization/passes.py b/nncf/quantization/passes.py index e6af74c8271..881b7f0a8c9 100644 --- a/nncf/quantization/passes.py +++ b/nncf/quantization/passes.py @@ -23,20 +23,23 @@ def transform_to_inference_graph( nncf_graph: NNCFGraph, shapeof_metatypes: List[OperatorMetatype], + dropout_metatypes: List[OperatorMetatype], read_variable_metatypes: Optional[List[OperatorMetatype]] = None, ) -> NNCFGraph: """ - This method contains pipeline of the passes that uses to provide inference graph without constant flows. + This method contains inplace pipeline of the passes that uses to provide inference graph without constant flows. :param nncf_graph: NNCFGraph instance for the transformation. :param shapeof_metatypes: List of backend-specific ShapeOf metatypes. + :param dropout_metatypes: List of backend-specific Dropout metatypes. :param read_variable_metatypes: List of backend-specific metatypes that also can be interpreted as inputs (ReadValue). :return: NNCFGraph in the inference style. 
""" - inference_nncf_graph = remove_shapeof_subgraphs(nncf_graph, shapeof_metatypes, read_variable_metatypes) - inference_nncf_graph = filter_constant_nodes(nncf_graph, read_variable_metatypes) - return inference_nncf_graph + remove_shapeof_subgraphs(nncf_graph, shapeof_metatypes, read_variable_metatypes) + remove_nodes_and_reconnect_graph(nncf_graph, dropout_metatypes) + filter_constant_nodes(nncf_graph, read_variable_metatypes) + return nncf_graph def remove_shapeof_subgraphs( @@ -45,7 +48,7 @@ def remove_shapeof_subgraphs( read_variable_metatypes: Optional[List[OperatorMetatype]] = None, ) -> NNCFGraph: """ - Removes the ShapeOf subgraphs from the provided NNCFGraph instance. + Removes the ShapeOf subgraphs from the provided NNCFGraph instance inplace. :param nncf_graph: NNCFGraph instance for the transformation. :param shapeof_metatypes: List of backend-specific ShapeOf metatypes. @@ -88,11 +91,61 @@ def remove_shapeof_subgraphs( return nncf_graph +def remove_nodes_and_reconnect_graph( + nncf_graph: NNCFGraph, + metatypes: List[OperatorMetatype], +) -> NNCFGraph: + """ + Removes nodes with metatypes specified by `metatypes` parameter from + the provided NNCFGraph instance and connects previous node of a matched node + with next nodes of a matched node inplace for each matched node. + Matched nodes should have only one input node and only one output port. + + :param nncf_graph: NNCFGraph instance for the transformation. + :param metatypes: List of backend-specific metatypes. + :return: Resulting NNCFGraph. 
+ """ + if not metatypes: + return nncf_graph + + nodes_to_drop = [] + for node in nncf_graph.get_nodes_by_metatypes(metatypes): + if node.metatype in metatypes: + nodes_to_drop.append(node) + + prev_nodes = nncf_graph.get_previous_nodes(node) + input_edges = nncf_graph.get_input_edges(node) + assert len(prev_nodes) == len(input_edges) == 1 + prev_node = prev_nodes[0] + input_edge = input_edges[0] + assert not input_edge.parallel_input_port_ids + + # nncf_graph.get_next_edges is not used to preserve + # parallel_input_port_ids + for output_node in nncf_graph.get_next_nodes(node): + output_edge = nncf_graph.get_edge(node, output_node) + # Connects previous node with all next nodes + # to keep NNCFGraph connected. + assert input_edge.dtype == output_edge.dtype + assert input_edge.tensor_shape == output_edge.tensor_shape + nncf_graph.add_edge_between_nncf_nodes( + from_node_id=prev_node.node_id, + to_node_id=output_edge.to_node.node_id, + tensor_shape=input_edge.tensor_shape, + input_port_id=output_edge.input_port_id, + output_port_id=input_edge.output_port_id, + dtype=input_edge.dtype, + parallel_input_port_ids=output_edge.parallel_input_port_ids, + ) + nncf_graph.remove_nodes_from(nodes_to_drop) + return nncf_graph + + def filter_constant_nodes( nncf_graph: NNCFGraph, read_variable_metatypes: Optional[List[OperatorMetatype]] = None ) -> NNCFGraph: """ - Removes all Constant nodes from NNCFGraph, making it inference graph. + Removes all Constant nodes from NNCFGraph inplace, making it inference graph. The traversing starts from the input nodes and nodes with weights. :param nncf_graph: NNCFGraph instance for the transformation. 
diff --git a/nncf/torch/quantization/ignored_patterns.py b/nncf/torch/quantization/ignored_patterns.py index 3f081d1341d..0565d9a8083 100644 --- a/nncf/torch/quantization/ignored_patterns.py +++ b/nncf/torch/quantization/ignored_patterns.py @@ -69,8 +69,15 @@ def _add_softmax_reshape_matmul( @PT_IGNORED_PATTERNS.register(IgnoredPatternNames.MULTIHEAD_ATTENTION_OUTPUT) def create_multihead_attention_output() -> GraphPattern: - matmul_aliases = ["linear", "addmm", "matmul", "bmm", "mm", "baddbmm"] - reshape_squeeze_aliases = ["reshape", "view", "flatten", "squeeze", "unsqueeze", "squeeze", "flatten", "unsqueeze"] + matmul_aliases = ["linear", "addmm", "matmul", "bmm", "mm", "baddbmm", "__matmul__"] + reshape_squeeze_aliases = [ + "reshape", + "view", + "flatten", + "unsqueeze", + "squeeze", + "unbind", + ] gather_aliases = ["gather", "index_select", "where", "index_select", "__getitem__"] transpose_aliases = ["transpose", "permute", "transpose_"] diff --git a/tests/common/data/reference_graphs/passes/dropout_synthetic_model_after.dot b/tests/common/data/reference_graphs/passes/dropout_synthetic_model_after.dot new file mode 100644 index 00000000000..bb1b1c72b51 --- /dev/null +++ b/tests/common/data/reference_graphs/passes/dropout_synthetic_model_after.dot @@ -0,0 +1,19 @@ +strict digraph { +"0 /Input_1_0" [id=0, type=Input_1]; +"1 /Split_1_0" [id=1, type=Split_1]; +"5 /Output_1_0" [id=5, type=Output_1]; +"6 /Output_2_1_0" [id=6, type=Output_2_1]; +"7 /Output_2_2_0" [id=7, type=Output_2_2]; +"8 /Output_2_3_0" [id=8, type=Output_2_3]; +"9 /Output_3_0" [id=9, type=Output_3]; +"10 /Output_2_4_0" [id=10, type=output]; +"11 /Output_3_1_0" [id=11, type=output]; +"0 /Input_1_0" -> "1 /Split_1_0"; +"1 /Split_1_0" -> "5 /Output_1_0"; +"1 /Split_1_0" -> "6 /Output_2_1_0"; +"1 /Split_1_0" -> "7 /Output_2_2_0"; +"1 /Split_1_0" -> "8 /Output_2_3_0"; +"1 /Split_1_0" -> "9 /Output_3_0"; +"1 /Split_1_0" -> "10 /Output_2_4_0"; +"1 /Split_1_0" -> "11 /Output_3_1_0" 
[label="parallel_input_port_ids:[2, 3, 4, 5, 6, 7, 8, 9]"]; +} diff --git a/tests/common/data/reference_graphs/passes/dropout_synthetic_model_before.dot b/tests/common/data/reference_graphs/passes/dropout_synthetic_model_before.dot new file mode 100644 index 00000000000..f8c29563d5b --- /dev/null +++ b/tests/common/data/reference_graphs/passes/dropout_synthetic_model_before.dot @@ -0,0 +1,25 @@ +strict digraph { +"0 /Input_1_0" [id=0, type=Input_1]; +"1 /Split_1_0" [id=1, type=Split_1]; +"2 /Dropout_1_0" [id=2, type=Dropout_1]; +"3 /Dropout_2_0" [id=3, type=Dropout_2]; +"4 /Dropout_3_0" [id=4, type=Dropout_3]; +"5 /Output_1_0" [id=5, type=Output_1]; +"6 /Output_2_1_0" [id=6, type=Output_2_1]; +"7 /Output_2_2_0" [id=7, type=Output_2_2]; +"8 /Output_2_3_0" [id=8, type=Output_2_3]; +"9 /Output_3_0" [id=9, type=Output_3]; +"10 /Output_2_4_0" [id=10, type=output]; +"11 /Output_3_1_0" [id=11, type=output]; +"0 /Input_1_0" -> "1 /Split_1_0"; +"1 /Split_1_0" -> "2 /Dropout_1_0"; +"1 /Split_1_0" -> "3 /Dropout_2_0"; +"1 /Split_1_0" -> "4 /Dropout_3_0"; +"2 /Dropout_1_0" -> "5 /Output_1_0"; +"3 /Dropout_2_0" -> "6 /Output_2_1_0"; +"3 /Dropout_2_0" -> "7 /Output_2_2_0"; +"3 /Dropout_2_0" -> "8 /Output_2_3_0"; +"3 /Dropout_2_0" -> "10 /Output_2_4_0"; +"4 /Dropout_3_0" -> "9 /Output_3_0"; +"4 /Dropout_3_0" -> "11 /Output_3_1_0" [label="parallel_input_port_ids:[2, 3, 4, 5, 6, 7, 8, 9]"]; +} diff --git a/tests/common/quantization/test_passes.py b/tests/common/quantization/test_passes.py new file mode 100644 index 00000000000..c744c2297a4 --- /dev/null +++ b/tests/common/quantization/test_passes.py @@ -0,0 +1,54 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from enum import Enum +from pathlib import Path + +import pytest + +from nncf.quantization.passes import remove_nodes_and_reconnect_graph +from tests.post_training.test_templates.models import NNCFGraphDropoutRemovingCase +from tests.shared.nx_graph import compare_nx_graph_with_reference +from tests.shared.paths import TEST_ROOT + +DATA_ROOT = TEST_ROOT / "common" / "data" / "reference_graphs" + + +class TestModes(Enum): + VALID = "valid" + WRONG_TENSOR_SHAPE = "wrong_dropout_node" + WRONG_PARALLEL_EDGES = "wrong_parallel_edges" + + +@pytest.mark.parametrize("mode", [TestModes.VALID, TestModes.WRONG_TENSOR_SHAPE, TestModes.WRONG_PARALLEL_EDGES]) +def test_remove_nodes_and_reconnect_graph(mode: TestModes): + def _check_graphs(dot_file_name, nncf_graph) -> None: + nx_graph = nncf_graph.get_graph_for_structure_analysis() + path_to_dot = DATA_ROOT / dot_file_name + compare_nx_graph_with_reference(nx_graph, path_to_dot, check_edge_attrs=True) + + dot_reference_path_before = Path("passes") / "dropout_synthetic_model_before.dot" + dot_reference_path_after = Path("passes") / "dropout_synthetic_model_after.dot" + dropout_metatype = "DROPOUT_METATYPE" + kwargs = {} + if mode != TestModes.VALID: + kwargs.update({mode.value: True}) + + nncf_graph = NNCFGraphDropoutRemovingCase(dropout_metatype, **kwargs).nncf_graph + + if mode != TestModes.VALID: + with pytest.raises(AssertionError): + remove_nodes_and_reconnect_graph(nncf_graph, [dropout_metatype]) + return + + _check_graphs(dot_reference_path_before, nncf_graph) + remove_nodes_and_reconnect_graph(nncf_graph, 
[dropout_metatype]) + _check_graphs(dot_reference_path_after, nncf_graph) diff --git a/tests/common/quantization/test_quantizer_propagation_solver.py b/tests/common/quantization/test_quantizer_propagation_solver.py index 19937966b17..3193776aca1 100644 --- a/tests/common/quantization/test_quantizer_propagation_solver.py +++ b/tests/common/quantization/test_quantizer_propagation_solver.py @@ -1839,3 +1839,28 @@ def test_quantizers_are_not_set_up_for_integer_inputs(self, ip_graph_with_int_ed assert double_input_pq.current_location_node_key == InsertionPointGraph.get_pre_hook_node_key( "5 /E_0", input_port_id=1 ) + + +def test_metatypes_to_ignore(mocker): + # pylint: disable=protected-access + NOT_IGNORED_METATYHPE = "not_ignored_metatype" + IGNORED_METATYPE = "target_metatype" + + nncf_graph = NNCFGraph() + nodes = [] + for node_name, node_metatype in zip("ABC", [NOT_IGNORED_METATYHPE, IGNORED_METATYPE, NOT_IGNORED_METATYHPE]): + nodes.append(nncf_graph.add_nncf_node(node_name, node_name, node_metatype=node_metatype)) + for idx in range(1, len(nodes)): + nncf_graph.add_edge_between_nncf_nodes( + nodes[idx - 1].node_id, nodes[idx].node_id, [1, 1, 1, 1], 0, 0, Dtype.FLOAT + ) + ip_graph = InsertionPointGraph(nncf_graph=nncf_graph, weight_modifiable_node_names=["A", "B", "C"]) + + solver = QuantizerPropagationSolver( + metatypes_to_ignore=[IGNORED_METATYPE], + ) + solver._add_node_to_ignored = mocker.MagicMock() + solver.run_on_ip_graph(ip_graph) + + solver._add_node_to_ignored.assert_called_once() + assert "1 B" in solver._add_node_to_ignored.call_args[0] diff --git a/tests/post_training/reference_data.yaml b/tests/post_training/reference_data.yaml index ce3be340e0a..951b846fcb1 100644 --- a/tests/post_training/reference_data.yaml +++ b/tests/post_training/reference_data.yaml @@ -24,7 +24,7 @@ hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM_backend_OPTIMUM: # Timm timm/crossvit_9_240_backend_TORCH: - metric_value: 0.68136 + metric_value: 0.689 metric_value_fp32: 
0.69966 timm/crossvit_9_240_backend_ONNX: metric_value: 0.68906 @@ -56,7 +56,7 @@ timm/deit3_small_patch16_224_backend_OLD_TORCH: metric_value: 0.76514 metric_value_fp32: 0.76974 timm/deit3_small_patch16_224_backend_TORCH: - metric_value: 0.7621 + metric_value: 0.76816 metric_value_fp32: 0.76974 timm/deit3_small_patch16_224_backend_ONNX: metric_value: 0.76806 @@ -276,7 +276,7 @@ timm/swin_base_patch4_window7_224_backend_OLD_TORCH: metric_value: 0.81376 metric_value_fp32: 0.81462 timm/swin_base_patch4_window7_224_backend_TORCH: - metric_value: 0.80696 + metric_value: 0.8131 metric_value_fp32: 0.81462 timm/swin_base_patch4_window7_224_backend_ONNX: metric_value: 0.81294 diff --git a/tests/post_training/test_templates/models.py b/tests/post_training/test_templates/models.py index 86d258066b4..24203fbeb21 100644 --- a/tests/post_training/test_templates/models.py +++ b/tests/post_training/test_templates/models.py @@ -10,6 +10,7 @@ # limitations under the License. from nncf.common.graph import NNCFGraph +from nncf.common.graph.layer_attributes import Dtype from nncf.common.graph.operator_metatypes import InputNoopMetatype from nncf.common.graph.operator_metatypes import OutputNoopMetatype from tests.common.quantization.metatypes import ConstantTestMetatype @@ -218,3 +219,82 @@ def __init__( node_edges.extend([("Conv_2", "Output_1")]) original_mock_graph = create_mock_graph(nodes, node_edges) self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, nncf_graph_cls) + + +class NNCFGraphDropoutRemovingCase: + def __init__( + self, + dropout_metatype, + wrong_dropout_node: bool = False, + wrong_parallel_edges: bool = False, + nncf_graph_cls=NNCFGraph, + ): + nodes = [ + NodeWithType("Input_1", InputNoopMetatype), + NodeWithType("Split_1", None), + NodeWithType( + "Dropout_1", + dropout_metatype, + ), + NodeWithType("Output_1", OutputNoopMetatype), + NodeWithType( + "Dropout_2", + dropout_metatype, + ), + NodeWithType("Output_2_1", OutputNoopMetatype), + 
NodeWithType("Output_2_2", OutputNoopMetatype), + NodeWithType("Output_2_3", OutputNoopMetatype), + NodeWithType( + "Dropout_3", + dropout_metatype, + ), + NodeWithType("Output_3", OutputNoopMetatype), + ] + node_edges = [ + ("Input_1", "Split_1"), + ("Split_1", "Dropout_1"), + ("Dropout_1", "Output_1"), + ("Split_1", "Dropout_2"), + ("Dropout_2", "Output_2_1"), + ("Dropout_2", "Output_2_2"), + ("Dropout_2", "Output_2_3"), + ("Split_1", "Dropout_3"), + ("Dropout_3", "Output_3"), + ] + original_mock_graph = create_mock_graph(nodes, node_edges) + self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, nncf_graph_cls) + + dropout_2 = self.nncf_graph.get_node_by_key("3 /Dropout_2_0") + output = self.nncf_graph.add_nncf_node("/Output_2_4_0", "output", OutputNoopMetatype) + tensor_shape = [1, 2, 1, 1] if wrong_dropout_node else [1, 1, 1, 1] + self.nncf_graph.add_edge_between_nncf_nodes( + dropout_2.node_id, + output.node_id, + tensor_shape=tensor_shape, + input_port_id=15, + output_port_id=1, + dtype=Dtype.FLOAT, + ) + + dropout_2 = self.nncf_graph.get_node_by_key("4 /Dropout_3_0") + output = self.nncf_graph.add_nncf_node("/Output_3_1_0", "output", OutputNoopMetatype) + self.nncf_graph.add_edge_between_nncf_nodes( + dropout_2.node_id, + output.node_id, + tensor_shape=tensor_shape, + input_port_id=1, + output_port_id=1, + dtype=Dtype.FLOAT, + parallel_input_port_ids=list(range(2, 10)), + ) + if wrong_parallel_edges: + dropout_4 = self.nncf_graph.add_nncf_node("100 /dropout", "dropout", dropout_metatype) + self.nncf_graph.add_edge_between_nncf_nodes( + self.nncf_graph.get_node_by_key("0 /Input_1_0").node_id, + dropout_4.node_id, + tensor_shape=[1, 1, 1, 1], + input_port_id=0, + output_port_id=0, + dtype=Dtype.FLOAT, + parallel_input_port_ids=list(range(1, 10)), + ) diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 8de8ce10451..71a14d8ad76 100644 --- 
a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -165,6 +165,7 @@ def test_quantize_outputs(self, test_params, quantize_outputs): inference_nncf_graph = transform_to_inference_graph( deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) @@ -189,6 +190,7 @@ def test_ignored_scopes(self, test_params, ignored_scopes_data): inference_nncf_graph = transform_to_inference_graph( deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) @@ -213,6 +215,7 @@ def test_model_type_pass(self, test_params, model_type): inference_nncf_graph = transform_to_inference_graph( deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) q_setup = min_max_algo._get_quantizer_setup(nncf_graph, inference_nncf_graph, hw_patterns, ignored_patterns) @@ -276,7 +279,7 @@ def test_quantization_points_overflow_fix(self, overflow_fix, affected_target_po @pytest.mark.parametrize("validate_scopes", (True, False)) def test_validate_scope(self, test_params, validate_scopes): nncf_graph = test_params["test_model_type_pass"]["nncf_graph"] - inference_nncf_graph = transform_to_inference_graph(deepcopy(nncf_graph), []) + inference_nncf_graph = transform_to_inference_graph(deepcopy(nncf_graph), [], []) ignored_patterns = test_params["test_model_type_pass"]["ignored_patterns"] algo = MinMaxQuantization( ignored_scope=IgnoredScope(names=["some_node"], 
validate=validate_scopes), diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 033b35377f5..48e5b585084 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -91,6 +91,7 @@ def test_default_quantizer_config(self, single_conv_nncf_graph): inference_nncf_graph = transform_to_inference_graph( deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) q_setup = min_max_algo._get_quantizer_setup( @@ -144,6 +145,7 @@ def test_quantizer_config_from_ptq_params_for_CPU( inference_nncf_graph = transform_to_inference_graph( deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) if signed_weights is False or signed_activations in [True, False]: # Incompatible with HW CPU config @@ -185,6 +187,7 @@ def test_depthwise_conv_default_quantizer_config(self, depthwise_conv_nncf_graph inference_nncf_graph = transform_to_inference_graph( deepcopy(nncf_graph), min_max_algo._backend_entity.shapeof_metatypes, + min_max_algo._backend_entity.dropout_metatypes, min_max_algo._backend_entity.read_variable_metatypes, ) q_setup = min_max_algo._get_quantizer_setup(