From 96de397815eb2599cab457aa2e9bb1677369bec2 Mon Sep 17 00:00:00 2001 From: Liubov Talamanova Date: Tue, 7 Nov 2023 15:13:06 +0000 Subject: [PATCH] Add Concat to _add_softmax_matmul ignored pattern (#2244) ### Changes Added `Concat` to `MULTIHEAD_ATTENTION_OUTPUT` ignored pattern for OV, ONNX, Torch backends ### Reason for changes To improve accuracy of https://huggingface.co/EleutherAI/gpt-neo-1.3B model ### Related tickets * 117617 --- nncf/onnx/quantization/ignored_patterns.py | 18 +++++++------ .../openvino/quantization/ignored_patterns.py | 23 +++++++++++------ nncf/torch/quantization/ignored_patterns.py | 25 ++++++++++++++----- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/nncf/onnx/quantization/ignored_patterns.py b/nncf/onnx/quantization/ignored_patterns.py index fad3841f34a..e18e7b39047 100644 --- a/nncf/onnx/quantization/ignored_patterns.py +++ b/nncf/onnx/quantization/ignored_patterns.py @@ -19,18 +19,19 @@ def _add_softmax_matmul(pattern: GraphPattern) -> None: - # SOFTMAX RESHAPE||TRANSPOSE||GATHER||SQUEEZE + # SOFTMAX RESHAPE||TRANSPOSE||GATHER||SQUEEZE||CONCAT # \ / # \ / # \ / # \ / # \ / # MATMUL - reshape_transpose_gather_squeeze = [ + branch_matmul_nodes = [ om.ONNXReshapeMetatype, om.ONNXTransposeMetatype, om.ONNXGatherMetatype, om.ONNXSqueezeMetatype, + om.ONNXConcatMetatype, ] softmax = pattern.add_node( **{GraphPattern.LABEL_ATTR: "SOFTMAX", GraphPattern.METATYPE_ATTR: om.ONNXSoftmaxMetatype} @@ -38,8 +39,8 @@ def _add_softmax_matmul(pattern: GraphPattern) -> None: matmul = pattern.add_node(**{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: MATMUL_METATYPES}) matmul_branch_nodes = pattern.add_node( **{ - GraphPattern.LABEL_ATTR: "RESHAPE||TRANSPOSE||GATHER||SQUEEZE", - GraphPattern.METATYPE_ATTR: reshape_transpose_gather_squeeze, + GraphPattern.LABEL_ATTR: "RESHAPE||TRANSPOSE||GATHER||SQUEEZE||CONCAT", + GraphPattern.METATYPE_ATTR: branch_matmul_nodes, } ) pattern.add_edge(softmax, matmul) @@ -51,7 +52,7 @@ def _add_softmax_reshape_matmul(pattern: GraphPattern) -> None: # \ # \ # \ - # RESHAPE RESHAPE||TRANSPOSE||GATHER||SQUEEZE + # RESHAPE RESHAPE||TRANSPOSE||GATHER||SQUEEZE||CONCAT # \ / # \ / # \ / @@ -59,11 +60,12 @@ def _add_softmax_reshape_matmul(pattern: GraphPattern) -> None: # \ / # \ / # MATMUL - reshape_transpose_gather_squeeze = [ + branch_matmul_nodes = [ om.ONNXReshapeMetatype, om.ONNXTransposeMetatype, om.ONNXGatherMetatype, om.ONNXSqueezeMetatype, + om.ONNXConcatMetatype, ] softmax = pattern.add_node( **{GraphPattern.LABEL_ATTR: "SOFTMAX", GraphPattern.METATYPE_ATTR: om.ONNXSoftmaxMetatype} @@ -74,8 +76,8 @@ def _add_softmax_reshape_matmul(pattern: GraphPattern) -> None: matmul = pattern.add_node(**{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: MATMUL_METATYPES}) matmul_branch_nodes = pattern.add_node( **{ - GraphPattern.LABEL_ATTR: "RESHAPE||TRANSPOSE||GATHER||SQUEEZE", - GraphPattern.METATYPE_ATTR: reshape_transpose_gather_squeeze, + GraphPattern.LABEL_ATTR: "RESHAPE||TRANSPOSE||GATHER||SQUEEZE||CONCAT", + GraphPattern.METATYPE_ATTR: branch_matmul_nodes, } ) pattern.add_edge(softmax, reshape) diff --git a/nncf/openvino/quantization/ignored_patterns.py b/nncf/openvino/quantization/ignored_patterns.py index 6868c6ee7c3..25591dcd635 100644 --- a/nncf/openvino/quantization/ignored_patterns.py +++ b/nncf/openvino/quantization/ignored_patterns.py @@ -18,25 +18,26 @@ def _add_softmax_matmul(pattern: GraphPattern) -> None: - # SOFTMAX RESHAPE||TRANSPOSE||GATHER||SQUEEZE + # SOFTMAX RESHAPE||TRANSPOSE||GATHER||SQUEEZE||CONCAT # \ / # \ / # \ / # \ / # \ / # MATMUL - reshape_transpose_gather_squeeze = [ + branch_matmul_nodes = [ om.OVReshapeMetatype, om.OVTransposeMetatype, om.OVGatherMetatype, om.OVSqueezeMetatype, + om.OVConcatMetatype, ] softmax = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SOFTMAX", GraphPattern.METATYPE_ATTR: om.OVSoftmaxMetatype}) matmul = pattern.add_node(**{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: om.OVMatMulMetatype}) matmul_branch_nodes = pattern.add_node( **{ - GraphPattern.LABEL_ATTR: "RESHAPE||TRANSPOSE||GATHER||SQUEEZE", - GraphPattern.METATYPE_ATTR: reshape_transpose_gather_squeeze, + GraphPattern.LABEL_ATTR: "RESHAPE||TRANSPOSE||GATHER||SQUEEZE||CONCAT", + GraphPattern.METATYPE_ATTR: branch_matmul_nodes, } ) pattern.add_edge(softmax, matmul) @@ -48,7 +49,7 @@ def _add_softmax_reshape_matmul(pattern: GraphPattern) -> None: # \ # \ # \ - # RESHAPE RESHAPE||TRANSPOSE||GATHER||SQUEEZE + # RESHAPE RESHAPE||TRANSPOSE||GATHER||SQUEEZE||CONCAT # \ / # \ / # \ / @@ -56,14 +57,20 @@ def _add_softmax_reshape_matmul(pattern: GraphPattern) -> None: # \ / # \ / # MATMUL - reshape_transpose_gather = [om.OVReshapeMetatype, om.OVTransposeMetatype, om.OVGatherMetatype, om.OVSqueezeMetatype] + branch_matmul_nodes = [ + om.OVReshapeMetatype, + om.OVTransposeMetatype, + om.OVGatherMetatype, + om.OVSqueezeMetatype, + om.OVConcatMetatype, + ] softmax = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SOFTMAX", GraphPattern.METATYPE_ATTR: om.OVSoftmaxMetatype}) reshape = pattern.add_node(**{GraphPattern.LABEL_ATTR: "RESHAPE", GraphPattern.METATYPE_ATTR: om.OVReshapeMetatype}) matmul = pattern.add_node(**{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: om.OVMatMulMetatype}) matmul_branch_nodes = pattern.add_node( **{ - GraphPattern.LABEL_ATTR: "RESHAPE||TRANSPOSE||GATHER||SQUEEZE", - GraphPattern.METATYPE_ATTR: reshape_transpose_gather, + GraphPattern.LABEL_ATTR: "RESHAPE||TRANSPOSE||GATHER||SQUEEZE||CONCAT", + GraphPattern.METATYPE_ATTR: branch_matmul_nodes, } ) pattern.add_edge(softmax, reshape) diff --git a/nncf/torch/quantization/ignored_patterns.py b/nncf/torch/quantization/ignored_patterns.py index 0565d9a8083..c5e87f61e28 100644 --- a/nncf/torch/quantization/ignored_patterns.py +++ b/nncf/torch/quantization/ignored_patterns.py @@ -18,16 +18,21 @@ def _add_softmax_matmul( - pattern: GraphPattern, matmul_aliases, reshape_squeeze_aliases, gather_aliases, transpose_aliases + pattern: GraphPattern, + matmul_aliases, + reshape_squeeze_aliases, + gather_aliases, + transpose_aliases, + concat_aliases, ) -> None: - # SOFTMAX RESHAPE||TRANSPOSE||GATHER||SQUEEZE + # SOFTMAX RESHAPE||TRANSPOSE||GATHER||SQUEEZE||CONCAT # \ / # \ / # \ / # \ / # \ / # MATMUL - branch_matmul_nodes = reshape_squeeze_aliases + gather_aliases + transpose_aliases + branch_matmul_nodes = reshape_squeeze_aliases + gather_aliases + transpose_aliases + concat_aliases softmax = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SOFTMAX", GraphPattern.METATYPE_ATTR: "softmax"}) matmul = pattern.add_node(**{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: matmul_aliases}) matmul_branch_nodes = pattern.add_node( @@ -38,13 +43,18 @@ def _add_softmax_matmul( def _add_softmax_reshape_matmul( - pattern: GraphPattern, matmul_aliases, reshape_squeeze_aliases, gather_aliases, transpose_aliases + pattern: GraphPattern, + matmul_aliases, + reshape_squeeze_aliases, + gather_aliases, + transpose_aliases, + concat_aliases, ) -> None: # SOFTMAX # \ # \ # \ - # RESHAPE RESHAPE||TRANSPOSE||GATHER||SQUEEZE + # RESHAPE RESHAPE||TRANSPOSE||GATHER||SQUEEZE||CONCAT # \ / # \ / # \ / @@ -52,7 +62,7 @@ def _add_softmax_reshape_matmul( # \ / # \ / # MATMUL - branch_matmul_nodes = reshape_squeeze_aliases + gather_aliases + transpose_aliases + branch_matmul_nodes = reshape_squeeze_aliases + gather_aliases + transpose_aliases + concat_aliases softmax = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SOFTMAX", GraphPattern.METATYPE_ATTR: "softmax"}) reshape = pattern.add_node( **{GraphPattern.LABEL_ATTR: "RESHAPE", GraphPattern.METATYPE_ATTR: reshape_squeeze_aliases} @@ -80,6 +90,7 @@ def create_multihead_attention_output() -> GraphPattern: ] gather_aliases = ["gather", "index_select", "where", "index_select", "__getitem__"] transpose_aliases = ["transpose", "permute", "transpose_"] + concat_aliases = ["cat", "stack"] pattern = GraphPattern() _add_softmax_matmul( @@ -88,6 +99,7 @@ def create_multihead_attention_output() -> GraphPattern: reshape_squeeze_aliases=reshape_squeeze_aliases, gather_aliases=gather_aliases, transpose_aliases=transpose_aliases, + concat_aliases=concat_aliases, ) _add_softmax_reshape_matmul( pattern, @@ -95,6 +107,7 @@ def create_multihead_attention_output() -> GraphPattern: reshape_squeeze_aliases=reshape_squeeze_aliases, gather_aliases=gather_aliases, transpose_aliases=transpose_aliases, + concat_aliases=concat_aliases, ) return pattern