From 3ccbd949e2a4ed7cba897899a33a791afb8a5ecb Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 24 Aug 2023 11:56:39 +0200 Subject: [PATCH 01/21] remove ONNXGraph --- nncf/onnx/graph/metatypes/onnx_metatypes.py | 19 +- nncf/onnx/graph/model_transformer.py | 64 ++-- nncf/onnx/graph/nncf_graph_builder.py | 106 +++--- nncf/onnx/graph/node_utils.py | 14 +- nncf/onnx/graph/onnx_graph.py | 321 ----------------- nncf/onnx/graph/onnx_helper.py | 327 ++++++++++++++++++ nncf/onnx/statistics/aggregator.py | 12 +- .../bias_correction/onnx_backend.py | 8 +- tests/onnx/quantization/common.py | 11 +- .../test_qdq_params_calculation.py | 10 +- tests/onnx/test_model_transformer.py | 21 +- tests/onnx/weightless_model.py | 5 +- 12 files changed, 463 insertions(+), 455 deletions(-) delete mode 100644 nncf/onnx/graph/onnx_graph.py create mode 100644 nncf/onnx/graph/onnx_helper.py diff --git a/nncf/onnx/graph/metatypes/onnx_metatypes.py b/nncf/onnx/graph/metatypes/onnx_metatypes.py index f3c44a0b9ad..0d6ac8e4f67 100644 --- a/nncf/onnx/graph/metatypes/onnx_metatypes.py +++ b/nncf/onnx/graph/metatypes/onnx_metatypes.py @@ -16,7 +16,9 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.operator_metatypes import OperatorMetatypeRegistry from nncf.common.hardware.opset import HWConfigOpName -from nncf.onnx.graph.onnx_graph import ONNXGraph +from nncf.onnx.graph.onnx_helper import get_parent +from nncf.onnx.graph.onnx_helper import get_tensor +from nncf.onnx.graph.onnx_helper import has_tensor ONNX_OPERATION_METATYPES = OperatorMetatypeRegistry("onnx_operator_metatypes") @@ -690,7 +692,7 @@ def get_bias_tensor_port_id(metatype: ONNXOpWithWeightsMetatype) -> Optional[int return None -def get_tensor_edge_name(onnx_graph: ONNXGraph, node: onnx.NodeProto, port_id: int) -> Optional[str]: +def get_tensor_edge_name(model: onnx.ModelProto, node: onnx.NodeProto, port_id: int) -> Optional[str]: """ Returns an edge name associated with a weight of a node laying on an input port_id. @@ -707,7 +709,7 @@ def get_tensor_edge_name(onnx_graph: ONNXGraph, node: onnx.NodeProto, port_id: i ONNXTransposeMetatype ONNXQuantizeLinearMetatype - :param onnx_graph: ONNXGraph. + :param model: ONNX model. :param node: Node. :param port_id: Port id on which a weight edge is seeking. :return: Edge name associated with a weight. @@ -720,14 +722,14 @@ def get_tensor_edge_name(onnx_graph: ONNXGraph, node: onnx.NodeProto, port_id: i + ONNXDequantizeLinearMetatype.get_all_aliases() ) END_NODES = ONNXConstantMetatype.get_all_aliases() - parent = onnx_graph.get_parent(node, port_id) + parent = get_parent(model, node, port_id) if not parent: - if onnx_graph.has_tensor(node.input[port_id]): + if has_tensor(model, node.input[port_id]): return node.input[port_id] elif parent.op_type in END_NODES: return node.input[port_id] elif parent.op_type in PROPAGATING_NODES: - return get_tensor_edge_name(onnx_graph, parent, 0) + return get_tensor_edge_name(model, parent, 0) return None @@ -776,12 +778,11 @@ def _is_embedding(model: onnx.ModelProto, node: onnx.NodeProto) -> bool: :return: True if the layer is embedding, False - otherwise. 
""" tensor_port_id = ONNXEmbeddingMetatype.weight_port_ids[0] - onnx_graph = ONNXGraph(model) allowed_types_list = ["TensorProto.FLOAT"] - weight_edge_name = get_tensor_edge_name(onnx_graph, node, tensor_port_id) + weight_edge_name = get_tensor_edge_name(model, node, tensor_port_id) if weight_edge_name is not None: - tensor_data_type = onnx_graph.get_tensor(weight_edge_name).data_type + tensor_data_type = get_tensor(model, weight_edge_name).data_type if onnx.helper.tensor_dtype_to_string(tensor_data_type) in allowed_types_list: return True return False diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py index a8f5355babf..2211209100d 100644 --- a/nncf/onnx/graph/model_transformer.py +++ b/nncf/onnx/graph/model_transformer.py @@ -19,7 +19,15 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout from nncf.onnx.graph.node_utils import get_input_edge -from nncf.onnx.graph.onnx_graph import ONNXGraph +from nncf.onnx.graph.onnx_helper import get_children +from nncf.onnx.graph.onnx_helper import get_edge +from nncf.onnx.graph.onnx_helper import get_edge_dtype +from nncf.onnx.graph.onnx_helper import get_model_outputs +from nncf.onnx.graph.onnx_helper import get_node_by_name +from nncf.onnx.graph.onnx_helper import get_node_edge_names +from nncf.onnx.graph.onnx_helper import get_node_index +from nncf.onnx.graph.onnx_helper import get_nodes_by_input +from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand from nncf.onnx.graph.transformations.commands import ONNXModelExtractionCommand from nncf.onnx.graph.transformations.commands import ONNXOutputInsertionCommand @@ -48,7 +56,6 @@ def _get_target_edge( port_id: int, node_name: str, transform_type: TargetType, - onnx_graph: ONNXGraph, input_edges_mapping: Dict[str, str], ) -> str: """ @@ -57,16 +64,15 @@ def _get_target_edge( :param port_id: Edge number of port. :param node_name: Node name. :param transform_type: Type of transformation. - :param onnx_graph: ONNXGraph. :param input_edges_mapping: Mapping between NNCF Input nodes and the following ONNX nodes and corresponding input port id. :return: Target edge name. """ if transform_type in [TargetType.PRE_LAYER_OPERATION, TargetType.OPERATION_WITH_WEIGHTS]: - return onnx_graph.get_node_edge_names(node_name)["input"][port_id] + return get_node_edge_names(self._model, node_name)["input"][port_id] if node_name in input_edges_mapping: # ADD INPUT NODE CASE - return get_input_edge(node_name, input_edges_mapping, onnx_graph) - return onnx_graph.get_node_edge_names(node_name)["output"][port_id] + return get_input_edge(node_name, input_edges_mapping, self._model) + return get_node_edge_names(self._model, node_name)["output"][port_id] def transform(self, transformation_layout: TransformationLayout) -> onnx.ModelProto: """ @@ -123,16 +129,13 @@ def _apply_output_insertion_transformations( :param transformations: ONNXOutputInsertionCommand transformations. :return: New model with inserted outputs. 
""" - onnx_graph = ONNXGraph(self._model) - model_outputs = set(output.name for output in onnx_graph.get_model_outputs()) + model_outputs = set(output.name for output in get_model_outputs(self._model)) for transformation in transformations: port_id = transformation.target_point.port_id node_name = transformation.target_point.target_node_name transform_type = transformation.target_point.type input_edges_mapping = transformation.input_edges_mapping - target_edge_name = self._get_target_edge( - port_id, node_name, transform_type, onnx_graph, input_edges_mapping - ) + target_edge_name = self._get_target_edge(port_id, node_name, transform_type, input_edges_mapping) model_outputs.add(target_edge_name) return ONNXModelTransformer._insert_outputs(self._model, outputs=model_outputs) @@ -146,11 +149,10 @@ def _insert_outputs(model: onnx.ModelProto, outputs: Union[List[str], Set[str]]) :param outputs: Edge names to use as outputs. :return: New model with inserted outputs. """ - onnx_graph = ONNXGraph(model) model_outputs = [] for output in outputs: - edge = onnx_graph.get_edge(output) - onnx_dtype = ONNXGraph.get_edge_dtype(edge) + edge = get_edge(model, output) + onnx_dtype = get_edge_dtype(edge) type_proto = onnx.helper.make_tensor_type_proto(onnx_dtype, shape=None) model_outputs.append(onnx.helper.make_value_info(name=output, type_proto=type_proto)) @@ -273,21 +275,18 @@ def _get_scale_zero_point_tensors( ) return onnx_scale_tensor, onnx_zero_point_tensor - def _get_quantizer_dequantizer_edge_name( - self, transformation: ONNXQuantizerInsertionCommand, onnx_graph: ONNXGraph - ) -> str: + def _get_quantizer_dequantizer_edge_name(self, transformation: ONNXQuantizerInsertionCommand) -> str: """ Returns an edge name on which QuantizeLinear-DequantizeLinear nodes pair has to be inserted. :param transformation: QuantizeLinear-DequantizeLinear insertion transformation. - :param onnx_graph: ONNXGraph. :return: Edge name to insert QuantizeLinear-DequantizeLinear nodes pair. """ port_id = transformation.target_point.port_id node_name = transformation.target_point.target_node_name transform_type = transformation.target_point.type input_edges_mapping = transformation.input_edges_mapping - target_edge_name = self._get_target_edge(port_id, node_name, transform_type, onnx_graph, input_edges_mapping) + target_edge_name = self._get_target_edge(port_id, node_name, transform_type, input_edges_mapping) self._added_target_edges[target_edge_name] += 1 return target_edge_name @@ -301,8 +300,7 @@ def _insert_quantizer_dequantizer( :param transformation: QuantizeLinear-DequantizeLinear insertion transformation. :return: Updated model with inserted QuantizeLinear-DequantizeLinear pair. """ - onnx_graph = ONNXGraph(model) - target_edge_name = self._get_quantizer_dequantizer_edge_name(transformation, onnx_graph) + target_edge_name = self._get_quantizer_dequantizer_edge_name(transformation) quantizer, dequantizer = self._get_quantize_dequantize_nodes(transformation, target_edge_name) onnx_scale_tensor, onnx_zero_point_tensor = ONNXModelTransformer._get_scale_zero_point_tensors( transformation, quantizer, dequantizer @@ -310,7 +308,7 @@ def _insert_quantizer_dequantizer( # If several nodes on one edge input_nodes = [] - input_nodes.extend(onnx_graph.get_nodes_by_input(target_edge_name)) + input_nodes.extend(get_nodes_by_input(model, target_edge_name)) if not input_nodes: raise RuntimeError( f"Can not add the quantizer to the {target_edge_name} edge. This edge does not have end node." 
@@ -318,7 +316,7 @@ def _insert_quantizer_dequantizer( if transformation.target_point.type == TargetType.PRE_LAYER_OPERATION: # If we need to change only target nodes input - target_node = onnx_graph.get_node_by_name(transformation.target_point.target_node_name) + target_node = get_node_by_name(model, transformation.target_point.target_node_name) for i, inp in enumerate(target_node.input): if inp == target_edge_name: target_node.input[i] = dequantizer.output[0] @@ -336,7 +334,7 @@ def _insert_quantizer_dequantizer( ) model.graph.initializer.extend([onnx_scale_tensor, onnx_zero_point_tensor]) model.graph.value_info.extend([onnx_scale_value_info, onnx_zero_point_info]) - insert_index = onnx_graph.get_node_index(input_nodes[0].name) + insert_index = get_node_index(model, input_nodes[0].name) model.graph.node.insert(insert_index, quantizer) model.graph.node.insert(insert_index + 1, dequantizer) return model @@ -351,13 +349,12 @@ def _apply_bias_correction_transformations( :param transformations: Bias correction transformations. :return: Copy of original model with updated biases. """ - onnx_graph = ONNXGraph(model) for transformation in transformations: bias_tensor_position = transformation.target_point.port_id node_name = transformation.target_point.target_node_name - onnx_node = onnx_graph.get_node_by_name(node_name) + onnx_node = get_node_by_name(model, node_name) bias_initializer_name = onnx_node.input[bias_tensor_position] - bias_initializer = onnx_graph.get_tensor(bias_initializer_name) + bias_initializer = get_tensor(model, bias_initializer_name) new_bias_tensor = onnx.numpy_helper.from_array(transformation.bias_value, bias_initializer_name) bias_initializer.CopyFrom(new_bias_tensor) @@ -370,20 +367,18 @@ def _apply_model_extraction_transformation(self, transformation: ONNXModelExtrac :param transformation: Model extraction transformation. :return: Extracted sub-model. """ - onnx_graph = ONNXGraph(self._model) - input_tensor_names = [] for input_node_name in transformation.inputs: - input_onnx_node = onnx_graph.get_node_by_name(input_node_name) + input_onnx_node = get_node_by_name(self._model, input_node_name) input_tensor_names.append(input_onnx_node.input[0]) output_tensor_names = [] for output_node_name in transformation.outputs: - output_onnx_node = onnx_graph.get_node_by_name(output_node_name) + output_onnx_node = get_node_by_name(self._model, output_node_name) output_tensor_names.append(output_onnx_node.output[0]) if not output_tensor_names: - output_tensor_names = [n.name for n in onnx_graph.get_model_outputs()] + output_tensor_names = [n.name for n in get_model_outputs(self._model)] return self.onnx_model_extractor.extract_model(input_tensor_names, output_tensor_names) @@ -397,11 +392,10 @@ def _apply_qdq_node_removing_transformations( :param transformations: Nodes removing transformations. :return: Model with removed nodes. 
""" - onnx_graph = ONNXGraph(model) for transformation in transformations: - node = onnx_graph.get_node_by_name(transformation.target_point.target_node_name) + node = get_node_by_name(model, transformation.target_point.target_node_name) - node_children = onnx_graph.get_children(node) + node_children = get_children(model, node) for node_child in node_children: for input_id, input_obj in enumerate(node_child.input): if input_obj == node.output[0]: diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py index ebf438bdcbc..f82604fc2f4 100644 --- a/nncf/onnx/graph/nncf_graph_builder.py +++ b/nncf/onnx/graph/nncf_graph_builder.py @@ -27,7 +27,19 @@ from nncf.onnx.graph.metatypes.onnx_metatypes import get_metatype from nncf.onnx.graph.metatypes.onnx_metatypes import get_possible_weight_port_ids from nncf.onnx.graph.metatypes.onnx_metatypes import get_tensor_edge_name -from nncf.onnx.graph.onnx_graph import ONNXGraph +from nncf.onnx.graph.onnx_helper import get_all_nodes +from nncf.onnx.graph.onnx_helper import get_edge +from nncf.onnx.graph.onnx_helper import get_edge_dtype +from nncf.onnx.graph.onnx_helper import get_edge_shape +from nncf.onnx.graph.onnx_helper import get_input_port_id_for_node_after_input +from nncf.onnx.graph.onnx_helper import get_model_inputs +from nncf.onnx.graph.onnx_helper import get_model_outputs +from nncf.onnx.graph.onnx_helper import get_node_by_output +from nncf.onnx.graph.onnx_helper import get_node_edge_names +from nncf.onnx.graph.onnx_helper import get_nodes_by_input +from nncf.onnx.graph.onnx_helper import get_output_port_id_for_node_before_output +from nncf.onnx.graph.onnx_helper import get_port_ids_between_nodes +from nncf.onnx.graph.onnx_helper import is_node_has_shared_weight class ONNXLayerAttributes(BaseLayerAttributes): @@ -64,23 +76,23 @@ def has_node_attrs(self) -> bool: return bool(self.node_attrs) -def _get_weight_port_ids(node: onnx.NodeProto, onnx_graph: ONNXGraph) -> Set[int]: +def _get_weight_port_ids(node: onnx.NodeProto, model: onnx.ModelProto) -> Set[int]: """ Returns all weight input ports. First, add constant weight port ids from metatype. Second, add weight port ids determined dynamically if metatype could have them. :param node: ONNX node. - :param onnx_graph: ONNXGraph. + :param model: ONNX model. :return: Port ids with weights. """ port_ids = set() - metatype = get_metatype(onnx_graph.onnx_model, node) + metatype = get_metatype(model, node) constant_port_ids = get_constant_weight_port_ids(metatype) port_ids.update(constant_port_ids) possible_port_ids = get_possible_weight_port_ids(metatype) for port_id in possible_port_ids: - if get_tensor_edge_name(onnx_graph, node, port_id): + if get_tensor_edge_name(model, node, port_id): port_ids.add(port_id) return port_ids @@ -90,7 +102,7 @@ def _is_node_with_bias(node: onnx.NodeProto, model: onnx.ModelProto) -> bool: Returns True if node has bias tensor, otherwise - False. :param node: ONNX node. - :param onnx_graph: ONNXGraph. + :param model: ONNX model. :return: True if node has bias tensor, otherwise - False. """ metatype = get_metatype(model, node) @@ -100,19 +112,19 @@ def _is_node_with_bias(node: onnx.NodeProto, model: onnx.ModelProto) -> bool: return False -def _get_weight_attr(node: onnx.NodeProto, onnx_graph: ONNXGraph, weight_port_id: int) -> Dict[int, Dict]: +def _get_weight_attr(node: onnx.NodeProto, model: onnx.ModelProto, weight_port_id: int) -> Dict[int, Dict]: """ Returns weight attributes. :param node: ONNX node. - :param onnx_graph: ONNXGraph. 
+ :param model: ONNX model. :param weight_port_ids: Port ids with weights location. :return: Weight attributes. """ weight_attrs = {} weight_edge_name = node.input[weight_port_id] - edge = onnx_graph.get_edge(weight_edge_name) - weight_shape = ONNXGraph.get_edge_shape(edge) + edge = get_edge(model, weight_edge_name) + weight_shape = get_edge_shape(edge) weight_attrs[weight_port_id] = {"name": weight_edge_name, "shape": weight_shape} return weight_attrs @@ -137,7 +149,7 @@ def _get_node_attrs(node: onnx.NodeProto, model: onnx.ModelProto) -> Dict[str, A Returns node attributes. :param node: Node. - :param onnx_graph: ONNXGraph. + :param model: ONNX model. :return : Node attributes. """ metatype = get_metatype(model, node) @@ -146,19 +158,19 @@ def _get_node_attrs(node: onnx.NodeProto, model: onnx.ModelProto) -> Dict[str, A return {} -def _get_bias_attr(node: onnx.NodeProto, onnx_graph: ONNXGraph) -> Dict[str, str]: +def _get_bias_attr(node: onnx.NodeProto, model: onnx.ModelProto) -> Dict[str, str]: """ Returns bias tensor attributes. :param node: ONNX node. - :param onnx_graph: ONNXGraph. + :param model: ONNX model. :return: Bias tensor attributes. """ bias_attrs = {} - metatype = get_metatype(onnx_graph.onnx_model, node) - if _is_node_with_bias(node, onnx_graph.onnx_model): + metatype = get_metatype(model, node) + if _is_node_with_bias(node, model): bias_tensor_port_id = get_bias_tensor_port_id(metatype) - bias_edge_name = get_tensor_edge_name(onnx_graph, node, bias_tensor_port_id) + bias_edge_name = get_tensor_edge_name(model, node, bias_tensor_port_id) bias_attrs["name"] = bias_edge_name return bias_attrs @@ -193,15 +205,15 @@ def _replace_empty_node_name(model: onnx.ModelProto) -> onnx.ModelProto: return model @staticmethod - def _add_nncf_input_nodes(onnx_graph: ONNXGraph, nncf_graph: NNCFGraph) -> None: + def _add_nncf_input_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> None: """ Adds special NNCF Input nodes to NNCFGraph. For all the ONNX model inputs, the special NNCF Input node is placed and then corresponding edges are added. - :param onnx_graph: ONNXGraph, which helps to get information about the ONNX model. + :param model: ONNX model. :param nncf_graph: NNCFGraph, in which the new nodes will be added. :return: None. 
""" - for i, _input in enumerate(onnx_graph.get_model_inputs()): + for i, _input in enumerate(get_model_inputs(model)): input_name = _input.name layer_attributes = ONNXLayerAttributes() input_node = nncf_graph.add_nncf_node( @@ -210,18 +222,18 @@ def _add_nncf_input_nodes(onnx_graph: ONNXGraph, nncf_graph: NNCFGraph) -> None: node_metatype=InputNoopMetatype, layer_attributes=layer_attributes, ) - to_nodes = onnx_graph.get_nodes_by_input(input_name) + to_nodes = get_nodes_by_input(model, input_name) input_node_node_id = input_node.node_id - edge = onnx_graph.get_edge(input_name) - input_shape = ONNXGraph.get_edge_shape(edge) - onnx_dtype = ONNXGraph.get_edge_dtype(edge) + edge = get_edge(model, input_name) + input_shape = get_edge_shape(edge) + onnx_dtype = get_edge_dtype(edge) nncf_dtype = GraphConverter.convert_onnx_dtype_to_nncf_dtype(onnx_dtype) output_port_id = 0 for node in to_nodes: to_node_id = nncf_graph.get_node_by_name(node.name).node_id - input_port_id = ONNXGraph.get_input_port_id_for_node_after_input(input_name, node) + input_port_id = get_input_port_id_for_node_after_input(input_name, node) nncf_graph.add_edge_between_nncf_nodes( from_node_id=input_node_node_id, to_node_id=to_node_id, @@ -233,15 +245,15 @@ def _add_nncf_input_nodes(onnx_graph: ONNXGraph, nncf_graph: NNCFGraph) -> None: output_port_id += 1 @staticmethod - def _add_nncf_output_nodes(onnx_graph: ONNXGraph, nncf_graph: NNCFGraph) -> None: + def _add_nncf_output_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> None: """ Adds special NNCF Output nodes to NNCFGraph. For all the ONNX model outputs, the special NNCF Output node is placed and then corresponding edges are added. - :param onnx_graph: ONNXGraph, which helps to get information about the ONNX model. + :param model: ONNX model. :param nncf_graph: NNCFGraph, in which the new nodes will be added. :return: None. """ - for i, _output in enumerate(onnx_graph.get_model_outputs()): + for i, _output in enumerate(get_model_outputs(model)): output_name = _output.name layer_attributes = ONNXLayerAttributes() output_node = nncf_graph.add_nncf_node( @@ -250,16 +262,16 @@ def _add_nncf_output_nodes(onnx_graph: ONNXGraph, nncf_graph: NNCFGraph) -> None node_metatype=OutputNoopMetatype, layer_attributes=layer_attributes, ) - from_node = onnx_graph.get_node_by_output(output_name) + from_node = get_node_by_output(model, output_name) output_node_node_id = output_node.node_id - edge = onnx_graph.get_edge(output_name) - output_shape = ONNXGraph.get_edge_shape(edge) - onnx_dtype = ONNXGraph.get_edge_dtype(edge) + edge = get_edge(model, output_name) + output_shape = get_edge_shape(edge) + onnx_dtype = get_edge_dtype(edge) nncf_dtype = GraphConverter.convert_onnx_dtype_to_nncf_dtype(onnx_dtype) input_port_id = 0 from_node_id = nncf_graph.get_node_by_name(from_node.name).node_id - output_port_id = ONNXGraph.get_output_port_id_for_node_before_output(output_name, from_node) + output_port_id = get_output_port_id_for_node_before_output(output_name, from_node) nncf_graph.add_edge_between_nncf_nodes( from_node_id=from_node_id, to_node_id=output_node_node_id, @@ -291,21 +303,21 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: :return: NNCFGraph. 
""" onnx_model = GraphConverter._replace_empty_node_name(onnx_model) + onnx_model = onnx.shape_inference.infer_shapes(onnx_model) nncf_graph = NNCFGraph() - onnx_graph = ONNXGraph(onnx_model) - for node in onnx_graph.get_all_nodes(): + for node in get_all_nodes(onnx_model): metatype = get_metatype(onnx_model, node) - weight_port_ids = _get_weight_port_ids(node, onnx_graph) + weight_port_ids = _get_weight_port_ids(node, onnx_model) is_shared = None weight_attrs = {} node_attrs = _get_node_attrs(node, onnx_model) - bias_attrs = _get_bias_attr(node, onnx_graph) + bias_attrs = _get_bias_attr(node, onnx_model) if weight_port_ids: # If node has weight weight_edge_names = [] for weight_port_id in weight_port_ids: weight_edge_names.append(node.input[weight_port_id]) - weight_attrs.update(_get_weight_attr(node, onnx_graph, weight_port_id)) - if not is_shared and onnx_graph.is_node_has_shared_weight(node, weight_port_id): + weight_attrs.update(_get_weight_attr(node, onnx_model, weight_port_id)) + if not is_shared and is_node_has_shared_weight(onnx_model, node, weight_port_id): is_shared = True layer_attributes = ONNXLayerAttributes( @@ -318,22 +330,22 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: layer_attributes=layer_attributes, is_shared=is_shared, ) - for output_node in onnx_graph.get_all_nodes(): - output_edges = onnx_graph.get_node_edge_names(output_node.name)["output"] + for output_node in get_all_nodes(onnx_model): + output_edges = get_node_edge_names(onnx_model, output_node.name)["output"] for output_edge in output_edges: - edge = onnx_graph.get_edge(output_edge) + edge = get_edge(onnx_model, output_edge) if edge is None: # If the edge is None it means that the edge was not added during shape inference of ONNX model. # BatchNorm exported in Training mode has unused outputs edges: mean, var, saved_mean, saved_var. # NNCFGraph should not contain such edges. 
continue - tensor_shape = ONNXGraph.get_edge_shape(edge) - onnx_dtype = ONNXGraph.get_edge_dtype(edge) + tensor_shape = get_edge_shape(edge) + onnx_dtype = get_edge_dtype(edge) nncf_dtype = GraphConverter.convert_onnx_dtype_to_nncf_dtype(onnx_dtype) output_node_id = nncf_graph.get_node_by_name(output_node.name).node_id - input_nodes = onnx_graph.get_nodes_by_input(output_edge) + input_nodes = get_nodes_by_input(onnx_model, output_edge) for input_node in input_nodes: - port_ids = ONNXGraph.get_port_ids_between_nodes(output_node, input_node) + port_ids = get_port_ids_between_nodes(output_node, input_node) input_port_id = port_ids["input_port_id"] output_port_id = port_ids["output_port_id"] in_node_id = nncf_graph.get_node_by_name(input_node.name).node_id @@ -345,6 +357,6 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: output_port_id=output_port_id, dtype=Dtype(nncf_dtype), ) - GraphConverter._add_nncf_input_nodes(onnx_graph, nncf_graph) - GraphConverter._add_nncf_output_nodes(onnx_graph, nncf_graph) + GraphConverter._add_nncf_input_nodes(onnx_model, nncf_graph) + GraphConverter._add_nncf_output_nodes(onnx_model, nncf_graph) return nncf_graph diff --git a/nncf/onnx/graph/node_utils.py b/nncf/onnx/graph/node_utils.py index 5312f2fdded..12a28bb9507 100644 --- a/nncf/onnx/graph/node_utils.py +++ b/nncf/onnx/graph/node_utils.py @@ -21,7 +21,8 @@ from nncf.common.tensor_statistics.collectors import ReductionShape from nncf.onnx.graph.metatypes import onnx_metatypes as om from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXDequantizeLinearMetatype -from nncf.onnx.graph.onnx_graph import ONNXGraph +from nncf.onnx.graph.onnx_helper import get_node_edge_names +from nncf.onnx.graph.onnx_helper import get_tensor_value from nncf.onnx.graph.transformations.commands import ONNXTargetPoint @@ -45,10 +46,9 @@ def get_bias_value(node_with_bias: NNCFNode, model: onnx.ModelProto) -> np.ndarr :param model: The model that contains this operation. :return: The bias value that is applied to the output tensor of the node's operation. """ - onnx_graph = ONNXGraph(model) assert node_with_bias.layer_attributes.has_bias() bias_name = node_with_bias.layer_attributes.bias_attrs["name"] - return onnx_graph.get_tensor_value(bias_name) + return get_tensor_value(model, bias_name) def get_input_edges_mapping(nncf_graph: NNCFGraph) -> Dict[str, Tuple[str, int]]: @@ -68,20 +68,22 @@ def get_input_edges_mapping(nncf_graph: NNCFGraph) -> Dict[str, Tuple[str, int]] return input_edges_mapping -def get_input_edge(input_node_name: str, input_edges_mapping: Dict[str, Tuple[str, int]], onnx_graph: ONNXGraph) -> str: +def get_input_edge( + input_node_name: str, input_edges_mapping: Dict[str, Tuple[str, int]], model: onnx.ModelProto +) -> str: """ Returns input edge corresponding to the NNCF input node with the name input_node_name. :param input_node_name: Name of NNCF input node. :param input_edges_mapping: A mapping of NNCF input node names and a tuple with the consumed node names and their input port ids. - :param onnx_graph: Instance of ONNXGraph of the model. + :param model: ONNX model. :return: Input edge name. 
""" input_edges = set() for node_info in input_edges_mapping[input_node_name]: name, port_id = node_info - input_edges.add(onnx_graph.get_node_edge_names(name)["input"][port_id]) + input_edges.add(get_node_edge_names(model, name)["input"][port_id]) assert len(input_edges) == 1 return input_edges.pop() diff --git a/nncf/onnx/graph/onnx_graph.py b/nncf/onnx/graph/onnx_graph.py deleted file mode 100644 index df754263c99..00000000000 --- a/nncf/onnx/graph/onnx_graph.py +++ /dev/null @@ -1,321 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Dict, Iterator, List, Optional, Union - -import numpy as np -import onnx -from onnx import numpy_helper - - -class ONNXGraph: - """ - The class provides the interface to get the necessary information from ONNX model. - """ - - def __init__(self, onnx_model: onnx.ModelProto): - self.onnx_model = onnx_model - self._node_name_to_node = None # type: Dict[str, onnx.NodeProto] - self._edge_name_to_value_info = None # type: Dict[str, onnx.ValueInfoProto] - - def _update_edges(self) -> None: - self.onnx_model = onnx.shape_inference.infer_shapes(self.onnx_model) - value_infos = [ - *self.onnx_model.graph.value_info, - *self.onnx_model.graph.input, - *self.onnx_model.graph.output, - *self.onnx_model.graph.initializer, - ] - self._edge_name_to_value_info = {tensor.name: tensor for tensor in value_infos} - - def _update_node_names(self) -> None: - self._node_name_to_node = {n.name: n for n in self.onnx_model.graph.node} - - def _get_all_tensors(self) -> Iterator[onnx.TensorProto]: - """ - Iterate over all tensors of ONNX model. - - :yield: tensors of ONNX model. - """ - for initializer in self.onnx_model.graph.initializer: - yield initializer - for node in self.onnx_model.graph.node: - for attribute in node.attribute: - if attribute.HasField("t"): - yield attribute.t - yield from attribute.tensors - - def get_all_nodes(self) -> List[onnx.NodeProto]: - """ - Returns model nodes in the original order. - - :return: model nodes. - """ - return self.onnx_model.graph.node - - def get_node_by_name(self, node_name: str) -> Optional[onnx.NodeProto]: - """ - Returns a model node with the name equals to 'node_name' from self._node_name_to_node. - If the self._node_name_to_node is None, fills it with the nodes from the self.onnx_model. - If there is no node with such name returns None. - - :param node_name: Name of the node. - :return: None if the node with the specified name exists - otherwise returns the node. - """ - if self._node_name_to_node is None: - self._update_node_names() - return self._node_name_to_node[node_name] if node_name in self._node_name_to_node else None - - def get_edge(self, edge_name: str) -> Optional[onnx.ValueInfoProto]: - """ - Returns edge by its name or None if the model has no such edge. - If self._edge_name_to_value_info is not initialized runs an initialization. - - :param edge_name: Name of edge. - :return: Edge. 
- """ - if self._edge_name_to_value_info is None: - self._update_edges() - return self._edge_name_to_value_info.get(edge_name, None) - - def get_model_inputs(self) -> List[onnx.ValueInfoProto]: - """ - Returns all model inputs. - - :return: Model Inputs. - """ - inputs = [] - input_all = [node.name for node in self.onnx_model.graph.input] - input_initializer = [node.name for node in self.onnx_model.graph.initializer] - net_feed_input = list(set(input_all) - set(input_initializer)) - for node in self.onnx_model.graph.input: - if node.name in net_feed_input: - inputs.append(node) - return inputs - - def get_model_outputs(self) -> List[onnx.ValueInfoProto]: - """ - Returns all model outputs. - - :return: Model Outputs. - """ - return list(self.onnx_model.graph.output) - - def get_node_by_output(self, output_name: str) -> Optional[onnx.NodeProto]: - """ - Returns node that have output edge with the name 'output_name'. - - :param output_name: The name of output edge. - :return: Node with corresponding output. - """ - for node in self.get_all_nodes(): - if output_name in node.output: - return node - return None - - def get_nodes_by_input(self, input_name: str) -> List[onnx.NodeProto]: - """ - Returns all nodes that have input with the name 'input_name'. - - :param input_name: The name of input edge. - :return: Nodes with corresponding input. - """ - output = [] - for node in self.get_all_nodes(): - if input_name in node.input: - output.append(node) - return output - - def get_node_edge_names(self, node_name: str) -> Dict[str, List[str]]: - """ - Returns node edge names. - - :param node_name: The name of the node. - :return: Dict with two keys: 'input' and 'output', - which are corresponding to input and output edges accordingly. - """ - if self._node_name_to_node is None: - self._update_node_names() - if node_name in self._node_name_to_node: - return { - "input": list(self._node_name_to_node[node_name].input), - "output": list(self._node_name_to_node[node_name].output), - } - raise RuntimeError("There is no node with the name {}".format(node_name)) - - @staticmethod - def get_input_port_id_for_node_after_input(input_name: str, to_node: onnx.NodeProto) -> int: - """ - Returns input_port_id for 'to_node' connected with the model input with the name 'input_name'. - - :param input_name: Name of the ONNX model Input. - :param to_node: Node, which has input edge with 'input_name' name. - :return: input port number for 'to_node', which is connected to 'input_name'. - """ - for input_port_id, port in enumerate(to_node.input): - if port == input_name: - return input_port_id - raise RuntimeError(f"The node {to_node} does not have input edge with the name {input_name}") - - @staticmethod - def get_output_port_id_for_node_before_output(output_name: str, from_node: onnx.NodeProto) -> int: - """ - Returns output_port_id for 'from_node' connected with the model output with the name 'output_name'. - - :param output_name: Name of the ONNX model Output. - :param from_node: Node, which has output edge with 'output_name' name. - :return: output port number for 'from_node', which is connected to 'output_name'. 
- """ - for output_port_id, port in enumerate(from_node.output): - if port == output_name: - return output_port_id - raise RuntimeError(f"The node {from_node} does not have output edge with the name {output_name}") - - @staticmethod - def get_port_ids_between_nodes(from_node: onnx.NodeProto, to_node: onnx.NodeProto) -> Dict[str, int]: - """ - Returns input_port_id and output_port_id between 'from_node' and 'to_node'. - - :param from_node: Node, whose output is connected to 'to_node' node. - :param to_node: Node, whose input is connected to 'from_node' node. - :return: Dict{'input_port_id': input port id, 'output_port_id': output port id} - """ - output = {"input_port_id": None, "output_port_id": None} - for port_id, port in enumerate(to_node.input): - if port in from_node.output: - output["input_port_id"] = port_id - for port_id, port in enumerate(from_node.output): - if port in to_node.input: - output["output_port_id"] = port_id - if output["output_port_id"] is None or output["input_port_id"] is None: - raise RuntimeError(f"The nodes {from_node.name} and {to_node.name} do not have edges between.") - return output - - def get_node_index(self, node_name: str) -> int: - """ - Returns the node index in the model. - - :param node_name: Name of the node. - :return: Node index, -1 if there is no such node. - """ - for i, node in enumerate(self.get_all_nodes()): - if node.name == node_name: - return i - return -1 - - def has_tensor(self, tensor_name: str) -> bool: - """ - Returns True whether the model has the tensor with the name equals to tensor_name. - - :param tensor_name: Name of the tensor. - :return: True if the model has such tensor, False - otherwise. - """ - for tensor in self._get_all_tensors(): - if tensor.name == tensor_name: - return True - return False - - def get_tensor_value(self, tensor_name: str) -> np.ndarray: - """ - Returns tensor value of a tensor with the name 'tensor_name'. - - :param tensor_name: Name of the tensor. - :return: The value of the tensor. - """ - tensor = self.get_tensor(tensor_name) - return numpy_helper.to_array(tensor) - - def get_tensor(self, tensor_name: str) -> onnx.TensorProto: - """ - Returns a tensor with the name 'tensor_name'. - - :param initializer_name: Name of the Initializer. - :return: The Initializer. - """ - for tensor in self._get_all_tensors(): - if tensor.name == tensor_name: - return tensor - raise RuntimeError("There is no tensor with the name {}".format(tensor_name)) - - @staticmethod - def get_edge_shape(edge: Union[onnx.ValueInfoProto, onnx.TensorProto]) -> List[int]: - """ - Returns edge shape. - - :param edge: The edge. - :return: Shape of the Tensor. - """ - if isinstance(edge, onnx.TensorProto): - return list(edge.dims) - tensor_type = edge.type.tensor_type - shape = [] - if tensor_type.HasField("shape"): - for d in tensor_type.shape.dim: - if d.HasField("dim_value"): - dim_value = d.dim_value - if isinstance(dim_value, int): - shape.append(dim_value) - else: - return shape - elif d.HasField("dim_param"): - # flexible shape make manually -1 - shape.append(-1) - else: - return shape - return shape - - @staticmethod - def get_edge_dtype(edge: Union[onnx.ValueInfoProto, onnx.TensorProto]) -> int: - """ - Returns the data type of the edge. - - :param edge: The edge. - :return: Data type of the edge. 
-        """
-        if isinstance(edge, onnx.ValueInfoProto):
-            return edge.type.tensor_type.elem_type
-        return edge.data_type
-
-    def get_parent(self, node: onnx.NodeProto, port_id: int) -> Optional[onnx.NodeProto]:
-        """
-        Returns parents of the node. If there is no parent node, returns None.
-
-        :param node: The child node.
-        :param port_id: Input port id on which the parent is seeked.
-        :return: Parent node.
-        """
-        if port_id < len(node.input):
-            return self.get_node_by_output(node.input[port_id])
-        return None
-
-    def get_children(self, node: onnx.NodeProto) -> List[onnx.NodeProto]:
-        """
-        Returns children of the node.
-
-        :param node: The parent node.
-        :return: All children nodes.
-        """
-        output = []
-        node_edges = self.get_node_edge_names(node.name)["output"]
-        for node_edge in node_edges:
-            output.extend(self.get_nodes_by_input(node_edge))
-        return output
-
-    def is_node_has_shared_weight(self, node: onnx.NodeProto, weight_port_id: int) -> bool:
-        """
-        Returns whether the node share a weight.
-
-        :param node: Node.
-        :return: True whether node shares a weight - otherwise False.
-        """
-        weight_tensor_edge = self.get_node_edge_names(node.name)["input"][weight_port_id]
-        nodes = self.get_nodes_by_input(weight_tensor_edge)
-        return len(nodes) > 1
diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py
new file mode 100644
index 00000000000..47f46765898
--- /dev/null
+++ b/nncf/onnx/graph/onnx_helper.py
@@ -0,0 +1,327 @@
+# Copyright (c) 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict, Iterator, List, Optional, Union
+
+import numpy as np
+import onnx
+from onnx import numpy_helper
+
+
+def get_all_nodes(model: onnx.ModelProto) -> List[onnx.NodeProto]:
+    """
+    Returns model nodes in the original order.
+
+    :param model: ONNX model.
+    :return: Model nodes.
+    """
+    return model.graph.node
+
+
+def get_node_by_name(model: onnx.ModelProto, node_name: str) -> Optional[onnx.NodeProto]:
+    """
+    Returns the model node whose name equals 'node_name'.
+
+    :param model: ONNX model.
+    :param node_name: Name of the node.
+    :return: The node with the specified name, or None if there is no such node.
+    """
+    for node in get_all_nodes(model):
+        if node.name == node_name:
+            return node
+    return None
+
+
+def get_edge(model: onnx.ModelProto, edge_name: str) -> Optional[onnx.ValueInfoProto]:
+    """
+    Returns an edge by its name, or None if the model has no such edge.
+    The lookup first checks the value infos stored in the model; on a miss,
+    shape inference is run and the lookup is retried on the inferred model.
+
+    :param model: ONNX model.
+    :param edge_name: Name of the edge.
+    :return: Edge.
+    """
+
+    def seek_v_info(model, edge_name):
+        value_infos = [
+            *model.graph.value_info,
+            *model.graph.input,
+            *model.graph.output,
+            *model.graph.initializer,
+        ]
+        for info in value_infos:
+            if info.name == edge_name:
+                return info
+        return None
+
+    v_info = seek_v_info(model, edge_name)
+    if v_info is not None:
+        return v_info
+    print("Shape infe")
+    inferred_model = onnx.shape_inference.infer_shapes(model)
+    return seek_v_info(inferred_model, edge_name)
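+
+# Usage sketch for the helper above (illustrative only; "conv1_out" is a
+# hypothetical edge name). Note that a miss in the stored value infos triggers
+# a potentially expensive onnx.shape_inference.infer_shapes() call:
+#
+#   edge = get_edge(model, "conv1_out")
+#   if edge is None:
+#       raise RuntimeError("unknown edge")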
+ """ + + def seek_v_info(model, edge_name): + value_infos = [ + *model.graph.value_info, + *model.graph.input, + *model.graph.output, + *model.graph.initializer, + ] + for info in value_infos: + if info.name == edge_name: + return info + return None + + v_info = seek_v_info(model, edge_name) + if v_info is not None: + return v_info + print("Shape infe") + infered_model = onnx.shape_inference.infer_shapes(model) + return seek_v_info(infered_model, edge_name) + + +def get_model_inputs(model: onnx.ModelProto) -> List[onnx.ValueInfoProto]: + """ + Returns all model inputs. + + :return: Model Inputs. + """ + inputs = [] + input_all = [node.name for node in model.graph.input] + input_initializer = [node.name for node in model.graph.initializer] + net_feed_input = list(set(input_all) - set(input_initializer)) + for node in model.graph.input: + if node.name in net_feed_input: + inputs.append(node) + return inputs + + +def get_model_outputs(model: onnx.ModelProto) -> List[onnx.ValueInfoProto]: + """ + Returns all model outputs. + + :return: Model Outputs. + """ + return list(model.graph.output) + + +def get_node_by_output(model: onnx.ModelProto, output_name: str) -> Optional[onnx.NodeProto]: + """ + Returns node that have output edge with the name 'output_name'. + + :param output_name: The name of output edge. + :return: Node with corresponding output. + """ + for node in get_all_nodes(model): + if output_name in node.output: + return node + return None + + +def get_nodes_by_input(model: onnx.ModelProto, input_name: str) -> List[onnx.NodeProto]: + """ + Returns all nodes that have input with the name 'input_name'. + + :param input_name: The name of input edge. + :return: Nodes with corresponding input. + """ + output = [] + for node in get_all_nodes(model): + if input_name in node.input: + output.append(node) + return output + + +def get_node_edge_names(model: onnx.ModelProto, node_name: str) -> Dict[str, List[str]]: + """ + Returns node edge names. + + :param node_name: The name of the node. + :return: Dict with two keys: 'input' and 'output', + which are corresponding to input and output edges accordingly. + """ + node = get_node_by_name(model, node_name) + if node: + return { + "input": list(node.input), + "output": list(node.output), + } + raise RuntimeError("There is no node with the name {}".format(node_name)) + + +def get_input_port_id_for_node_after_input(input_name: str, to_node: onnx.NodeProto) -> int: + """ + Returns input_port_id for 'to_node' connected with the model input with the name 'input_name'. + + :param input_name: Name of the ONNX model Input. + :param to_node: Node, which has input edge with 'input_name' name. + :return: input port number for 'to_node', which is connected to 'input_name'. + """ + for input_port_id, port in enumerate(to_node.input): + if port == input_name: + return input_port_id + raise RuntimeError(f"The node {to_node} does not have input edge with the name {input_name}") + + +def get_output_port_id_for_node_before_output(output_name: str, from_node: onnx.NodeProto) -> int: + """ + Returns output_port_id for 'from_node' connected with the model output with the name 'output_name'. + + :param output_name: Name of the ONNX model Output. + :param from_node: Node, which has output edge with 'output_name' name. + :return: output port number for 'from_node', which is connected to 'output_name'. 
+ """ + for output_port_id, port in enumerate(from_node.output): + if port == output_name: + return output_port_id + raise RuntimeError(f"The node {from_node} does not have output edge with the name {output_name}") + + +def get_port_ids_between_nodes(from_node: onnx.NodeProto, to_node: onnx.NodeProto) -> Dict[str, int]: + """ + Returns input_port_id and output_port_id between 'from_node' and 'to_node'. + + :param from_node: Node, whose output is connected to 'to_node' node. + :param to_node: Node, whose input is connected to 'from_node' node. + :return: Dict{'input_port_id': input port id, 'output_port_id': output port id} + """ + output = {"input_port_id": None, "output_port_id": None} + for port_id, port in enumerate(to_node.input): + if port in from_node.output: + output["input_port_id"] = port_id + for port_id, port in enumerate(from_node.output): + if port in to_node.input: + output["output_port_id"] = port_id + if output["output_port_id"] is None or output["input_port_id"] is None: + raise RuntimeError(f"The nodes {from_node.name} and {to_node.name} do not have edges between.") + return output + + +def get_node_index(model: onnx.ModelProto, node_name: str) -> Optional[int]: + """ + Returns the node index in the model. + + :param node_name: Name of the node. + :return: Node index, -1 if there is no such node. + """ + for i, node in enumerate(get_all_nodes(model)): + if node.name == node_name: + return i + return None + + +def _get_all_tensors(model: onnx.ModelProto) -> Iterator[onnx.TensorProto]: + """ + Iterate over all tensors of ONNX model. + + :yield: tensors of ONNX model. + """ + for initializer in model.graph.initializer: + yield initializer + for node in model.graph.node: + for attribute in node.attribute: + if attribute.HasField("t"): + yield attribute.t + yield from attribute.tensors + + +def has_tensor(model: onnx.ModelProto, tensor_name: str) -> bool: + """ + Returns True whether the model has the tensor with the name equals to tensor_name. + + :param tensor_name: Name of the tensor. + :return: True if the model has such tensor, False - otherwise. + """ + for tensor in _get_all_tensors(model): + if tensor.name == tensor_name: + return True + return False + + +def get_tensor(model: onnx.ModelProto, tensor_name: str) -> onnx.TensorProto: + """ + Returns a tensor with the name 'tensor_name'. + + :param initializer_name: Name of the Initializer. + :return: The Initializer. + """ + for tensor in _get_all_tensors(model): + if tensor.name == tensor_name: + return tensor + raise RuntimeError("There is no tensor with the name {}".format(tensor_name)) + + +def get_tensor_value(model: onnx.ModelProto, tensor_name: str) -> np.ndarray: + """ + Returns tensor value of a tensor with the name 'tensor_name'. + + :param tensor_name: Name of the tensor. + :return: The value of the tensor. + """ + return numpy_helper.to_array(get_tensor(model, tensor_name)) + + +def get_edge_shape(edge: Union[onnx.ValueInfoProto, onnx.TensorProto]) -> List[int]: + """ + Returns edge shape. + + :param edge: The edge. + :return: Shape of the Tensor. 
+ """ + if isinstance(edge, onnx.TensorProto): + return list(edge.dims) + tensor_type = edge.type.tensor_type + shape = [] + if tensor_type.HasField("shape"): + for d in tensor_type.shape.dim: + if d.HasField("dim_value"): + dim_value = d.dim_value + if isinstance(dim_value, int): + shape.append(dim_value) + else: + return shape + elif d.HasField("dim_param"): + # flexible shape make manually -1 + shape.append(-1) + else: + return shape + return shape + + +def get_edge_dtype(edge: Union[onnx.ValueInfoProto, onnx.TensorProto]) -> int: + """ + Returns the data type of the edge. + + :param edge: The edge. + :return: Data type of the edge. + """ + if isinstance(edge, onnx.ValueInfoProto): + return edge.type.tensor_type.elem_type + return edge.data_type + + +def get_parent(model: onnx.ModelProto, node: onnx.NodeProto, port_id: int) -> Optional[onnx.NodeProto]: + """ + Returns parents of the node. If there is no parent node, returns None. + + :param node: The child node. + :param port_id: Input port id on which the parent is seeked. + :return: Parent node. + """ + if port_id < len(node.input): + return get_node_by_output(model, node.input[port_id]) + return None + + +def get_children(model: onnx.ModelProto, node: onnx.NodeProto) -> List[onnx.NodeProto]: + """ + Returns children of the node. + + :param node: The parent node. + :return: All children nodes. + """ + output = [] + node_edges = get_node_edge_names(model, node.name)["output"] + for node_edge in node_edges: + output.extend(get_nodes_by_input(model, node_edge)) + return output + + +def is_node_has_shared_weight(model: onnx.ModelProto, node: onnx.NodeProto, weight_port_id: int) -> bool: + """ + Returns whether the node share a weight. + + :param node: Node. + :return: True whether node shares a weight - otherwise False. 
+ """ + weight_tensor_edge = get_node_edge_names(model, node.name)["input"][weight_port_id] + nodes = get_nodes_by_input(model, weight_tensor_edge) + return len(nodes) > 1 diff --git a/nncf/onnx/statistics/aggregator.py b/nncf/onnx/statistics/aggregator.py index e3435382b5d..8fcf0543fed 100644 --- a/nncf/onnx/statistics/aggregator.py +++ b/nncf/onnx/statistics/aggregator.py @@ -22,7 +22,7 @@ from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.onnx.graph.node_utils import get_input_edge from nncf.onnx.graph.node_utils import get_input_edges_mapping -from nncf.onnx.graph.onnx_graph import ONNXGraph +from nncf.onnx.graph.onnx_helper import get_node_edge_names from nncf.onnx.graph.transformations.commands import ONNXOutputInsertionCommand from nncf.onnx.tensor import ONNXNNCFTensor @@ -30,7 +30,7 @@ class ONNXStatisticsAggregator(StatisticsAggregator): def collect_statistics(self, model: onnx.ModelProto, graph: NNCFGraph) -> None: self.input_edges_mapping = get_input_edges_mapping(graph) - self._onnx_graph = ONNXGraph(model) + self._model = model self._registered_weights = set() super().collect_statistics(model, graph) @@ -42,15 +42,13 @@ def _register_statistics( target_point = statistic_point.target_point port_id = target_point.port_id if target_point.target_node_name in self.input_edges_mapping: # Input case - edge_name = get_input_edge( - target_point.target_node_name, self.input_edges_mapping, self._onnx_graph - ) + edge_name = get_input_edge(target_point.target_node_name, self.input_edges_mapping, self._model) statistic_point.register_tensor(outputs[edge_name]) elif target_point.type == TargetType.POST_LAYER_OPERATION: - edge_name = self._onnx_graph.get_node_edge_names(node_name)["output"][port_id] + edge_name = get_node_edge_names(self._model, node_name)["output"][port_id] statistic_point.register_tensor(outputs[edge_name]) elif target_point.type in [TargetType.PRE_LAYER_OPERATION, TargetType.OPERATION_WITH_WEIGHTS]: - edge_name = self._onnx_graph.get_node_edge_names(node_name)["input"][port_id] + edge_name = get_node_edge_names(self._model, node_name)["input"][port_id] statistic_point.register_tensor(outputs[edge_name]) def _get_transformation_layout_extra_outputs( diff --git a/nncf/quantization/algorithms/bias_correction/onnx_backend.py b/nncf/quantization/algorithms/bias_correction/onnx_backend.py index 0e9ad720a10..49a80a6bd4a 100644 --- a/nncf/quantization/algorithms/bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/bias_correction/onnx_backend.py @@ -23,7 +23,7 @@ from nncf.onnx.graph.node_utils import get_bias_value from nncf.onnx.graph.node_utils import is_any_weight_quantized from nncf.onnx.graph.node_utils import is_node_with_bias -from nncf.onnx.graph.onnx_graph import ONNXGraph +from nncf.onnx.graph.onnx_helper import get_node_by_name from nncf.onnx.graph.transformations.command_creation import create_bias_correction_command from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand from nncf.onnx.graph.transformations.commands import ONNXModelExtractionCommand @@ -102,14 +102,12 @@ def get_bias_value(node: NNCFNode, model: onnx.ModelProto, nncf_graph: NNCFGraph @staticmethod def get_input_name(model: onnx.ModelProto, node_name: str) -> str: - onnx_graph = ONNXGraph(model) - node = onnx_graph.get_node_by_name(node_name) + node = get_node_by_name(model, node_name) return node.input[0] @staticmethod def get_output_name(model: onnx.ModelProto, node_name: str, output_id: int) -> List[str]: - onnx_graph 
= ONNXGraph(model) - node = onnx_graph.get_node_by_name(node_name) + node = get_node_by_name(model, node_name) return node.output[output_id] @staticmethod diff --git a/tests/onnx/quantization/common.py b/tests/onnx/quantization/common.py index 1d3464882fe..e2c58968dfa 100644 --- a/tests/onnx/quantization/common.py +++ b/tests/onnx/quantization/common.py @@ -17,7 +17,9 @@ from nncf import Dataset from nncf.onnx.graph.nncf_graph_builder import GraphConverter -from nncf.onnx.graph.onnx_graph import ONNXGraph +from nncf.onnx.graph.onnx_helper import get_edge +from nncf.onnx.graph.onnx_helper import get_edge_dtype +from nncf.onnx.graph.onnx_helper import get_edge_shape from nncf.onnx.statistics.statistics import ONNXMinMaxTensorStatistic from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization @@ -53,15 +55,14 @@ def _get_input_keys(original_model: onnx.ModelProto) -> str: def get_random_dataset_for_test(model: onnx.ModelProto, has_batch_dim: bool, length: Optional[int] = 10): keys = _get_input_keys(model) - onnx_graph = ONNXGraph(model) def transform_fn(i): output = {} for key in keys: - edge = onnx_graph.get_edge(key) - input_dtype = ONNXGraph.get_edge_dtype(edge) + edge = get_edge(model, key) + input_dtype = get_edge_dtype(edge) input_np_dtype = onnx.helper.tensor_dtype_to_np_dtype(input_dtype) - shape = ONNXGraph.get_edge_shape(edge) + shape = get_edge_shape(edge) rng = get_random_generator() tensor = rng.uniform(-1, 1, shape).astype(input_np_dtype) if has_batch_dim: diff --git a/tests/onnx/quantization/test_qdq_params_calculation.py b/tests/onnx/quantization/test_qdq_params_calculation.py index bf16eb152b2..d38df7c5337 100644 --- a/tests/onnx/quantization/test_qdq_params_calculation.py +++ b/tests/onnx/quantization/test_qdq_params_calculation.py @@ -15,7 +15,8 @@ import pytest from nncf.common.quantization.structs import QuantizationPreset -from nncf.onnx.graph.onnx_graph import ONNXGraph +from nncf.onnx.graph.onnx_helper import get_all_nodes +from nncf.onnx.graph.onnx_helper import get_tensor_value from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix from tests.onnx.conftest import ONNX_TEST_ROOT @@ -36,11 +37,10 @@ def get_q_nodes_params(model: onnx.ModelProto) -> Dict[str, np.ndarray]: output = {} - onnx_graph = ONNXGraph(model) - for node in onnx_graph.get_all_nodes(): + for node in get_all_nodes(model): if node.op_type == "QuantizeLinear": - scale = onnx_graph.get_tensor_value(node.input[1]) - zero_point = onnx_graph.get_tensor_value(node.input[2]) + scale = get_tensor_value(model, node.input[1]) + zero_point = get_tensor_value(model, node.input[2]) output[node.name] = {"scale": scale, "zero_point": zero_point} return output diff --git a/tests/onnx/test_model_transformer.py b/tests/onnx/test_model_transformer.py index 4cf5cb4e332..f9b9540fc95 100644 --- a/tests/onnx/test_model_transformer.py +++ b/tests/onnx/test_model_transformer.py @@ -20,7 +20,10 @@ from nncf.common.graph.transformations.layout import TransformationLayout from nncf.onnx.graph.model_transformer import ONNXModelTransformer from nncf.onnx.graph.nncf_graph_builder import GraphConverter -from nncf.onnx.graph.onnx_graph import ONNXGraph +from nncf.onnx.graph.onnx_helper import get_model_outputs +from nncf.onnx.graph.onnx_helper import get_node_by_name +from nncf.onnx.graph.onnx_helper import get_tensor +from 
nncf.onnx.graph.onnx_helper import get_tensor_value from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand from nncf.onnx.graph.transformations.commands import ONNXOutputInsertionCommand from nncf.onnx.graph.transformations.commands import ONNXQDQNodeRemovingCommand @@ -124,17 +127,15 @@ def test_inserted_quantizer_parameters(test_parameters): transformed_model = model_transformer.transform(transformation_layout) onnx.checker.check_model(transformed_model) - onnx_graph = ONNXGraph(transformed_model) - # pylint:disable=no-member for node in transformed_model.graph.node: op_type = node.op_type if op_type == "QuantizeLinear": for attr in node.attribute: assert test_parameters.onnx_attributes[attr.name] == onnx.helper.get_attribute_value(attr) - assert np.allclose(onnx_graph.get_tensor_value(node.input[1]), np.array(test_parameters.scale)) - assert np.allclose(onnx_graph.get_tensor_value(node.input[2]), np.array(test_parameters.zero_point)) - assert onnx_graph.get_tensor_value(node.input[2]).dtype == test_parameters.onnx_dtype + assert np.allclose(get_tensor_value(transformed_model, node.input[1]), np.array(test_parameters.scale)) + assert np.allclose(get_tensor_value(transformed_model, node.input[2]), np.array(test_parameters.zero_point)) + assert get_tensor_value(transformed_model, node.input[2]).dtype == test_parameters.onnx_dtype TARGET_LAYERS = [["ReLU1"], ["Conv1", "BN1"], ["Conv1", "BN1", "ReLU1"]] @@ -160,8 +161,7 @@ def test_output_insertion(target_layers, target_layer_outputs): transformed_model = model_transformer.transform(transformation_layout) - onnx_graph = ONNXGraph(transformed_model) - assert Counter([out.name for out in onnx_graph.get_model_outputs()]) == Counter(target_layer_outputs) + assert Counter([out.name for out in get_model_outputs(transformed_model)]) == Counter(target_layer_outputs) CONV_LAYERS = [["Conv1", "Conv2"]] @@ -182,11 +182,10 @@ def test_bias_correction(layers, values, refs): model_transformer = ONNXModelTransformer(model) transformed_model = model_transformer.transform(transformation_layout) - onnx_graph = ONNXGraph(transformed_model) for conv_layer, bias_reference in zip(layers, refs): - bias_tensor_name = onnx_graph.get_node_by_name(conv_layer).input[2] - bias_tensor = onnx_graph.get_tensor(bias_tensor_name) + bias_tensor_name = get_node_by_name(transformed_model, conv_layer).input[2] + bias_tensor = get_tensor(transformed_model, bias_tensor_name) bias_value = onnx.numpy_helper.to_array(bias_tensor) assert np.all(bias_value == bias_reference) diff --git a/tests/onnx/weightless_model.py b/tests/onnx/weightless_model.py index 046568df8eb..6f34347ba38 100644 --- a/tests/onnx/weightless_model.py +++ b/tests/onnx/weightless_model.py @@ -19,8 +19,6 @@ from onnx import TensorProto # pylint:disable=no-name-in-module from onnx.external_data_helper import uses_external_data -from nncf.onnx.graph.onnx_graph import ONNXGraph - # pylint: disable=no-member @@ -32,8 +30,7 @@ def load_model_topology_with_zeros_weights(model_path: Union[str, Path]) -> onnx :return: Onnx model with filled the all external tensors by random values. 
""" model = onnx.load_model(model_path, load_external_data=False) - onnx_graph = ONNXGraph(model) - for tensor in onnx_graph.onnx_model.graph.initializer: + for tensor in model.graph.initializer: if uses_external_data(tensor): np_dtype = onnx.helper.tensor_dtype_to_np_dtype(tensor.data_type) np_tensor = np.zeros(list(tensor.dims)).astype(np_dtype) From e576356f0ac835f9ec9cc29b71735481213e4078 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 24 Aug 2023 13:16:15 +0200 Subject: [PATCH 02/21] remove print --- nncf/onnx/graph/onnx_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py index 47f46765898..1a635aec189 100644 --- a/nncf/onnx/graph/onnx_helper.py +++ b/nncf/onnx/graph/onnx_helper.py @@ -63,7 +63,6 @@ def seek_v_info(model, edge_name): v_info = seek_v_info(model, edge_name) if v_info is not None: return v_info - print("Shape infe") infered_model = onnx.shape_inference.infer_shapes(model) return seek_v_info(infered_model, edge_name) From b5e9f28ce7bc276ad0ab83a5c2275c0f43f2b9b8 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 20 Sep 2023 11:19:42 +0200 Subject: [PATCH 03/21] draft --- nncf/onnx/graph/model_transformer.py | 23 ++--- nncf/onnx/graph/nncf_graph_builder.py | 6 +- nncf/onnx/graph/node_utils.py | 9 +- nncf/onnx/graph/onnx_helper.py | 119 ++++++++++---------------- nncf/onnx/statistics/aggregator.py | 21 +++-- 5 files changed, 76 insertions(+), 102 deletions(-) diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py index 2211209100d..68fa5da75aa 100644 --- a/nncf/onnx/graph/model_transformer.py +++ b/nncf/onnx/graph/model_transformer.py @@ -19,12 +19,9 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout from nncf.onnx.graph.node_utils import get_input_edge +from nncf.onnx.graph.onnx_helper import ModelSeeker from nncf.onnx.graph.onnx_helper import get_children -from nncf.onnx.graph.onnx_helper import get_edge from nncf.onnx.graph.onnx_helper import get_edge_dtype -from nncf.onnx.graph.onnx_helper import get_model_outputs -from nncf.onnx.graph.onnx_helper import get_node_by_name -from nncf.onnx.graph.onnx_helper import get_node_edge_names from nncf.onnx.graph.onnx_helper import get_node_index from nncf.onnx.graph.onnx_helper import get_nodes_by_input from nncf.onnx.graph.onnx_helper import get_tensor @@ -50,6 +47,7 @@ class ONNXModelTransformer(ModelTransformer): def __init__(self, model: onnx.ModelProto): super().__init__(model) self.onnx_model_extractor = onnx.utils.Extractor(self._model) + self.onnx_model_seeker = ModelSeeker(self.model) def _get_target_edge( self, @@ -69,10 +67,10 @@ def _get_target_edge( :return: Target edge name. """ if transform_type in [TargetType.PRE_LAYER_OPERATION, TargetType.OPERATION_WITH_WEIGHTS]: - return get_node_edge_names(self._model, node_name)["input"][port_id] + return self.onnx_model_seeker.get_node(node_name).input[port_id] if node_name in input_edges_mapping: # ADD INPUT NODE CASE return get_input_edge(node_name, input_edges_mapping, self._model) - return get_node_edge_names(self._model, node_name)["output"][port_id] + return self.onnx_model_seeker.get_node(self._model, node_name).input[port_id] def transform(self, transformation_layout: TransformationLayout) -> onnx.ModelProto: """ @@ -129,7 +127,7 @@ def _apply_output_insertion_transformations( :param transformations: ONNXOutputInsertionCommand transformations. 
:return: New model with inserted outputs. """ - model_outputs = set(output.name for output in get_model_outputs(self._model)) + model_outputs = set(output.name for output in self._model.graph.output) for transformation in transformations: port_id = transformation.target_point.port_id node_name = transformation.target_point.target_node_name @@ -369,17 +367,14 @@ def _apply_model_extraction_transformation(self, transformation: ONNXModelExtrac """ input_tensor_names = [] for input_node_name in transformation.inputs: - input_onnx_node = get_node_by_name(self._model, input_node_name) + input_onnx_node = self.onnx_model_seeker.get_node(input_node_name) input_tensor_names.append(input_onnx_node.input[0]) - output_tensor_names = [] + output_tensor_names = [n.name for n in self._model.graph.output] for output_node_name in transformation.outputs: - output_onnx_node = get_node_by_name(self._model, output_node_name) + output_onnx_node = self.onnx_model_seeker.get_node(output_node_name) output_tensor_names.append(output_onnx_node.output[0]) - if not output_tensor_names: - output_tensor_names = [n.name for n in get_model_outputs(self._model)] - return self.onnx_model_extractor.extract_model(input_tensor_names, output_tensor_names) def _apply_qdq_node_removing_transformations( @@ -393,7 +388,7 @@ def _apply_qdq_node_removing_transformations( :return: Model with removed nodes. """ for transformation in transformations: - node = get_node_by_name(model, transformation.target_point.target_node_name) + node = self.onnx_model_seeker.get_node(transformation.target_point.target_node_name) node_children = get_children(model, node) for node_child in node_children: diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py index f82604fc2f4..d9dd981972b 100644 --- a/nncf/onnx/graph/nncf_graph_builder.py +++ b/nncf/onnx/graph/nncf_graph_builder.py @@ -33,9 +33,7 @@ from nncf.onnx.graph.onnx_helper import get_edge_shape from nncf.onnx.graph.onnx_helper import get_input_port_id_for_node_after_input from nncf.onnx.graph.onnx_helper import get_model_inputs -from nncf.onnx.graph.onnx_helper import get_model_outputs from nncf.onnx.graph.onnx_helper import get_node_by_output -from nncf.onnx.graph.onnx_helper import get_node_edge_names from nncf.onnx.graph.onnx_helper import get_nodes_by_input from nncf.onnx.graph.onnx_helper import get_output_port_id_for_node_before_output from nncf.onnx.graph.onnx_helper import get_port_ids_between_nodes @@ -253,7 +251,7 @@ def _add_nncf_output_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> Non :param nncf_graph: NNCFGraph, in which the new nodes will be added. :return: None. 
""" - for i, _output in enumerate(get_model_outputs(model)): + for i, _output in enumerate(model.graph.output): output_name = _output.name layer_attributes = ONNXLayerAttributes() output_node = nncf_graph.add_nncf_node( @@ -331,7 +329,7 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: is_shared=is_shared, ) for output_node in get_all_nodes(onnx_model): - output_edges = get_node_edge_names(onnx_model, output_node.name)["output"] + output_edges = output_node.output for output_edge in output_edges: edge = get_edge(onnx_model, output_edge) if edge is None: diff --git a/nncf/onnx/graph/node_utils.py b/nncf/onnx/graph/node_utils.py index 12a28bb9507..0e1e2ac4512 100644 --- a/nncf/onnx/graph/node_utils.py +++ b/nncf/onnx/graph/node_utils.py @@ -21,7 +21,7 @@ from nncf.common.tensor_statistics.collectors import ReductionShape from nncf.onnx.graph.metatypes import onnx_metatypes as om from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXDequantizeLinearMetatype -from nncf.onnx.graph.onnx_helper import get_node_edge_names +from nncf.onnx.graph.onnx_helper import ONNXModelSeeker from nncf.onnx.graph.onnx_helper import get_tensor_value from nncf.onnx.graph.transformations.commands import ONNXTargetPoint @@ -69,7 +69,9 @@ def get_input_edges_mapping(nncf_graph: NNCFGraph) -> Dict[str, Tuple[str, int]] def get_input_edge( - input_node_name: str, input_edges_mapping: Dict[str, Tuple[str, int]], model: onnx.ModelProto + input_node_name: str, + input_edges_mapping: Dict[str, Tuple[str, int]], + name_to_node_mapping: Dict[str, onnx.NodeProto], ) -> str: """ Returns input edge corresponding to the NNCF input node with the name input_node_name. @@ -83,7 +85,8 @@ def get_input_edge( input_edges = set() for node_info in input_edges_mapping[input_node_name]: name, port_id = node_info - input_edges.add(get_node_edge_names(model, name)["input"][port_id]) + node = name_to_node_mapping[name] + input_edges.add(node.input[port_id]) assert len(input_edges) == 1 return input_edges.pop() diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py index 1a635aec189..24445baa6e7 100644 --- a/nncf/onnx/graph/onnx_helper.py +++ b/nncf/onnx/graph/onnx_helper.py @@ -15,58 +15,60 @@ from onnx import numpy_helper -def get_all_nodes(model: onnx.ModelProto) -> List[onnx.NodeProto]: - """ - Returns model nodes in the original order. - - :return: model nodes. - """ - return model.graph.node - - -def get_node_by_name(model: onnx.ModelProto, node_name: str) -> Optional[onnx.NodeProto]: +class ModelSeeker: + def __init__(self, model: onnx.ModelProto) -> None: + self.model = model + self.node_name_to_node_mapping = {node.node_name: node for node in model.graph.node} + self._edge_name_to_value_info: Dict[str, onnx.ValueInfoProto] = {} + + def _update_edges(self) -> None: + self.model = onnx.shape_inference.infer_shapes(self.onnx_model) + value_infos = ( + *self.model.graph.value_info, + *self.model.graph.input, + *self.model.graph.output, + *self.model.graph.initializer, + ) + self._edge_name_to_value_info = {tensor.name: tensor for tensor in value_infos} + + def get_node(self, node_name: str) -> Optional[onnx.NodeProto]: + """ + Returns a model node with the name equals to 'node_name' from self._node_name_to_node. + If the self._node_name_to_node is None, fills it with the nodes from the self.onnx_model. + If there is no node with such name returns None. + + :param node_name: Name of the node. + :return: None if the node with the specified name exists - otherwise returns the node. 
diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py
index 1a635aec189..24445baa6e7 100644
--- a/nncf/onnx/graph/onnx_helper.py
+++ b/nncf/onnx/graph/onnx_helper.py
@@ -15,58 +15,60 @@
 from onnx import numpy_helper


-def get_all_nodes(model: onnx.ModelProto) -> List[onnx.NodeProto]:
-    """
-    Returns model nodes in the original order.
-
-    :return: model nodes.
-    """
-    return model.graph.node
-
-
-def get_node_by_name(model: onnx.ModelProto, node_name: str) -> Optional[onnx.NodeProto]:
+class ModelSeeker:
+    def __init__(self, model: onnx.ModelProto) -> None:
+        self.model = model
+        self.node_name_to_node_mapping = {node.name: node for node in model.graph.node}
+        self._edge_name_to_value_info: Dict[str, onnx.ValueInfoProto] = {}
+
+    def _update_edges(self) -> None:
+        self.model = onnx.shape_inference.infer_shapes(self.model)
+        value_infos = (
+            *self.model.graph.value_info,
+            *self.model.graph.input,
+            *self.model.graph.output,
+            *self.model.graph.initializer,
+        )
+        self._edge_name_to_value_info = {tensor.name: tensor for tensor in value_infos}
+
+    def get_node(self, node_name: str) -> Optional[onnx.NodeProto]:
+        """
+        Returns the model node with the name 'node_name',
+        looked up in the prebuilt node_name_to_node_mapping.
+
+        :param node_name: Name of the node.
+        :return: The node with the specified name if it exists, otherwise None.
+        """
+        return self.node_name_to_node_mapping.get(node_name)
+
+    def get_edge(self, edge_name: str) -> Optional[onnx.ValueInfoProto]:
+        """
+        Returns edge by its name or None if the model has no such edge.
+        If self._edge_name_to_value_info is not initialized, runs an initialization.
+
+        :param edge_name: Name of edge.
+        :return: Edge.
+        """
+        if edge_name not in self._edge_name_to_value_info:
+            self._update_edges()
+        return self._edge_name_to_value_info.get(edge_name)
+
+
+def get_node(model: onnx.ModelProto, node_name: str) -> Optional[onnx.NodeProto]:
     """
-    Returns a model node with the name equals to 'node_name' from self._node_name_to_node.
-    If the self._node_name_to_node is None, fills it with the nodes from the self.onnx_model.
-    If there is no node with such name returns None.
-    :param node_name: Name of the node.
-    :return: None if the node with the specified name exists - otherwise returns the node.
+    Returns the model node with the name 'node_name' by scanning the model graph.
+
+    :param node_name: Name of the node.
+    :return: The node with the specified name if it exists, otherwise None.
     """
-    for node in get_all_nodes(model):
+    for node in model.graph.node:
         if node.name == node_name:
             return node
     return None


-def get_edge(model: onnx.ModelProto, edge_name: str) -> Optional[onnx.ValueInfoProto]:
-    """
-    Returns edge by its name or None if the model has no such edge.
-    If self._edge_name_to_value_info is not initialized runs an initialization.
-
-    :param edge_name: Name of edge.
-    :return: Edge.
-    """
-
-    def seek_v_info(model, edge_name):
-        value_infos = [
-            *model.graph.value_info,
-            *model.graph.input,
-            *model.graph.output,
-            *model.graph.initializer,
-        ]
-        for info in value_infos:
-            if info.name == edge_name:
-                return info
-        return None
-
-    v_info = seek_v_info(model, edge_name)
-    if v_info is not None:
-        return v_info
-    infered_model = onnx.shape_inference.infer_shapes(model)
-    return seek_v_info(infered_model, edge_name)
-
-
 def get_model_inputs(model: onnx.ModelProto) -> List[onnx.ValueInfoProto]:
     """
     Returns all model inputs.

     :return: Model Inputs.
     """
     inputs = []
@@ -83,15 +85,6 @@ def get_model_inputs(model: onnx.ModelProto) -> List[onnx.ValueInfoProto]:
     return inputs


-def get_model_outputs(model: onnx.ModelProto) -> List[onnx.ValueInfoProto]:
-    """
-    Returns all model outputs.
-
-    :return: Model Outputs.
-    """
-    return list(model.graph.output)
-
-
 def get_node_by_output(model: onnx.ModelProto, output_name: str) -> Optional[onnx.NodeProto]:
     """
     Returns node that have output edge with the name 'output_name'.

     :param output_name: The name of output edge.
     :return: Node with corresponding output.
     """
-    for node in get_all_nodes(model):
+    for node in model.graph.node:
         if output_name in node.output:
             return node
     return None
@@ -113,29 +106,12 @@ def get_nodes_by_input(model: onnx.ModelProto, input_name: str) -> List[onnx.NodeProto]:
     :return: Nodes with corresponding input.
     """
     output = []
-    for node in get_all_nodes(model):
+    for node in model.graph.node:
         if input_name in node.input:
             output.append(node)
     return output


-def get_node_edge_names(model: onnx.ModelProto, node_name: str) -> Dict[str, List[str]]:
-    """
-    Returns node edge names.
-
-    :param node_name: The name of the node.
-    :return: Dict with two keys: 'input' and 'output',
-        which are corresponding to input and output edges accordingly.
- """ - node = get_node_by_name(model, node_name) - if node: - return { - "input": list(node.input), - "output": list(node.output), - } - raise RuntimeError("There is no node with the name {}".format(node_name)) - - def get_input_port_id_for_node_after_input(input_name: str, to_node: onnx.NodeProto) -> int: """ Returns input_port_id for 'to_node' connected with the model input with the name 'input_name'. @@ -191,7 +167,7 @@ def get_node_index(model: onnx.ModelProto, node_name: str) -> Optional[int]: :param node_name: Name of the node. :return: Node index, -1 if there is no such node. """ - for i, node in enumerate(get_all_nodes(model)): + for i, node in enumerate(model.graph.node): if node.name == node_name: return i return None @@ -308,8 +284,7 @@ def get_children(model: onnx.ModelProto, node: onnx.NodeProto) -> List[onnx.Node :return: All children nodes. """ output = [] - node_edges = get_node_edge_names(model, node.name)["output"] - for node_edge in node_edges: + for node_edge in node.output: output.extend(get_nodes_by_input(model, node_edge)) return output @@ -321,6 +296,6 @@ def is_node_has_shared_weight(model: onnx.ModelProto, node: onnx.NodeProto, weig :param node: Node. :return: True whether node shares a weight - otherwise False. """ - weight_tensor_edge = get_node_edge_names(model, node.name)["input"][weight_port_id] + weight_tensor_edge = node.input[weight_port_id] nodes = get_nodes_by_input(model, weight_tensor_edge) return len(nodes) > 1 diff --git a/nncf/onnx/statistics/aggregator.py b/nncf/onnx/statistics/aggregator.py index 8fcf0543fed..dd4f2c6bd12 100644 --- a/nncf/onnx/statistics/aggregator.py +++ b/nncf/onnx/statistics/aggregator.py @@ -22,7 +22,7 @@ from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.onnx.graph.node_utils import get_input_edge from nncf.onnx.graph.node_utils import get_input_edges_mapping -from nncf.onnx.graph.onnx_helper import get_node_edge_names +from nncf.onnx.graph.onnx_helper import ModelSeeker from nncf.onnx.graph.transformations.commands import ONNXOutputInsertionCommand from nncf.onnx.tensor import ONNXNNCFTensor @@ -30,26 +30,29 @@ class ONNXStatisticsAggregator(StatisticsAggregator): def collect_statistics(self, model: onnx.ModelProto, graph: NNCFGraph) -> None: self.input_edges_mapping = get_input_edges_mapping(graph) - self._model = model + self.model_seeker = ModelSeeker(model) self._registered_weights = set() super().collect_statistics(model, graph) def _register_statistics( self, outputs: Dict[str, ONNXNNCFTensor], statistic_points: StatisticPointsContainer ) -> None: - for node_name, _statistic_points in statistic_points.items(): + for _statistic_points in statistic_points.values(): for statistic_point in _statistic_points: target_point = statistic_point.target_point port_id = target_point.port_id + node = self.model_seeker.get_node(target_point.target_node_name) if target_point.target_node_name in self.input_edges_mapping: # Input case - edge_name = get_input_edge(target_point.target_node_name, self.input_edges_mapping, self._model) - statistic_point.register_tensor(outputs[edge_name]) + edge_name = get_input_edge( + target_point.target_node_name, + self.input_edges_mapping, + self.model_seeker.node_name_to_node_mapping, + ) elif target_point.type == TargetType.POST_LAYER_OPERATION: - edge_name = get_node_edge_names(self._model, node_name)["output"][port_id] - statistic_point.register_tensor(outputs[edge_name]) + edge_name = node.output[port_id] elif target_point.type in 
[TargetType.PRE_LAYER_OPERATION, TargetType.OPERATION_WITH_WEIGHTS]: - edge_name = get_node_edge_names(self._model, node_name)["input"][port_id] - statistic_point.register_tensor(outputs[edge_name]) + edge_name = node.input[port_id] + statistic_point.register_tensor(outputs[edge_name]) def _get_transformation_layout_extra_outputs( self, statistic_points: StatisticPointsContainer From ba038e9d054651643759e5aecd60fe2469e68e93 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 20 Sep 2023 11:20:11 +0200 Subject: [PATCH 04/21] draft x2 --- tests/onnx/test_model_transformer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/onnx/test_model_transformer.py b/tests/onnx/test_model_transformer.py index f9b9540fc95..12204bc48c8 100644 --- a/tests/onnx/test_model_transformer.py +++ b/tests/onnx/test_model_transformer.py @@ -20,7 +20,6 @@ from nncf.common.graph.transformations.layout import TransformationLayout from nncf.onnx.graph.model_transformer import ONNXModelTransformer from nncf.onnx.graph.nncf_graph_builder import GraphConverter -from nncf.onnx.graph.onnx_helper import get_model_outputs from nncf.onnx.graph.onnx_helper import get_node_by_name from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.onnx_helper import get_tensor_value @@ -161,7 +160,7 @@ def test_output_insertion(target_layers, target_layer_outputs): transformed_model = model_transformer.transform(transformation_layout) - assert Counter([out.name for out in get_model_outputs(transformed_model)]) == Counter(target_layer_outputs) + assert Counter([out.name for out in transformed_model.graph.output]) == Counter(target_layer_outputs) CONV_LAYERS = [["Conv1", "Conv2"]] From 4e5262b92ef0bcc37ea32032235b9d560201e855 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 2 Oct 2023 14:35:40 +0200 Subject: [PATCH 05/21] add mappings for node and edge seeking --- nncf/onnx/graph/model_transformer.py | 42 ++++++++----- nncf/onnx/graph/nncf_graph_builder.py | 47 +++++--------- nncf/onnx/graph/node_utils.py | 5 +- nncf/onnx/graph/onnx_helper.py | 62 +++---------------- nncf/onnx/statistics/aggregator.py | 9 +-- .../bias_correction/onnx_backend.py | 9 ++- tests/onnx/quantization/common.py | 5 +- .../test_qdq_params_calculation.py | 3 +- tests/onnx/test_model_transformer.py | 6 +- 9 files changed, 71 insertions(+), 117 deletions(-) diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py index 68fa5da75aa..7471fbecbd8 100644 --- a/nncf/onnx/graph/model_transformer.py +++ b/nncf/onnx/graph/model_transformer.py @@ -19,10 +19,11 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout from nncf.onnx.graph.node_utils import get_input_edge -from nncf.onnx.graph.onnx_helper import ModelSeeker from nncf.onnx.graph.onnx_helper import get_children from nncf.onnx.graph.onnx_helper import get_edge_dtype +from nncf.onnx.graph.onnx_helper import get_edge_mapping from nncf.onnx.graph.onnx_helper import get_node_index +from nncf.onnx.graph.onnx_helper import get_node_mapping from nncf.onnx.graph.onnx_helper import get_nodes_by_input from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand @@ -45,15 +46,16 @@ class ONNXModelTransformer(ModelTransformer): ZERO_POINT_NAME_PREFIX = "zero_point_" def __init__(self, model: onnx.ModelProto): - super().__init__(model) + infered_model = 
onnx.shape_inference.infer_shapes(model) + super().__init__(infered_model) self.onnx_model_extractor = onnx.utils.Extractor(self._model) - self.onnx_model_seeker = ModelSeeker(self.model) def _get_target_edge( self, port_id: int, node_name: str, transform_type: TargetType, + node_mapping, input_edges_mapping: Dict[str, str], ) -> str: """ @@ -67,10 +69,10 @@ def _get_target_edge( :return: Target edge name. """ if transform_type in [TargetType.PRE_LAYER_OPERATION, TargetType.OPERATION_WITH_WEIGHTS]: - return self.onnx_model_seeker.get_node(node_name).input[port_id] + return node_mapping[node_name].input[port_id] if node_name in input_edges_mapping: # ADD INPUT NODE CASE - return get_input_edge(node_name, input_edges_mapping, self._model) - return self.onnx_model_seeker.get_node(self._model, node_name).input[port_id] + return get_input_edge(node_name, input_edges_mapping, node_mapping) + return node_mapping[node_name].output[port_id] def transform(self, transformation_layout: TransformationLayout) -> onnx.ModelProto: """ @@ -128,12 +130,15 @@ def _apply_output_insertion_transformations( :return: New model with inserted outputs. """ model_outputs = set(output.name for output in self._model.graph.output) + node_mapping = get_node_mapping(self._model) for transformation in transformations: port_id = transformation.target_point.port_id node_name = transformation.target_point.target_node_name transform_type = transformation.target_point.type input_edges_mapping = transformation.input_edges_mapping - target_edge_name = self._get_target_edge(port_id, node_name, transform_type, input_edges_mapping) + target_edge_name = self._get_target_edge( + port_id, node_name, transform_type, node_mapping, input_edges_mapping + ) model_outputs.add(target_edge_name) return ONNXModelTransformer._insert_outputs(self._model, outputs=model_outputs) @@ -148,8 +153,9 @@ def _insert_outputs(model: onnx.ModelProto, outputs: Union[List[str], Set[str]]) :return: New model with inserted outputs. """ model_outputs = [] + edge_mapping = get_edge_mapping(model) for output in outputs: - edge = get_edge(model, output) + edge = edge_mapping[output] onnx_dtype = get_edge_dtype(edge) type_proto = onnx.helper.make_tensor_type_proto(onnx_dtype, shape=None) model_outputs.append(onnx.helper.make_value_info(name=output, type_proto=type_proto)) @@ -273,7 +279,7 @@ def _get_scale_zero_point_tensors( ) return onnx_scale_tensor, onnx_zero_point_tensor - def _get_quantizer_dequantizer_edge_name(self, transformation: ONNXQuantizerInsertionCommand) -> str: + def _get_quantizer_dequantizer_edge_name(self, transformation: ONNXQuantizerInsertionCommand, node_mapping) -> str: """ Returns an edge name on which QuantizeLinear-DequantizeLinear nodes pair has to be inserted. @@ -284,7 +290,7 @@ def _get_quantizer_dequantizer_edge_name(self, transformation: ONNXQuantizerInse node_name = transformation.target_point.target_node_name transform_type = transformation.target_point.type input_edges_mapping = transformation.input_edges_mapping - target_edge_name = self._get_target_edge(port_id, node_name, transform_type, input_edges_mapping) + target_edge_name = self._get_target_edge(port_id, node_name, transform_type, node_mapping, input_edges_mapping) self._added_target_edges[target_edge_name] += 1 return target_edge_name @@ -298,7 +304,8 @@ def _insert_quantizer_dequantizer( :param transformation: QuantizeLinear-DequantizeLinear insertion transformation. :return: Updated model with inserted QuantizeLinear-DequantizeLinear pair. 
""" - target_edge_name = self._get_quantizer_dequantizer_edge_name(transformation) + node_mapping = get_node_mapping(model) + target_edge_name = self._get_quantizer_dequantizer_edge_name(transformation, node_mapping) quantizer, dequantizer = self._get_quantize_dequantize_nodes(transformation, target_edge_name) onnx_scale_tensor, onnx_zero_point_tensor = ONNXModelTransformer._get_scale_zero_point_tensors( transformation, quantizer, dequantizer @@ -314,7 +321,7 @@ def _insert_quantizer_dequantizer( if transformation.target_point.type == TargetType.PRE_LAYER_OPERATION: # If we need to change only target nodes input - target_node = get_node_by_name(model, transformation.target_point.target_node_name) + target_node = node_mapping[transformation.target_point.target_node_name] for i, inp in enumerate(target_node.input): if inp == target_edge_name: target_node.input[i] = dequantizer.output[0] @@ -347,10 +354,11 @@ def _apply_bias_correction_transformations( :param transformations: Bias correction transformations. :return: Copy of original model with updated biases. """ + node_mapping = get_node_mapping(model) for transformation in transformations: bias_tensor_position = transformation.target_point.port_id node_name = transformation.target_point.target_node_name - onnx_node = get_node_by_name(model, node_name) + onnx_node = node_mapping[node_name] bias_initializer_name = onnx_node.input[bias_tensor_position] bias_initializer = get_tensor(model, bias_initializer_name) @@ -366,13 +374,14 @@ def _apply_model_extraction_transformation(self, transformation: ONNXModelExtrac :return: Extracted sub-model. """ input_tensor_names = [] + node_mapping = get_node_mapping(self._model) for input_node_name in transformation.inputs: - input_onnx_node = self.onnx_model_seeker.get_node(input_node_name) + input_onnx_node = node_mapping[input_node_name] input_tensor_names.append(input_onnx_node.input[0]) output_tensor_names = [n.name for n in self._model.graph.output] for output_node_name in transformation.outputs: - output_onnx_node = self.onnx_model_seeker.get_node(output_node_name) + output_onnx_node = node_mapping[output_node_name] output_tensor_names.append(output_onnx_node.output[0]) return self.onnx_model_extractor.extract_model(input_tensor_names, output_tensor_names) @@ -388,7 +397,8 @@ def _apply_qdq_node_removing_transformations( :return: Model with removed nodes. 
""" for transformation in transformations: - node = self.onnx_model_seeker.get_node(transformation.target_point.target_node_name) + node_mapping = get_node_mapping(model) + node = node_mapping[transformation.target_point.target_node_name] node_children = get_children(model, node) for node_child in node_children: diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py index 5acef675ea0..22b483418e6 100644 --- a/nncf/onnx/graph/nncf_graph_builder.py +++ b/nncf/onnx/graph/nncf_graph_builder.py @@ -29,9 +29,8 @@ from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXOpWithWeightsMetatype from nncf.onnx.graph.metatypes.onnx_metatypes import get_metatype from nncf.onnx.graph.metatypes.onnx_metatypes import get_tensor_edge_name -from nncf.onnx.graph.onnx_helper import get_all_nodes -from nncf.onnx.graph.onnx_helper import get_edge from nncf.onnx.graph.onnx_helper import get_edge_dtype +from nncf.onnx.graph.onnx_helper import get_edge_mapping from nncf.onnx.graph.onnx_helper import get_edge_shape from nncf.onnx.graph.onnx_helper import get_input_port_id_for_node_after_input from nncf.onnx.graph.onnx_helper import get_model_inputs @@ -149,23 +148,6 @@ def _is_node_with_bias(node: onnx.NodeProto, model: onnx.ModelProto) -> bool: return False -def _get_weight_attr(node: onnx.NodeProto, model: onnx.ModelProto, weight_port_id: int) -> Dict[int, Dict]: - """ - Returns weight attributes. - - :param node: ONNX node. - :param model: ONNX model. - :param weight_port_ids: Port ids with weights location. - :return: Weight attributes. - """ - weight_attrs = {} - weight_edge_name = node.input[weight_port_id] - edge = get_edge(model, weight_edge_name) - weight_shape = get_edge_shape(edge) - weight_attrs[weight_port_id] = {"name": weight_edge_name, "shape": weight_shape} - return weight_attrs - - def _get_gemm_attrs(node: onnx.NodeProto) -> Dict[str, int]: """ Returns transpose attrbiutes of GEMM node. @@ -242,7 +224,7 @@ def _replace_empty_node_name(model: onnx.ModelProto) -> onnx.ModelProto: return model @staticmethod - def _add_nncf_input_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> None: + def _add_nncf_input_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_mapping) -> None: """ Adds special NNCF Input nodes to NNCFGraph. For all the ONNX model inputs, the special NNCF Input node is placed and then corresponding edges are added. @@ -262,7 +244,7 @@ def _add_nncf_input_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> None to_nodes = get_nodes_by_input(model, input_name) input_node_node_id = input_node.node_id - edge = get_edge(model, input_name) + edge = edge_mapping[input_name] input_shape = get_edge_shape(edge) onnx_dtype = get_edge_dtype(edge) nncf_dtype = GraphConverter.convert_onnx_dtype_to_nncf_dtype(onnx_dtype) @@ -282,7 +264,7 @@ def _add_nncf_input_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> None output_port_id += 1 @staticmethod - def _add_nncf_output_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> None: + def _add_nncf_output_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_mapping) -> None: """ Adds special NNCF Output nodes to NNCFGraph. For all the ONNX model outputs, the special NNCF Output node is placed and then corresponding edges are added. 
@@ -302,7 +284,7 @@ def _add_nncf_output_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> Non from_node = get_node_by_output(model, output_name) output_node_node_id = output_node.node_id - edge = get_edge(model, output_name) + edge = edge_mapping[output_name] output_shape = get_edge_shape(edge) onnx_dtype = get_edge_dtype(edge) nncf_dtype = GraphConverter.convert_onnx_dtype_to_nncf_dtype(onnx_dtype) @@ -341,8 +323,9 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: """ onnx_model = GraphConverter._replace_empty_node_name(onnx_model) onnx_model = onnx.shape_inference.infer_shapes(onnx_model) + edge_mapping = get_edge_mapping(onnx_model) nncf_graph = NNCFGraph() - for node in get_all_nodes(onnx_model): + for node in onnx_model.graph.node: metatype = get_metatype(onnx_model, node) weight_port_ids = _get_weight_port_ids(node, onnx_model) is_shared = None @@ -352,8 +335,11 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: if weight_port_ids: # If node has weight weight_edge_names = [] for weight_port_id in weight_port_ids: - weight_edge_names.append(node.input[weight_port_id]) - weight_attrs.update(_get_weight_attr(node, onnx_model, weight_port_id)) + weight_edge_name = node.input[weight_port_id] + weight_edge_names.append(weight_edge_name) + edge = edge_mapping[weight_edge_name] + weight_shape = get_edge_shape(edge) + weight_attrs[weight_port_id] = {"name": weight_edge_name, "shape": weight_shape} if not is_shared and is_node_has_shared_weight(onnx_model, node, weight_port_id): is_shared = True @@ -367,10 +353,11 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: layer_attributes=layer_attributes, is_shared=is_shared, ) - for output_node in get_all_nodes(onnx_model): + + for output_node in onnx_model.graph.node: output_edges = output_node.output for output_edge in output_edges: - edge = get_edge(onnx_model, output_edge) + edge = edge_mapping.get(output_edge) if edge is None: # If the edge is None it means that the edge was not added during shape inference of ONNX model. # BatchNorm exported in Training mode has unused outputs edges: mean, var, saved_mean, saved_var. 
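The hunk above deliberately uses edge_mapping.get(output_edge) rather than edge_mapping[output_edge]: the mapping only covers tensors that appear in value_info, inputs, outputs, or initializers, so edges that shape inference never annotates must be tolerated. A short sketch of that defensive pattern, under the same assumptions as the surrounding diff:

    for node in model.graph.node:
        for output_edge in node.output:
            edge = edge_mapping.get(output_edge)  # .get(), not [...]: the edge may be absent
            if edge is None:
                # e.g. the unused training-mode outputs of BatchNorm
                # (mean, var, saved_mean, saved_var) have no ValueInfoProto
                continue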
@@ -394,6 +381,6 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph:
             output_port_id=output_port_id,
             dtype=Dtype(nncf_dtype),
         )
-    GraphConverter._add_nncf_input_nodes(onnx_model, nncf_graph)
-    GraphConverter._add_nncf_output_nodes(onnx_model, nncf_graph)
+    GraphConverter._add_nncf_input_nodes(onnx_model, nncf_graph, edge_mapping)
+    GraphConverter._add_nncf_output_nodes(onnx_model, nncf_graph, edge_mapping)
     return nncf_graph
diff --git a/nncf/onnx/graph/node_utils.py b/nncf/onnx/graph/node_utils.py
index 0e1e2ac4512..6d18a894abe 100644
--- a/nncf/onnx/graph/node_utils.py
+++ b/nncf/onnx/graph/node_utils.py
@@ -21,7 +21,6 @@
 from nncf.common.tensor_statistics.collectors import ReductionShape
 from nncf.onnx.graph.metatypes import onnx_metatypes as om
 from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXDequantizeLinearMetatype
-from nncf.onnx.graph.onnx_helper import ModelSeeker
 from nncf.onnx.graph.onnx_helper import get_tensor_value
 from nncf.onnx.graph.transformations.commands import ONNXTargetPoint
@@ -71,7 +70,7 @@ def get_input_edges_mapping(nncf_graph: NNCFGraph) -> Dict[str, Tuple[str, int]]
 def get_input_edge(
     input_node_name: str,
     input_edges_mapping: Dict[str, Tuple[str, int]],
-    name_to_node_mapping: Dict[str, onnx.NodeProto],
+    node_mapping: Dict[str, onnx.NodeProto],
 ) -> str:
     """
     Returns input edge corresponding to the NNCF input node with the name input_node_name.
@@ -85,7 +84,7 @@ def get_input_edge(
     input_edges = set()
     for node_info in input_edges_mapping[input_node_name]:
         name, port_id = node_info
-        node = name_to_node_mapping[name]
+        node = node_mapping[name]
         input_edges.add(node.input[port_id])
     assert len(input_edges) == 1
     return input_edges.pop()
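In the onnx_helper diff that follows, the stateful ModelSeeker gives way to two plain mapping builders. Callers are expected to build the dictionaries once and thread them through, roughly like this sketch (the input node name is an illustrative placeholder):

    node_mapping = get_node_mapping(model)  # {node.name: onnx.NodeProto}
    edge_mapping = get_edge_mapping(model)  # {tensor.name: onnx.ValueInfoProto}
    edge_name = get_input_edge("Input_1", input_edges_mapping, node_mapping)  # "Input_1" is hypothetical

One consequence of dropping the cached object: after the graph is mutated, rebuilding a stale mapping becomes the caller's job (compare the TODO that patch 10 adds next to get_input_edge_node_mapping).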
- """ - if edge_name not in self._edge_name_to_value_info: - self._update_edges() - return self._edge_name_to_value_info.get(edge_name) - - -def get_node(model: onnx.ModelProto, node_name: str) -> Optional[onnx.NodeProto]: - """ - Returns a model node with the name equals to 'node_name' from self._node_name_to_node. - If the self._node_name_to_node is None, fills it with the nodes from the self.onnx_model. - If there is no node with such name returns None. - :param node_name: Name of the node. - :return: None if the node with the specified name exists - otherwise returns the node. - """ - for node in model.graph.node: - if node.name == node_name: - return node - return None +def get_node_mapping(model: onnx.ModelProto): + return {node.name: node for node in model.graph.node} + + +def get_edge_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoProto]: + """ """ + return { + tensor.name: tensor + for tensor in (*model.graph.value_info, *model.graph.input, *model.graph.output, *model.graph.initializer) + } def get_model_inputs(model: onnx.ModelProto) -> List[onnx.ValueInfoProto]: diff --git a/nncf/onnx/statistics/aggregator.py b/nncf/onnx/statistics/aggregator.py index dd4f2c6bd12..6c5c3c6aef8 100644 --- a/nncf/onnx/statistics/aggregator.py +++ b/nncf/onnx/statistics/aggregator.py @@ -22,7 +22,7 @@ from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.onnx.graph.node_utils import get_input_edge from nncf.onnx.graph.node_utils import get_input_edges_mapping -from nncf.onnx.graph.onnx_helper import ModelSeeker +from nncf.onnx.graph.onnx_helper import get_node_mapping from nncf.onnx.graph.transformations.commands import ONNXOutputInsertionCommand from nncf.onnx.tensor import ONNXNNCFTensor @@ -30,7 +30,7 @@ class ONNXStatisticsAggregator(StatisticsAggregator): def collect_statistics(self, model: onnx.ModelProto, graph: NNCFGraph) -> None: self.input_edges_mapping = get_input_edges_mapping(graph) - self.model_seeker = ModelSeeker(model) + self.node_mapping = get_node_mapping(model) self._registered_weights = set() super().collect_statistics(model, graph) @@ -41,16 +41,17 @@ def _register_statistics( for statistic_point in _statistic_points: target_point = statistic_point.target_point port_id = target_point.port_id - node = self.model_seeker.get_node(target_point.target_node_name) if target_point.target_node_name in self.input_edges_mapping: # Input case edge_name = get_input_edge( target_point.target_node_name, self.input_edges_mapping, - self.model_seeker.node_name_to_node_mapping, + self.node_mapping, ) elif target_point.type == TargetType.POST_LAYER_OPERATION: + node = self.node_mapping[target_point.target_node_name] edge_name = node.output[port_id] elif target_point.type in [TargetType.PRE_LAYER_OPERATION, TargetType.OPERATION_WITH_WEIGHTS]: + node = self.node_mapping[target_point.target_node_name] edge_name = node.input[port_id] statistic_point.register_tensor(outputs[edge_name]) diff --git a/nncf/quantization/algorithms/bias_correction/onnx_backend.py b/nncf/quantization/algorithms/bias_correction/onnx_backend.py index 49a80a6bd4a..af19fc694a9 100644 --- a/nncf/quantization/algorithms/bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/bias_correction/onnx_backend.py @@ -23,7 +23,6 @@ from nncf.onnx.graph.node_utils import get_bias_value from nncf.onnx.graph.node_utils import is_any_weight_quantized from nncf.onnx.graph.node_utils import is_node_with_bias -from nncf.onnx.graph.onnx_helper import get_node_by_name from 
nncf.onnx.graph.transformations.command_creation import create_bias_correction_command from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand from nncf.onnx.graph.transformations.commands import ONNXModelExtractionCommand @@ -102,13 +101,13 @@ def get_bias_value(node: NNCFNode, model: onnx.ModelProto, nncf_graph: NNCFGraph @staticmethod def get_input_name(model: onnx.ModelProto, node_name: str) -> str: - node = get_node_by_name(model, node_name) - return node.input[0] + node_dict = {node.name: node for node in model.graph.node} + return node_dict[node_name].input[0] @staticmethod def get_output_name(model: onnx.ModelProto, node_name: str, output_id: int) -> List[str]: - node = get_node_by_name(model, node_name) - return node.output[output_id] + node_dict = {node.name: node for node in model.graph.node} + return node_dict[node_name].output[output_id] @staticmethod def is_quantized_weights(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: diff --git a/tests/onnx/quantization/common.py b/tests/onnx/quantization/common.py index e2c58968dfa..06bbc5ee34d 100644 --- a/tests/onnx/quantization/common.py +++ b/tests/onnx/quantization/common.py @@ -17,8 +17,8 @@ from nncf import Dataset from nncf.onnx.graph.nncf_graph_builder import GraphConverter -from nncf.onnx.graph.onnx_helper import get_edge from nncf.onnx.graph.onnx_helper import get_edge_dtype +from nncf.onnx.graph.onnx_helper import get_edge_mapping from nncf.onnx.graph.onnx_helper import get_edge_shape from nncf.onnx.statistics.statistics import ONNXMinMaxTensorStatistic from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters @@ -55,11 +55,12 @@ def _get_input_keys(original_model: onnx.ModelProto) -> str: def get_random_dataset_for_test(model: onnx.ModelProto, has_batch_dim: bool, length: Optional[int] = 10): keys = _get_input_keys(model) + edge_mapping = get_edge_mapping(model) def transform_fn(i): output = {} for key in keys: - edge = get_edge(model, key) + edge = edge_mapping[key] input_dtype = get_edge_dtype(edge) input_np_dtype = onnx.helper.tensor_dtype_to_np_dtype(input_dtype) shape = get_edge_shape(edge) diff --git a/tests/onnx/quantization/test_qdq_params_calculation.py b/tests/onnx/quantization/test_qdq_params_calculation.py index d38df7c5337..1b3367ab6fa 100644 --- a/tests/onnx/quantization/test_qdq_params_calculation.py +++ b/tests/onnx/quantization/test_qdq_params_calculation.py @@ -15,7 +15,6 @@ import pytest from nncf.common.quantization.structs import QuantizationPreset -from nncf.onnx.graph.onnx_helper import get_all_nodes from nncf.onnx.graph.onnx_helper import get_tensor_value from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix @@ -37,7 +36,7 @@ def get_q_nodes_params(model: onnx.ModelProto) -> Dict[str, np.ndarray]: output = {} - for node in get_all_nodes(model): + for node in model.graph.node: if node.op_type == "QuantizeLinear": scale = get_tensor_value(model, node.input[1]) zero_point = get_tensor_value(model, node.input[2]) diff --git a/tests/onnx/test_model_transformer.py b/tests/onnx/test_model_transformer.py index 12204bc48c8..da039ee2d1a 100644 --- a/tests/onnx/test_model_transformer.py +++ b/tests/onnx/test_model_transformer.py @@ -20,7 +20,6 @@ from nncf.common.graph.transformations.layout import TransformationLayout from nncf.onnx.graph.model_transformer import ONNXModelTransformer from nncf.onnx.graph.nncf_graph_builder import GraphConverter -from nncf.onnx.graph.onnx_helper 
import get_node_by_name from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.onnx_helper import get_tensor_value from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand @@ -62,7 +61,7 @@ def test_quantizer_insertion(target_layers, should_raise, quantizer_number): if should_raise: try: _ = model_transformer.transform(transformation_layout) - except RuntimeError: + except KeyError: return transformed_model = model_transformer.transform(transformation_layout) onnx.checker.check_model(transformed_model) @@ -181,9 +180,10 @@ def test_bias_correction(layers, values, refs): model_transformer = ONNXModelTransformer(model) transformed_model = model_transformer.transform(transformation_layout) + node_dict = {node.name: node for node in transformed_model.graph.node} for conv_layer, bias_reference in zip(layers, refs): - bias_tensor_name = get_node_by_name(transformed_model, conv_layer).input[2] + bias_tensor_name = node_dict[conv_layer].input[2] bias_tensor = get_tensor(transformed_model, bias_tensor_name) bias_value = onnx.numpy_helper.to_array(bias_tensor) assert np.all(bias_value == bias_reference) From 830818e2e1da8ea1766be8121c6402747bc3233d Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 2 Oct 2023 14:50:31 +0200 Subject: [PATCH 06/21] fix bug with transformation --- nncf/onnx/graph/model_transformer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py index 7471fbecbd8..0a31aa89e08 100644 --- a/nncf/onnx/graph/model_transformer.py +++ b/nncf/onnx/graph/model_transformer.py @@ -379,11 +379,14 @@ def _apply_model_extraction_transformation(self, transformation: ONNXModelExtrac input_onnx_node = node_mapping[input_node_name] input_tensor_names.append(input_onnx_node.input[0]) - output_tensor_names = [n.name for n in self._model.graph.output] + output_tensor_names = [] for output_node_name in transformation.outputs: output_onnx_node = node_mapping[output_node_name] output_tensor_names.append(output_onnx_node.output[0]) + if not output_tensor_names: + output_tensor_names = [n.name for n in self._model.graph.output] + return self.onnx_model_extractor.extract_model(input_tensor_names, output_tensor_names) def _apply_qdq_node_removing_transformations( From 0f997714777f6cda19d7cd1520667eb33acdb585 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 2 Oct 2023 15:42:44 +0200 Subject: [PATCH 07/21] reuse func --- .../algorithms/bias_correction/onnx_backend.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nncf/quantization/algorithms/bias_correction/onnx_backend.py b/nncf/quantization/algorithms/bias_correction/onnx_backend.py index af19fc694a9..6fda7851561 100644 --- a/nncf/quantization/algorithms/bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/bias_correction/onnx_backend.py @@ -23,6 +23,7 @@ from nncf.onnx.graph.node_utils import get_bias_value from nncf.onnx.graph.node_utils import is_any_weight_quantized from nncf.onnx.graph.node_utils import is_node_with_bias +from nncf.onnx.graph.onnx_helper import get_node_mapping from nncf.onnx.graph.transformations.command_creation import create_bias_correction_command from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand from nncf.onnx.graph.transformations.commands import ONNXModelExtractionCommand @@ -101,13 +102,13 @@ def get_bias_value(node: NNCFNode, model: onnx.ModelProto, nncf_graph: NNCFGraph @staticmethod def 
get_input_name(model: onnx.ModelProto, node_name: str) -> str:
-        node_dict = {node.name: node for node in model.graph.node}
-        return node_dict[node_name].input[0]
+        node_mapping = get_node_mapping(model)
+        return node_mapping[node_name].input[0]

     @staticmethod
     def get_output_name(model: onnx.ModelProto, node_name: str, output_id: int) -> List[str]:
-        node_dict = {node.name: node for node in model.graph.node}
-        return node_dict[node_name].output[output_id]
+        node_mapping = get_node_mapping(model)
+        return node_mapping[node_name].output[output_id]

     @staticmethod
     def is_quantized_weights(node: NNCFNode, nncf_graph: NNCFGraph) -> bool:

From ef1e41105fe6292073da184d44183094b60371c0 Mon Sep 17 00:00:00 2001
From: Aleksei Kashapov
Date: Mon, 2 Oct 2023 15:52:28 +0200
Subject: [PATCH 08/21] docstring
---
 nncf/onnx/graph/model_transformer.py |  5 ++++-
 nncf/onnx/graph/onnx_helper.py       | 15 +++++++++++++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py
index 0a31aa89e08..b3625e3b7ef 100644
--- a/nncf/onnx/graph/model_transformer.py
+++ b/nncf/onnx/graph/model_transformer.py
@@ -279,11 +279,14 @@ def _get_scale_zero_point_tensors(
         )
         return onnx_scale_tensor, onnx_zero_point_tensor

-    def _get_quantizer_dequantizer_edge_name(self, transformation: ONNXQuantizerInsertionCommand, node_mapping) -> str:
+    def _get_quantizer_dequantizer_edge_name(
+        self, transformation: ONNXQuantizerInsertionCommand, node_mapping: Dict[str, onnx.NodeProto]
+    ) -> str:
         """
         Returns an edge name on which QuantizeLinear-DequantizeLinear nodes pair has to be inserted.

         :param transformation: QuantizeLinear-DequantizeLinear insertion transformation.
+        :param node_mapping: Mapping from a node name to the node.
         :return: Edge name to insert QuantizeLinear-DequantizeLinear nodes pair.
         """
         port_id = transformation.target_point.port_id
diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py
index 2ab14aa069c..af43a80fa7f 100644
--- a/nncf/onnx/graph/onnx_helper.py
+++ b/nncf/onnx/graph/onnx_helper.py
@@ -15,12 +15,23 @@
 from onnx import numpy_helper


-def get_node_mapping(model: onnx.ModelProto):
+def get_node_mapping(model: onnx.ModelProto) -> Dict[str, onnx.NodeProto]:
+    """
+    Returns a mapping from node name to the node.
+
+    :param model: Model from which the mapping is built.
+    :return: Mapping.
+    """
     return {node.name: node for node in model.graph.node}


 def get_edge_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoProto]:
-    """ """
+    """
+    Returns a mapping from edge name to the edge info.
+
+    :param model: Model from which the mapping is built.
+    :return: Mapping.
+    """
     return {
         tensor.name: tensor
         for tensor in (*model.graph.value_info, *model.graph.input, *model.graph.output, *model.graph.initializer)
     }

From 997cea324357135f2a3b501dc5e44b585ed96769 Mon Sep 17 00:00:00 2001
From: Aleksei Kashapov
Date: Mon, 2 Oct 2023 16:34:26 +0200
Subject: [PATCH 09/21] docstring
---
 nncf/onnx/graph/node_utils.py  |  4 ++--
 nncf/onnx/graph/onnx_helper.py | 12 +++++++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/nncf/onnx/graph/node_utils.py b/nncf/onnx/graph/node_utils.py
index 6d18a894abe..3413d999cf9 100644
--- a/nncf/onnx/graph/node_utils.py
+++ b/nncf/onnx/graph/node_utils.py
@@ -77,8 +77,8 @@ def get_input_edge(

     :param input_node_name: Name of NNCF input node.
     :param input_edges_mapping: A mapping of NNCF input node names and
-    a tuple with the consumed node names and their input port ids.
-    :param model: ONNX model.
+ a tuple with the consumed node names and their input port ids. + :param node_mapping: Mapping of node names to the nodes. :return: Input edge name. """ input_edges = set() diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py index af43a80fa7f..3600ad4592a 100644 --- a/nncf/onnx/graph/onnx_helper.py +++ b/nncf/onnx/graph/onnx_helper.py @@ -42,6 +42,7 @@ def get_model_inputs(model: onnx.ModelProto) -> List[onnx.ValueInfoProto]: """ Returns all model inputs. + :param model: ONNX model. :return: Model Inputs. """ inputs = [] @@ -58,6 +59,7 @@ def get_node_by_output(model: onnx.ModelProto, output_name: str) -> Optional[onn """ Returns node that have output edge with the name 'output_name'. + :param model: ONNX model. :param output_name: The name of output edge. :return: Node with corresponding output. """ @@ -133,6 +135,7 @@ def get_node_index(model: onnx.ModelProto, node_name: str) -> Optional[int]: """ Returns the node index in the model. + :param model: ONNX model. :param node_name: Name of the node. :return: Node index, -1 if there is no such node. """ @@ -146,6 +149,7 @@ def _get_all_tensors(model: onnx.ModelProto) -> Iterator[onnx.TensorProto]: """ Iterate over all tensors of ONNX model. + :param model: ONNX model. :yield: tensors of ONNX model. """ for initializer in model.graph.initializer: @@ -161,6 +165,7 @@ def has_tensor(model: onnx.ModelProto, tensor_name: str) -> bool: """ Returns True whether the model has the tensor with the name equals to tensor_name. + :param model: ONNX model. :param tensor_name: Name of the tensor. :return: True if the model has such tensor, False - otherwise. """ @@ -174,7 +179,8 @@ def get_tensor(model: onnx.ModelProto, tensor_name: str) -> onnx.TensorProto: """ Returns a tensor with the name 'tensor_name'. - :param initializer_name: Name of the Initializer. + :param model: ONNX model. + :param tensor_name: Name of the tensor. :return: The Initializer. """ for tensor in _get_all_tensors(model): @@ -187,6 +193,7 @@ def get_tensor_value(model: onnx.ModelProto, tensor_name: str) -> np.ndarray: """ Returns tensor value of a tensor with the name 'tensor_name'. + :param model: ONNX model. :param tensor_name: Name of the tensor. :return: The value of the tensor. """ @@ -236,6 +243,7 @@ def get_parent(model: onnx.ModelProto, node: onnx.NodeProto, port_id: int) -> Op """ Returns parents of the node. If there is no parent node, returns None. + :param model: ONNX model. :param node: The child node. :param port_id: Input port id on which the parent is seeked. :return: Parent node. @@ -249,6 +257,7 @@ def get_children(model: onnx.ModelProto, node: onnx.NodeProto) -> List[onnx.Node """ Returns children of the node. + :param model: ONNX model. :param node: The parent node. :return: All children nodes. """ @@ -262,6 +271,7 @@ def is_node_has_shared_weight(model: onnx.ModelProto, node: onnx.NodeProto, weig """ Returns whether the node share a weight. + :param model: ONNX model. :param node: Node. :return: True whether node shares a weight - otherwise False. 
""" From adf54067e21be991435a3189c5658392321428d4 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 2 Oct 2023 18:55:03 +0200 Subject: [PATCH 10/21] more mappings --- nncf/onnx/graph/metatypes/onnx_metatypes.py | 12 +++-- nncf/onnx/graph/model_transformer.py | 12 +++-- nncf/onnx/graph/nncf_graph_builder.py | 39 ++++++++------ nncf/onnx/graph/onnx_helper.py | 57 +++++++++------------ 4 files changed, 61 insertions(+), 59 deletions(-) diff --git a/nncf/onnx/graph/metatypes/onnx_metatypes.py b/nncf/onnx/graph/metatypes/onnx_metatypes.py index d26ef79ecd4..f1d2fd8f6da 100644 --- a/nncf/onnx/graph/metatypes/onnx_metatypes.py +++ b/nncf/onnx/graph/metatypes/onnx_metatypes.py @@ -16,6 +16,7 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.operator_metatypes import OperatorMetatypeRegistry from nncf.common.hardware.opset import HWConfigOpName +from nncf.onnx.graph.onnx_helper import get_output_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_parent from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.onnx_helper import has_tensor @@ -644,7 +645,9 @@ def get_metatype(model: onnx.ModelProto, node: onnx.NodeProto) -> ONNXOpMetatype return metatype -def get_tensor_edge_name(model: onnx.ModelProto, node: onnx.NodeProto, port_id: int) -> Optional[str]: +def get_tensor_edge_name( + model: onnx.ModelProto, node: onnx.NodeProto, port_id: int, output_edge_node_mapping +) -> Optional[str]: """ Returns an edge name associated with a weight of a node laying on an input port_id. @@ -674,14 +677,14 @@ def get_tensor_edge_name(model: onnx.ModelProto, node: onnx.NodeProto, port_id: + ONNXDequantizeLinearMetatype.get_all_aliases() ) END_NODES = ONNXConstantMetatype.get_all_aliases() - parent = get_parent(model, node, port_id) + parent = get_parent(node, port_id, output_edge_node_mapping) if not parent: if has_tensor(model, node.input[port_id]): return node.input[port_id] elif parent.op_type in END_NODES: return node.input[port_id] elif parent.op_type in PROPAGATING_NODES: - return get_tensor_edge_name(model, parent, 0) + return get_tensor_edge_name(model, parent, 0, output_edge_node_mapping) return None @@ -731,7 +734,8 @@ def _is_embedding(model: onnx.ModelProto, node: onnx.NodeProto) -> bool: """ tensor_port_id = ONNXEmbeddingMetatype.weight_port_ids[0] allowed_types_list = ["TensorProto.FLOAT"] - weight_edge_name = get_tensor_edge_name(model, node, tensor_port_id) + output_edge_node_mapping = get_output_edge_node_mapping(model) + weight_edge_name = get_tensor_edge_name(model, node, tensor_port_id, output_edge_node_mapping) if weight_edge_name is not None: tensor_data_type = get_tensor(model, weight_edge_name).data_type diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py index b3625e3b7ef..2cda5be9591 100644 --- a/nncf/onnx/graph/model_transformer.py +++ b/nncf/onnx/graph/model_transformer.py @@ -22,9 +22,9 @@ from nncf.onnx.graph.onnx_helper import get_children from nncf.onnx.graph.onnx_helper import get_edge_dtype from nncf.onnx.graph.onnx_helper import get_edge_mapping +from nncf.onnx.graph.onnx_helper import get_input_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_node_index from nncf.onnx.graph.onnx_helper import get_node_mapping -from nncf.onnx.graph.onnx_helper import get_nodes_by_input from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand from nncf.onnx.graph.transformations.commands import 
ONNXModelExtractionCommand @@ -198,8 +198,9 @@ def _apply_quantizer_insertion_transformations( :return: New model with inserted QuantizeLinear-DequantizeLinear nodes pairs. """ self._added_target_edges = Counter() + input_edge_node_mapping = get_input_edge_node_mapping(model) # TODO: maybe update after transformation for transformation in transformations: - model = self._insert_quantizer_dequantizer(model, transformation) + model = self._insert_quantizer_dequantizer(model, transformation, input_edge_node_mapping) return model def _get_quantize_dequantize_nodes( @@ -298,7 +299,7 @@ def _get_quantizer_dequantizer_edge_name( return target_edge_name def _insert_quantizer_dequantizer( - self, model: onnx.ModelProto, transformation: ONNXQuantizerInsertionCommand + self, model: onnx.ModelProto, transformation: ONNXQuantizerInsertionCommand, input_edge_node_mapping ) -> onnx.ModelProto: """ Inserts QuantizeLinear-DequantizeLinear nodes pair. @@ -316,7 +317,7 @@ def _insert_quantizer_dequantizer( # If several nodes on one edge input_nodes = [] - input_nodes.extend(get_nodes_by_input(model, target_edge_name)) + input_nodes.extend(input_edge_node_mapping[target_edge_name]) if not input_nodes: raise RuntimeError( f"Can not add the quantizer to the {target_edge_name} edge. This edge does not have end node." @@ -402,11 +403,12 @@ def _apply_qdq_node_removing_transformations( :param transformations: Nodes removing transformations. :return: Model with removed nodes. """ + input_edge_node_mapping = get_input_edge_node_mapping(model) for transformation in transformations: node_mapping = get_node_mapping(model) node = node_mapping[transformation.target_point.target_node_name] - node_children = get_children(model, node) + node_children = get_children(node, input_edge_node_mapping) for node_child in node_children: for input_id, input_obj in enumerate(node_child.input): if input_obj == node.output[0]: diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py index 22b483418e6..94066583dd4 100644 --- a/nncf/onnx/graph/nncf_graph_builder.py +++ b/nncf/onnx/graph/nncf_graph_builder.py @@ -32,10 +32,10 @@ from nncf.onnx.graph.onnx_helper import get_edge_dtype from nncf.onnx.graph.onnx_helper import get_edge_mapping from nncf.onnx.graph.onnx_helper import get_edge_shape +from nncf.onnx.graph.onnx_helper import get_input_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_input_port_id_for_node_after_input from nncf.onnx.graph.onnx_helper import get_model_inputs -from nncf.onnx.graph.onnx_helper import get_node_by_output -from nncf.onnx.graph.onnx_helper import get_nodes_by_input +from nncf.onnx.graph.onnx_helper import get_output_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_output_port_id_for_node_before_output from nncf.onnx.graph.onnx_helper import get_port_ids_between_nodes from nncf.onnx.graph.onnx_helper import is_node_has_shared_weight @@ -112,7 +112,7 @@ def get_bias_tensor_port_id(metatype: ONNXOpWithWeightsMetatype) -> Optional[int return None -def _get_weight_port_ids(node: onnx.NodeProto, model: onnx.ModelProto) -> Set[int]: +def _get_weight_port_ids(node: onnx.NodeProto, model: onnx.ModelProto, output_edge_node_mapping) -> Set[int]: """ Returns all weight input ports. First, add constant weight port ids from metatype. 
@@ -128,7 +128,7 @@ def _get_weight_port_ids(node: onnx.NodeProto, model: onnx.ModelProto) -> Set[in port_ids.update(constant_port_ids) possible_port_ids = get_possible_weight_port_ids(metatype) for port_id in possible_port_ids: - if get_tensor_edge_name(model, node, port_id): + if get_tensor_edge_name(model, node, port_id, output_edge_node_mapping): port_ids.add(port_id) return port_ids @@ -177,7 +177,7 @@ def _get_node_attrs(node: onnx.NodeProto, model: onnx.ModelProto) -> Dict[str, A return {} -def _get_bias_attr(node: onnx.NodeProto, model: onnx.ModelProto) -> Dict[str, str]: +def _get_bias_attr(node: onnx.NodeProto, model: onnx.ModelProto, output_edge_node_mapping) -> Dict[str, str]: """ Returns bias tensor attributes. @@ -189,7 +189,7 @@ def _get_bias_attr(node: onnx.NodeProto, model: onnx.ModelProto) -> Dict[str, st metatype = get_metatype(model, node) if _is_node_with_bias(node, model): bias_tensor_port_id = get_bias_tensor_port_id(metatype) - bias_edge_name = get_tensor_edge_name(model, node, bias_tensor_port_id) + bias_edge_name = get_tensor_edge_name(model, node, bias_tensor_port_id, output_edge_node_mapping) bias_attrs["name"] = bias_edge_name return bias_attrs @@ -224,7 +224,9 @@ def _replace_empty_node_name(model: onnx.ModelProto) -> onnx.ModelProto: return model @staticmethod - def _add_nncf_input_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_mapping) -> None: + def _add_nncf_input_nodes( + model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_mapping, input_edge_node_mapping + ) -> None: """ Adds special NNCF Input nodes to NNCFGraph. For all the ONNX model inputs, the special NNCF Input node is placed and then corresponding edges are added. @@ -241,7 +243,7 @@ def _add_nncf_input_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_ma node_metatype=InputNoopMetatype, layer_attributes=layer_attributes, ) - to_nodes = get_nodes_by_input(model, input_name) + to_nodes = input_edge_node_mapping[input_name] input_node_node_id = input_node.node_id edge = edge_mapping[input_name] @@ -264,7 +266,9 @@ def _add_nncf_input_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_ma output_port_id += 1 @staticmethod - def _add_nncf_output_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_mapping) -> None: + def _add_nncf_output_nodes( + model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_mapping, output_edge_node_mapping + ) -> None: """ Adds special NNCF Output nodes to NNCFGraph. For all the ONNX model outputs, the special NNCF Output node is placed and then corresponding edges are added. 
@@ -281,7 +285,7 @@ def _add_nncf_output_nodes(model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_m node_metatype=OutputNoopMetatype, layer_attributes=layer_attributes, ) - from_node = get_node_by_output(model, output_name) + from_node = output_edge_node_mapping[output_name] output_node_node_id = output_node.node_id edge = edge_mapping[output_name] @@ -324,14 +328,16 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: onnx_model = GraphConverter._replace_empty_node_name(onnx_model) onnx_model = onnx.shape_inference.infer_shapes(onnx_model) edge_mapping = get_edge_mapping(onnx_model) + input_edge_node_mapping = get_input_edge_node_mapping(onnx_model) + output_edge_node_mapping = get_output_edge_node_mapping(onnx_model) nncf_graph = NNCFGraph() for node in onnx_model.graph.node: metatype = get_metatype(onnx_model, node) - weight_port_ids = _get_weight_port_ids(node, onnx_model) + weight_port_ids = _get_weight_port_ids(node, onnx_model, output_edge_node_mapping) is_shared = None weight_attrs = {} node_attrs = _get_node_attrs(node, onnx_model) - bias_attrs = _get_bias_attr(node, onnx_model) + bias_attrs = _get_bias_attr(node, onnx_model, output_edge_node_mapping) if weight_port_ids: # If node has weight weight_edge_names = [] for weight_port_id in weight_port_ids: @@ -340,7 +346,7 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: edge = edge_mapping[weight_edge_name] weight_shape = get_edge_shape(edge) weight_attrs[weight_port_id] = {"name": weight_edge_name, "shape": weight_shape} - if not is_shared and is_node_has_shared_weight(onnx_model, node, weight_port_id): + if not is_shared and is_node_has_shared_weight(node, weight_port_id, input_edge_node_mapping): is_shared = True layer_attributes = ONNXLayerAttributes( @@ -367,7 +373,7 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: onnx_dtype = get_edge_dtype(edge) nncf_dtype = GraphConverter.convert_onnx_dtype_to_nncf_dtype(onnx_dtype) output_node_id = nncf_graph.get_node_by_name(output_node.name).node_id - input_nodes = get_nodes_by_input(onnx_model, output_edge) + input_nodes = input_edge_node_mapping[output_edge] for input_node in input_nodes: port_ids = get_port_ids_between_nodes(output_node, input_node) input_port_id = port_ids["input_port_id"] @@ -381,6 +387,7 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: output_port_id=output_port_id, dtype=Dtype(nncf_dtype), ) - GraphConverter._add_nncf_input_nodes(onnx_model, nncf_graph, edge_mapping) - GraphConverter._add_nncf_output_nodes(onnx_model, nncf_graph, edge_mapping) + + GraphConverter._add_nncf_input_nodes(onnx_model, nncf_graph, edge_mapping, input_edge_node_mapping) + GraphConverter._add_nncf_output_nodes(onnx_model, nncf_graph, edge_mapping, output_edge_node_mapping) return nncf_graph diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py index 3600ad4592a..ce0ed7a7aa5 100644 --- a/nncf/onnx/graph/onnx_helper.py +++ b/nncf/onnx/graph/onnx_helper.py @@ -8,6 +8,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from collections import defaultdict
 from typing import Dict, Iterator, List, Optional, Union
 
 import numpy as np
@@ -38,6 +39,34 @@ def get_edge_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoProto]:
     }
 
 
+def get_input_edge_node_mapping(model: onnx.ModelProto) -> Dict[str, List[onnx.NodeProto]]:
+    """
+    Returns a mapping from an edge name to all the nodes that consume this edge as an input.
+
+    :param model: ONNX model.
+    :return: Mapping from edge names to lists of consumer nodes.
+    """
+    output = defaultdict(list)
+    for node in model.graph.node:
+        for input_edge in node.input:
+            output[input_edge].append(node)
+    return output
+
+
+def get_output_edge_node_mapping(model: onnx.ModelProto) -> Dict[str, onnx.NodeProto]:
+    """
+    Returns a mapping from an edge name to the node that produces this edge as an output.
+
+    :param model: ONNX model.
+    :return: Mapping from edge names to producer nodes.
+    """
+    output = {}
+    for node in model.graph.node:
+        for output_edge in node.output:
+            output[output_edge] = node
+    return output
+
+
 def get_model_inputs(model: onnx.ModelProto) -> List[onnx.ValueInfoProto]:
     """
     Returns all model inputs.
@@ -55,34 +84,6 @@ def get_model_inputs(model: onnx.ModelProto) -> List[onnx.ValueInfoProto]:
     return inputs
 
 
-def get_node_by_output(model: onnx.ModelProto, output_name: str) -> Optional[onnx.NodeProto]:
-    """
-    Returns node that have output edge with the name 'output_name'.
-
-    :param model: ONNX model.
-    :param output_name: The name of output edge.
-    :return: Node with corresponding output.
-    """
-    for node in model.graph.node:
-        if output_name in node.output:
-            return node
-    return None
-
-
-def get_nodes_by_input(model: onnx.ModelProto, input_name: str) -> List[onnx.NodeProto]:
-    """
-    Returns all nodes that have input with the name 'input_name'.
-
-    :param input_name: The name of input edge.
-    :return: Nodes with corresponding input.
-    """
-    output = []
-    for node in model.graph.node:
-        if input_name in node.input:
-            output.append(node)
-    return output
-
-
 def get_input_port_id_for_node_after_input(input_name: str, to_node: onnx.NodeProto) -> int:
     """
     Returns input_port_id for 'to_node' connected with the model input with the name 'input_name'.
@@ -239,7 +240,7 @@ def get_edge_dtype(edge: Union[onnx.ValueInfoProto, onnx.TensorProto]) -> int:
     return edge.data_type
 
 
-def get_parent(model: onnx.ModelProto, node: onnx.NodeProto, port_id: int) -> Optional[onnx.NodeProto]:
+def get_parent(node: onnx.NodeProto, port_id: int, output_edge_node_mapping) -> Optional[onnx.NodeProto]:
     """
     Returns parents of the node. If there is no parent node, returns None.
 
@@ -249,11 +250,11 @@ def get_parent(model: onnx.ModelProto, node: onnx.NodeProto, port_id: int) -> Op
     :return: Parent node.
     """
     if port_id < len(node.input):
-        return get_node_by_output(model, node.input[port_id])
+        return output_edge_node_mapping.get(node.input[port_id])
     return None
 
 
-def get_children(model: onnx.ModelProto, node: onnx.NodeProto) -> List[onnx.NodeProto]:
+def get_children(node: onnx.NodeProto, input_edge_node_mapping) -> List[onnx.NodeProto]:
     """
     Returns children of the node.
 
@@ -263,11 +264,11 @@ def get_children(model: onnx.ModelProto, node: onnx.NodeProto) -> List[onnx.Node
     """
     output = []
     for node_edge in node.output:
-        output.extend(get_nodes_by_input(model, node_edge))
+        output.extend(input_edge_node_mapping[node_edge])
     return output
 
 
-def is_node_has_shared_weight(model: onnx.ModelProto, node: onnx.NodeProto, weight_port_id: int) -> bool:
+def is_node_has_shared_weight(node: onnx.NodeProto, weight_port_id: int, input_edge_node_mapping) -> bool:
     """
     Returns whether the node share a weight.
 
@@ -276,5 +277,5 @@ def is_node_has_shared_weight(model: onnx.ModelProto, node: onnx.NodeProto, weig
     :return: True whether node shares a weight - otherwise False.
     """
     weight_tensor_edge = node.input[weight_port_id]
-    nodes = get_nodes_by_input(model, weight_tensor_edge)
+    nodes = input_edge_node_mapping[weight_tensor_edge]
     return len(nodes) > 1
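The two mapping helpers introduced above replace per-call linear scans of model.graph.node (previously performed by get_node_by_output and get_nodes_by_input) with dictionaries that are built once per model and threaded through the traversal utilities, so every parent/child lookup becomes a constant-time access. A minimal usage sketch, assuming a loaded onnx.ModelProto; the "model.onnx" path and the choice of node are illustrative only, not part of the patch:

    import onnx

    from nncf.onnx.graph.onnx_helper import get_children
    from nncf.onnx.graph.onnx_helper import get_input_edge_node_mapping
    from nncf.onnx.graph.onnx_helper import get_output_edge_node_mapping
    from nncf.onnx.graph.onnx_helper import get_parent

    model = onnx.load("model.onnx")  # placeholder path

    # Build both mappings once; they remain valid only until the graph is mutated.
    input_edge_node_mapping = get_input_edge_node_mapping(model)  # edge name -> list of consumer nodes
    output_edge_node_mapping = get_output_edge_node_mapping(model)  # edge name -> producer node

    node = model.graph.node[0]
    # O(1) dictionary lookups instead of O(n) scans over model.graph.node:
    parent = get_parent(node, 0, output_edge_node_mapping)  # None when port 0 is fed by a graph input or initializer
    children = get_children(node, input_edge_node_mapping)

Because the mappings are snapshots of the graph, callers that mutate the model, such as _apply_quantizer_insertion_transformations above, build them before the transformation loop and would have to rebuild them for later passes to see the inserted nodes; the TODO in that hunk records exactly this caveat.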
""" weight_tensor_edge = node.input[weight_port_id] - nodes = get_nodes_by_input(model, weight_tensor_edge) + nodes = input_edge_node_mapping[weight_tensor_edge] return len(nodes) > 1 From 9d1b47bb504dfd7510e043d06e1799572137a0f7 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 4 Oct 2023 10:20:48 +0200 Subject: [PATCH 11/21] update graph node orders --- .../quantization/MaskRCNN-12.dot | 100 ++-- .../quantization/bertsquad-12.dot | 456 +++++++++--------- .../quantization/retinanet-9.dot | 20 +- 3 files changed, 288 insertions(+), 288 deletions(-) diff --git a/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot b/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot index 0775c236c76..e2a0350705f 100644 --- a/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot +++ b/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot @@ -481,10 +481,10 @@ strict digraph { "479 QuantizeLinear_389_1" [id=479, type=QuantizeLinear]; "480 DequantizeLinear_389_1" [id=480, type=DequantizeLinear]; "481 390" [id=481, type=Conv]; -"482 QuantizeLinear_391_2" [id=482, type=QuantizeLinear]; -"483 DequantizeLinear_391_2" [id=483, type=DequantizeLinear]; -"484 QuantizeLinear_391_1" [id=484, type=QuantizeLinear]; -"485 DequantizeLinear_391_1" [id=485, type=DequantizeLinear]; +"482 QuantizeLinear_391_1" [id=482, type=QuantizeLinear]; +"483 DequantizeLinear_391_1" [id=483, type=DequantizeLinear]; +"484 QuantizeLinear_391_2" [id=484, type=QuantizeLinear]; +"485 DequantizeLinear_391_2" [id=485, type=DequantizeLinear]; "486 487" [id=486, type=MaxPool]; "487 QuantizeLinear_489_1" [id=487, type=QuantizeLinear]; "488 DequantizeLinear_489_1" [id=488, type=DequantizeLinear]; @@ -1749,14 +1749,14 @@ strict digraph { "1747 1172" [id=1747, type=Gather]; "1748 2479" [id=1748, type=Concat]; "1749 2490" [id=1749, type=Gather]; -"1750 QuantizeLinear_2527_4" [id=1750, type=QuantizeLinear]; -"1751 DequantizeLinear_2527_4" [id=1751, type=DequantizeLinear]; -"1752 QuantizeLinear_2527_3" [id=1752, type=QuantizeLinear]; -"1753 DequantizeLinear_2527_3" [id=1753, type=DequantizeLinear]; -"1754 QuantizeLinear_2527_2" [id=1754, type=QuantizeLinear]; -"1755 DequantizeLinear_2527_2" [id=1755, type=DequantizeLinear]; -"1756 QuantizeLinear_2527_1" [id=1756, type=QuantizeLinear]; -"1757 DequantizeLinear_2527_1" [id=1757, type=DequantizeLinear]; +"1750 QuantizeLinear_2527_1" [id=1750, type=QuantizeLinear]; +"1751 DequantizeLinear_2527_1" [id=1751, type=DequantizeLinear]; +"1752 QuantizeLinear_2527_2" [id=1752, type=QuantizeLinear]; +"1753 DequantizeLinear_2527_2" [id=1753, type=DequantizeLinear]; +"1754 QuantizeLinear_2527_3" [id=1754, type=QuantizeLinear]; +"1755 DequantizeLinear_2527_3" [id=1755, type=DequantizeLinear]; +"1756 QuantizeLinear_2527_4" [id=1756, type=QuantizeLinear]; +"1757 DequantizeLinear_2527_4" [id=1757, type=DequantizeLinear]; "1758 2532" [id=1758, type=Slice]; "1759 2534" [id=1759, type=Gather]; "1760 2525" [id=1760, type=Slice]; @@ -3692,14 +3692,14 @@ strict digraph { "3690 3030" [id=3690, type=Gather]; "3691 6518" [id=3691, type=Concat]; "3692 6530" [id=3692, type=Gather]; -"3693 QuantizeLinear_6568_4" [id=3693, type=QuantizeLinear]; -"3694 DequantizeLinear_6568_4" [id=3694, type=DequantizeLinear]; -"3695 QuantizeLinear_6568_3" [id=3695, type=QuantizeLinear]; -"3696 DequantizeLinear_6568_3" [id=3696, type=DequantizeLinear]; -"3697 QuantizeLinear_6568_2" [id=3697, type=QuantizeLinear]; -"3698 DequantizeLinear_6568_2" [id=3698, type=DequantizeLinear]; -"3699 QuantizeLinear_6568_1" 
[id=3699, type=QuantizeLinear]; -"3700 DequantizeLinear_6568_1" [id=3700, type=DequantizeLinear]; +"3693 QuantizeLinear_6568_1" [id=3693, type=QuantizeLinear]; +"3694 DequantizeLinear_6568_1" [id=3694, type=DequantizeLinear]; +"3695 QuantizeLinear_6568_2" [id=3695, type=QuantizeLinear]; +"3696 DequantizeLinear_6568_2" [id=3696, type=DequantizeLinear]; +"3697 QuantizeLinear_6568_3" [id=3697, type=QuantizeLinear]; +"3698 DequantizeLinear_6568_3" [id=3698, type=DequantizeLinear]; +"3699 QuantizeLinear_6568_4" [id=3699, type=QuantizeLinear]; +"3700 DequantizeLinear_6568_4" [id=3700, type=DequantizeLinear]; "3701 6576" [id=3701, type=Slice]; "3702 6578" [id=3702, type=Gather]; "3703 6569" [id=3703, type=Slice]; @@ -4788,16 +4788,16 @@ strict digraph { "478 DequantizeLinear_388_1" -> "481 390" [label="[1, 256, -1, -1]", style=solid]; "479 QuantizeLinear_389_1" -> "480 DequantizeLinear_389_1" [label="[256, 256, 3, 3]", style=dashed]; "480 DequantizeLinear_389_1" -> "481 390" [label="[256, 256, 3, 3]", style=solid]; -"481 390" -> "482 QuantizeLinear_391_2" [label="[1, 256, -1, -1]", style=solid]; -"481 390" -> "484 QuantizeLinear_391_1" [label="[1, 256, -1, -1]", style=solid]; +"481 390" -> "482 QuantizeLinear_391_1" [label="[1, 256, -1, -1]", style=solid]; +"481 390" -> "484 QuantizeLinear_391_2" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "784 536" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "787 533" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "1929 2620" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "3872 6664" [label="[1, 256, -1, -1]", style=solid]; -"482 QuantizeLinear_391_2" -> "483 DequantizeLinear_391_2" [label="[1, 256, -1, -1]", style=dashed]; -"483 DequantizeLinear_391_2" -> "722 506" [label="[1, 256, -1, -1]", style=solid]; -"484 QuantizeLinear_391_1" -> "485 DequantizeLinear_391_1" [label="[1, 256, -1, -1]", style=dashed]; -"485 DequantizeLinear_391_1" -> "486 487" [label="[1, 256, -1, -1]", style=solid]; +"482 QuantizeLinear_391_1" -> "483 DequantizeLinear_391_1" [label="[1, 256, -1, -1]", style=dashed]; +"483 DequantizeLinear_391_1" -> "486 487" [label="[1, 256, -1, -1]", style=solid]; +"484 QuantizeLinear_391_2" -> "485 DequantizeLinear_391_2" [label="[1, 256, -1, -1]", style=dashed]; +"485 DequantizeLinear_391_2" -> "722 506" [label="[1, 256, -1, -1]", style=solid]; "486 487" -> "489 510" [label="[1, 256, -1, -1]", style=solid]; "486 487" -> "555 542" [label="[1, 256, -1, -1]", style=solid]; "486 487" -> "558 539" [label="[1, 256, -1, -1]", style=solid]; @@ -6341,21 +6341,21 @@ strict digraph { "1746 1171" -> "1747 1172" [label="[-1]", style=dashed]; "1747 1172" -> "1748 2479" [label="[-1, 4]", style=solid]; "1748 2479" -> "1749 2490" [label="[-1, 4]", style=solid]; -"1749 2490" -> "1750 QuantizeLinear_2527_4" [label="[]", style=solid]; -"1749 2490" -> "1752 QuantizeLinear_2527_3" [label="[]", style=solid]; -"1749 2490" -> "1754 QuantizeLinear_2527_2" [label="[]", style=solid]; -"1749 2490" -> "1756 QuantizeLinear_2527_1" [label="[]", style=solid]; +"1749 2490" -> "1750 QuantizeLinear_2527_1" [label="[]", style=solid]; +"1749 2490" -> "1752 QuantizeLinear_2527_2" [label="[]", style=solid]; +"1749 2490" -> "1754 QuantizeLinear_2527_3" [label="[]", style=solid]; +"1749 2490" -> "1756 QuantizeLinear_2527_4" [label="[]", style=solid]; "1749 2490" -> "1803 2495" [label="[]", style=solid]; "1749 2490" -> "1807 2503" [label="[]", style=solid]; "1749 2490" -> "2009 2775" [label="[]", style=solid]; -"1750 QuantizeLinear_2527_4" -> "1751 
DequantizeLinear_2527_4" [label="[]", style=dashed]; -"1751 DequantizeLinear_2527_4" -> "1768 2508" [label="[]", style=solid]; -"1752 QuantizeLinear_2527_3" -> "1753 DequantizeLinear_2527_3" [label="[]", style=dashed]; -"1753 DequantizeLinear_2527_3" -> "1766 2515" [label="[]", style=solid]; -"1754 QuantizeLinear_2527_2" -> "1755 DequantizeLinear_2527_2" [label="[]", style=dashed]; -"1755 DequantizeLinear_2527_2" -> "1760 2525" [label="[]", style=solid]; -"1756 QuantizeLinear_2527_1" -> "1757 DequantizeLinear_2527_1" [label="[]", style=dashed]; -"1757 DequantizeLinear_2527_1" -> "1758 2532" [label="[]", style=solid]; +"1750 QuantizeLinear_2527_1" -> "1751 DequantizeLinear_2527_1" [label="[]", style=dashed]; +"1751 DequantizeLinear_2527_1" -> "1758 2532" [label="[]", style=solid]; +"1752 QuantizeLinear_2527_2" -> "1753 DequantizeLinear_2527_2" [label="[]", style=dashed]; +"1753 DequantizeLinear_2527_2" -> "1760 2525" [label="[]", style=solid]; +"1754 QuantizeLinear_2527_3" -> "1755 DequantizeLinear_2527_3" [label="[]", style=dashed]; +"1755 DequantizeLinear_2527_3" -> "1766 2515" [label="[]", style=solid]; +"1756 QuantizeLinear_2527_4" -> "1757 DequantizeLinear_2527_4" [label="[]", style=dashed]; +"1757 DequantizeLinear_2527_4" -> "1768 2508" [label="[]", style=solid]; "1758 2532" -> "1759 2534" [label="[]", style=solid]; "1759 2534" -> "1762 2535" [label="[]", style=solid]; "1760 2525" -> "1761 2527" [label="[]", style=solid]; @@ -8988,21 +8988,21 @@ strict digraph { "3690 3030" -> "3691 6518" [label="[]", style=solid]; "3690 3030" -> "4259 3037" [label="[]", style=solid]; "3691 6518" -> "3692 6530" [label="[]", style=solid]; -"3692 6530" -> "3693 QuantizeLinear_6568_4" [label="[-1, 4]", style=solid]; -"3692 6530" -> "3695 QuantizeLinear_6568_3" [label="[-1, 4]", style=solid]; -"3692 6530" -> "3697 QuantizeLinear_6568_2" [label="[-1, 4]", style=solid]; -"3692 6530" -> "3699 QuantizeLinear_6568_1" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3693 QuantizeLinear_6568_1" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3695 QuantizeLinear_6568_2" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3697 QuantizeLinear_6568_3" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3699 QuantizeLinear_6568_4" [label="[-1, 4]", style=solid]; "3692 6530" -> "3746 6539" [label="[-1, 4]", style=solid]; "3692 6530" -> "3750 6547" [label="[-1, 4]", style=solid]; "3692 6530" -> "4281 nncf_model_output_0" [label="[-1, 4]", style=solid]; -"3693 QuantizeLinear_6568_4" -> "3694 DequantizeLinear_6568_4" [label="[-1, 4]", style=dashed]; -"3694 DequantizeLinear_6568_4" -> "3711 6552" [label="[-1, 4]", style=solid]; -"3695 QuantizeLinear_6568_3" -> "3696 DequantizeLinear_6568_3" [label="[-1, 4]", style=dashed]; -"3696 DequantizeLinear_6568_3" -> "3709 6559" [label="[-1, 4]", style=solid]; -"3697 QuantizeLinear_6568_2" -> "3698 DequantizeLinear_6568_2" [label="[-1, 4]", style=dashed]; -"3698 DequantizeLinear_6568_2" -> "3703 6569" [label="[-1, 4]", style=solid]; -"3699 QuantizeLinear_6568_1" -> "3700 DequantizeLinear_6568_1" [label="[-1, 4]", style=dashed]; -"3700 DequantizeLinear_6568_1" -> "3701 6576" [label="[-1, 4]", style=solid]; +"3693 QuantizeLinear_6568_1" -> "3694 DequantizeLinear_6568_1" [label="[-1, 4]", style=dashed]; +"3694 DequantizeLinear_6568_1" -> "3701 6576" [label="[-1, 4]", style=solid]; +"3695 QuantizeLinear_6568_2" -> "3696 DequantizeLinear_6568_2" [label="[-1, 4]", style=dashed]; +"3696 DequantizeLinear_6568_2" -> "3703 6569" [label="[-1, 4]", style=solid]; +"3697 QuantizeLinear_6568_3" -> 
"3698 DequantizeLinear_6568_3" [label="[-1, 4]", style=dashed]; +"3698 DequantizeLinear_6568_3" -> "3709 6559" [label="[-1, 4]", style=solid]; +"3699 QuantizeLinear_6568_4" -> "3700 DequantizeLinear_6568_4" [label="[-1, 4]", style=dashed]; +"3700 DequantizeLinear_6568_4" -> "3711 6552" [label="[-1, 4]", style=solid]; "3701 6576" -> "3702 6578" [label="[-1, 4]", style=solid]; "3702 6578" -> "3705 6579" [label="[-1]", style=solid]; "3703 6569" -> "3704 6571" [label="[-1, 4]", style=solid]; diff --git a/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot b/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot index 246765a6663..5e2502ba0c4 100644 --- a/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot +++ b/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot @@ -405,14 +405,14 @@ strict digraph { "403 bert/encoder/Reshape_13/shape_Concat__301" [id=403, type=Concat]; "404 bert/encoder/Reshape_13__471" [id=404, type=Cast]; "405 bert/encoder/Reshape_1" [id=405, type=Reshape]; -"406 QuantizeLinear_bert/encoder/Reshape_1^0_3" [id=406, label="406 QuantizeLinear_bert/encoder/Reshape_1:0_3", type=QuantizeLinear]; -"407 DequantizeLinear_bert/encoder/Reshape_1^0_3" [id=407, label="407 DequantizeLinear_bert/encoder/Reshape_1:0_3", type=DequantizeLinear]; -"408 QuantizeLinear_bert/encoder/Reshape_1^0_2" [id=408, label="408 QuantizeLinear_bert/encoder/Reshape_1:0_2", type=QuantizeLinear]; -"409 DequantizeLinear_bert/encoder/Reshape_1^0_2" [id=409, label="409 DequantizeLinear_bert/encoder/Reshape_1:0_2", type=DequantizeLinear]; -"410 QuantizeLinear_bert/encoder/Reshape_1^0_1" [id=410, label="410 QuantizeLinear_bert/encoder/Reshape_1:0_1", type=QuantizeLinear]; -"411 DequantizeLinear_bert/encoder/Reshape_1^0_1" [id=411, label="411 DequantizeLinear_bert/encoder/Reshape_1:0_1", type=DequantizeLinear]; -"412 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=412, label="412 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=413, label="413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"406 QuantizeLinear_bert/encoder/Reshape_1^0_1" [id=406, label="406 QuantizeLinear_bert/encoder/Reshape_1:0_1", type=QuantizeLinear]; +"407 DequantizeLinear_bert/encoder/Reshape_1^0_1" [id=407, label="407 DequantizeLinear_bert/encoder/Reshape_1:0_1", type=DequantizeLinear]; +"408 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=408, label="408 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=409, label="409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"410 QuantizeLinear_bert/encoder/Reshape_1^0_2" [id=410, label="410 QuantizeLinear_bert/encoder/Reshape_1:0_2", type=QuantizeLinear]; +"411 DequantizeLinear_bert/encoder/Reshape_1^0_2" [id=411, label="411 DequantizeLinear_bert/encoder/Reshape_1:0_2", type=DequantizeLinear]; +"412 QuantizeLinear_bert/encoder/Reshape_1^0_3" [id=412, label="412 QuantizeLinear_bert/encoder/Reshape_1:0_3", type=QuantizeLinear]; +"413 DequantizeLinear_bert/encoder/Reshape_1^0_3" [id=413, label="413 DequantizeLinear_bert/encoder/Reshape_1:0_3", type=DequantizeLinear]; "414 bert/encoder/layer_0/attention/self/value/MatMul" [id=414, type=MatMul]; "415 
bert/encoder/layer_0/attention/self/value/BiasAdd" [id=415, type=Add]; "416 bert/encoder/layer_0/attention/self/Reshape_2" [id=416, type=Reshape]; @@ -495,14 +495,14 @@ strict digraph { "493 bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" [id=493, type=Sub]; "494 bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1" [id=494, type=Mul]; "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" [id=495, type=Add]; -"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=496, label="496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=497, label="497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=498, label="498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=499, label="499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=500, label="500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=501, label="501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"502 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=502, label="502 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=503, label="503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=496, label="496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=497, label="497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"498 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=498, label="498 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=499, label="499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=500, label="500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=501, label="501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=502, label="502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=503, label="503 
DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "504 bert/encoder/layer_1/attention/self/value/MatMul" [id=504, type=MatMul]; "505 bert/encoder/layer_1/attention/self/value/BiasAdd" [id=505, type=Add]; "506 bert/encoder/layer_1/attention/self/Reshape_2" [id=506, type=Reshape]; @@ -585,14 +585,14 @@ strict digraph { "583 bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" [id=583, type=Sub]; "584 bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1" [id=584, type=Mul]; "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" [id=585, type=Add]; -"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=586, label="586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=587, label="587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=588, label="588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=589, label="589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=590, label="590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=591, label="591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"592 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=592, label="592 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=593, label="593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=586, label="586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=587, label="587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"588 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=588, label="588 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=589, label="589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=590, label="590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=591, label="591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=592, label="592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", 
type=QuantizeLinear]; +"593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=593, label="593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "594 bert/encoder/layer_2/attention/self/value/MatMul" [id=594, type=MatMul]; "595 bert/encoder/layer_2/attention/self/value/BiasAdd" [id=595, type=Add]; "596 bert/encoder/layer_2/attention/self/Reshape_2" [id=596, type=Reshape]; @@ -675,14 +675,14 @@ strict digraph { "673 bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" [id=673, type=Sub]; "674 bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1" [id=674, type=Mul]; "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" [id=675, type=Add]; -"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=676, label="676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=677, label="677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=678, label="678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=679, label="679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=680, label="680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=681, label="681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"682 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=682, label="682 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=683, label="683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=676, label="676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=677, label="677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"678 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=678, label="678 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=679, label="679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=680, label="680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=681, label="681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"682 
QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=682, label="682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=683, label="683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "684 bert/encoder/layer_3/attention/self/value/MatMul" [id=684, type=MatMul]; "685 bert/encoder/layer_3/attention/self/value/BiasAdd" [id=685, type=Add]; "686 bert/encoder/layer_3/attention/self/Reshape_2" [id=686, type=Reshape]; @@ -765,14 +765,14 @@ strict digraph { "763 bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" [id=763, type=Sub]; "764 bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1" [id=764, type=Mul]; "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" [id=765, type=Add]; -"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=766, label="766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=767, label="767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=768, label="768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=769, label="769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=770, label="770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=771, label="771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"772 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=772, label="772 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=773, label="773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=766, label="766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=767, label="767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"768 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=768, label="768 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=769, label="769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=770, label="770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=771, 
label="771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=772, label="772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=773, label="773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "774 bert/encoder/layer_4/attention/self/value/MatMul" [id=774, type=MatMul]; "775 bert/encoder/layer_4/attention/self/value/BiasAdd" [id=775, type=Add]; "776 bert/encoder/layer_4/attention/self/Reshape_2" [id=776, type=Reshape]; @@ -855,14 +855,14 @@ strict digraph { "853 bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" [id=853, type=Sub]; "854 bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1" [id=854, type=Mul]; "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" [id=855, type=Add]; -"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=856, label="856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=857, label="857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=858, label="858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=859, label="859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=860, label="860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=861, label="861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"862 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=862, label="862 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=863, label="863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=856, label="856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=857, label="857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"858 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=858, label="858 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=859, label="859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=860, label="860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", 
type=QuantizeLinear]; +"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=861, label="861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=862, label="862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=863, label="863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "864 bert/encoder/layer_5/attention/self/value/MatMul" [id=864, type=MatMul]; "865 bert/encoder/layer_5/attention/self/value/BiasAdd" [id=865, type=Add]; "866 bert/encoder/layer_5/attention/self/Reshape_2" [id=866, type=Reshape]; @@ -945,14 +945,14 @@ strict digraph { "943 bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" [id=943, type=Sub]; "944 bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1" [id=944, type=Mul]; "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" [id=945, type=Add]; -"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=946, label="946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=947, label="947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=948, label="948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=949, label="949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=950, label="950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=951, label="951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"952 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=952, label="952 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=953, label="953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=946, label="946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=947, label="947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"948 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=948, label="948 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=949, label="949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"950 
QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=950, label="950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=951, label="951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=952, label="952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=953, label="953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "954 bert/encoder/layer_6/attention/self/value/MatMul" [id=954, type=MatMul]; "955 bert/encoder/layer_6/attention/self/value/BiasAdd" [id=955, type=Add]; "956 bert/encoder/layer_6/attention/self/Reshape_2" [id=956, type=Reshape]; @@ -1035,14 +1035,14 @@ strict digraph { "1033 bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" [id=1033, type=Sub]; "1034 bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1" [id=1034, type=Mul]; "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" [id=1035, type=Add]; -"1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1036, label="1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1037, label="1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1038, label="1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1039, label="1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1040, label="1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1041, label="1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1042 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1042, label="1042 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1043, label="1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1036, label="1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1037, label="1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1038 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1038, label="1038 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1039 
DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1039, label="1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1040, label="1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1041, label="1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1042, label="1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1043, label="1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "1044 bert/encoder/layer_7/attention/self/value/MatMul" [id=1044, type=MatMul]; "1045 bert/encoder/layer_7/attention/self/value/BiasAdd" [id=1045, type=Add]; "1046 bert/encoder/layer_7/attention/self/Reshape_2" [id=1046, type=Reshape]; @@ -1125,14 +1125,14 @@ strict digraph { "1123 bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" [id=1123, type=Sub]; "1124 bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1" [id=1124, type=Mul]; "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" [id=1125, type=Add]; -"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1126, label="1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1127, label="1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1128, label="1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1129, label="1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1130, label="1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1131, label="1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1132 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1132, label="1132 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1133, label="1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1126, label="1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1127, label="1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1128 
QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1128, label="1128 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1129, label="1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1130, label="1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1131, label="1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1132, label="1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1133, label="1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "1134 bert/encoder/layer_8/attention/self/value/MatMul" [id=1134, type=MatMul]; "1135 bert/encoder/layer_8/attention/self/value/BiasAdd" [id=1135, type=Add]; "1136 bert/encoder/layer_8/attention/self/Reshape_2" [id=1136, type=Reshape]; @@ -1215,14 +1215,14 @@ strict digraph { "1213 bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" [id=1213, type=Sub]; "1214 bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1" [id=1214, type=Mul]; "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" [id=1215, type=Add]; -"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1216, label="1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1217, label="1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1218, label="1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1219, label="1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1220, label="1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1221, label="1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1222 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1222, label="1222 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1223, label="1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1216, label="1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1217 
DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1217, label="1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1218 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1218, label="1218 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1219, label="1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1220, label="1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1221, label="1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1222, label="1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1223, label="1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "1224 bert/encoder/layer_9/attention/self/value/MatMul" [id=1224, type=MatMul]; "1225 bert/encoder/layer_9/attention/self/value/BiasAdd" [id=1225, type=Add]; "1226 bert/encoder/layer_9/attention/self/Reshape_2" [id=1226, type=Reshape]; @@ -1305,14 +1305,14 @@ strict digraph { "1303 bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" [id=1303, type=Sub]; "1304 bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1" [id=1304, type=Mul]; "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" [id=1305, type=Add]; -"1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1306, label="1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1307, label="1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1308, label="1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1309, label="1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1310, label="1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1311, label="1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1312 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1312, label="1312 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1313, label="1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1306 
QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1306, label="1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1307, label="1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1308 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1308, label="1308 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1309, label="1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1310, label="1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1311, label="1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1312, label="1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1313, label="1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "1314 bert/encoder/layer_10/attention/self/value/MatMul" [id=1314, type=MatMul]; "1315 bert/encoder/layer_10/attention/self/value/BiasAdd" [id=1315, type=Add]; "1316 bert/encoder/layer_10/attention/self/Reshape_2" [id=1316, type=Reshape]; @@ -1395,14 +1395,14 @@ strict digraph { "1393 bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" [id=1393, type=Sub]; "1394 bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1" [id=1394, type=Mul]; "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" [id=1395, type=Add]; -"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1396, label="1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1397, label="1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1398, label="1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1399, label="1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1400, label="1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1401, label="1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1402 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1402, label="1402 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=QuantizeLinear]; 
-"1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1403, label="1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1396, label="1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1397, label="1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1398 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1398, label="1398 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1399, label="1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1400, label="1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1401, label="1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1402, label="1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1403, label="1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "1404 bert/encoder/layer_11/attention/self/value/MatMul" [id=1404, type=MatMul]; "1405 bert/encoder/layer_11/attention/self/value/BiasAdd" [id=1405, type=Add]; "1406 bert/encoder/layer_11/attention/self/Reshape_2" [id=1406, type=Reshape]; @@ -1991,18 +1991,18 @@ strict digraph { "402 bert/encoder/Reshape_13/shape_Unsqueeze__298" -> "403 bert/encoder/Reshape_13/shape_Concat__301" [label="[1]", style=dashed]; "403 bert/encoder/Reshape_13/shape_Concat__301" -> "404 bert/encoder/Reshape_13__471" [label="[3]", style=dashed]; "404 bert/encoder/Reshape_13__471" -> "1488 bert/encoder/Reshape_13" [label="[3]", style=dashed]; -"405 bert/encoder/Reshape_1" -> "406 QuantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=solid]; -"405 bert/encoder/Reshape_1" -> "408 QuantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=solid]; -"405 bert/encoder/Reshape_1" -> "410 QuantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=solid]; +"405 bert/encoder/Reshape_1" -> "406 QuantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=solid]; +"405 bert/encoder/Reshape_1" -> "410 QuantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=solid]; +"405 bert/encoder/Reshape_1" -> "412 QuantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=solid]; "405 bert/encoder/Reshape_1" -> "448 bert/encoder/layer_0/attention/output/add" [label="[]", style=solid]; -"406 QuantizeLinear_bert/encoder/Reshape_1^0_3" -> "407 DequantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=dashed]; -"407 DequantizeLinear_bert/encoder/Reshape_1^0_3" -> "428 bert/encoder/layer_0/attention/self/key/MatMul" [label="[]", style=solid]; -"408 QuantizeLinear_bert/encoder/Reshape_1^0_2" -> "409 
DequantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=dashed]; -"409 DequantizeLinear_bert/encoder/Reshape_1^0_2" -> "420 bert/encoder/layer_0/attention/self/query/MatMul" [label="[]", style=solid]; -"410 QuantizeLinear_bert/encoder/Reshape_1^0_1" -> "411 DequantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=dashed]; -"411 DequantizeLinear_bert/encoder/Reshape_1^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[]", style=solid]; -"412 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"406 QuantizeLinear_bert/encoder/Reshape_1^0_1" -> "407 DequantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=dashed]; +"407 DequantizeLinear_bert/encoder/Reshape_1^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[]", style=solid]; +"408 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"410 QuantizeLinear_bert/encoder/Reshape_1^0_2" -> "411 DequantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=dashed]; +"411 DequantizeLinear_bert/encoder/Reshape_1^0_2" -> "420 bert/encoder/layer_0/attention/self/query/MatMul" [label="[]", style=solid]; +"412 QuantizeLinear_bert/encoder/Reshape_1^0_3" -> "413 DequantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=dashed]; +"413 DequantizeLinear_bert/encoder/Reshape_1^0_3" -> "428 bert/encoder/layer_0/attention/self/key/MatMul" [label="[]", style=solid]; "414 bert/encoder/layer_0/attention/self/value/MatMul" -> "415 bert/encoder/layer_0/attention/self/value/BiasAdd" [label="[]", style=solid]; "415 bert/encoder/layer_0/attention/self/value/BiasAdd" -> "416 bert/encoder/layer_0/attention/self/Reshape_2" [label="[]", style=solid]; "416 bert/encoder/layer_0/attention/self/Reshape_2" -> "417 bert/encoder/layer_0/attention/self/transpose_2" [label="[]", style=solid]; @@ -2095,18 +2095,18 @@ strict digraph { "492 bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2" -> "493 bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "493 bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" -> "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "494 bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1" -> "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" 
[label="[]", style=solid]; +"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "538 bert/encoder/layer_1/attention/output/add" [label="[]", style=solid]; -"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "518 bert/encoder/layer_1/attention/self/key/MatMul" [label="[]", style=solid]; -"498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "510 bert/encoder/layer_1/attention/self/query/MatMul" [label="[]", style=solid]; -"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[]", style=solid]; -"502 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[]", style=solid]; +"498 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "510 bert/encoder/layer_1/attention/self/query/MatMul" [label="[]", style=solid]; +"502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "518 bert/encoder/layer_1/attention/self/key/MatMul" [label="[]", style=solid]; "504 bert/encoder/layer_1/attention/self/value/MatMul" -> "505 bert/encoder/layer_1/attention/self/value/BiasAdd" [label="[]", style=solid]; "505 
bert/encoder/layer_1/attention/self/value/BiasAdd" -> "506 bert/encoder/layer_1/attention/self/Reshape_2" [label="[]", style=solid]; "506 bert/encoder/layer_1/attention/self/Reshape_2" -> "507 bert/encoder/layer_1/attention/self/transpose_2" [label="[]", style=solid]; @@ -2199,18 +2199,18 @@ strict digraph { "582 bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2" -> "583 bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "583 bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" -> "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "584 bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1" -> "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "628 bert/encoder/layer_2/attention/output/add" [label="[]", style=solid]; -"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "608 bert/encoder/layer_2/attention/self/key/MatMul" [label="[]", style=solid]; -"588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "600 bert/encoder/layer_2/attention/self/query/MatMul" [label="[]", style=solid]; -"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[]", style=solid]; -"592 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"587 
DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[]", style=solid]; +"588 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "600 bert/encoder/layer_2/attention/self/query/MatMul" [label="[]", style=solid]; +"592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "608 bert/encoder/layer_2/attention/self/key/MatMul" [label="[]", style=solid]; "594 bert/encoder/layer_2/attention/self/value/MatMul" -> "595 bert/encoder/layer_2/attention/self/value/BiasAdd" [label="[]", style=solid]; "595 bert/encoder/layer_2/attention/self/value/BiasAdd" -> "596 bert/encoder/layer_2/attention/self/Reshape_2" [label="[]", style=solid]; "596 bert/encoder/layer_2/attention/self/Reshape_2" -> "597 bert/encoder/layer_2/attention/self/transpose_2" [label="[]", style=solid]; @@ -2303,18 +2303,18 @@ strict digraph { "672 bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2" -> "673 bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "673 bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" -> "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "674 bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1" -> "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "718 bert/encoder/layer_3/attention/output/add" [label="[]", style=solid]; -"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"677 
DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "698 bert/encoder/layer_3/attention/self/key/MatMul" [label="[]", style=solid]; -"678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "690 bert/encoder/layer_3/attention/self/query/MatMul" [label="[]", style=solid]; -"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[]", style=solid]; -"682 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[]", style=solid]; +"678 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "690 bert/encoder/layer_3/attention/self/query/MatMul" [label="[]", style=solid]; +"682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "698 bert/encoder/layer_3/attention/self/key/MatMul" [label="[]", style=solid]; "684 bert/encoder/layer_3/attention/self/value/MatMul" -> "685 bert/encoder/layer_3/attention/self/value/BiasAdd" [label="[]", style=solid]; "685 bert/encoder/layer_3/attention/self/value/BiasAdd" -> "686 bert/encoder/layer_3/attention/self/Reshape_2" [label="[]", style=solid]; "686 bert/encoder/layer_3/attention/self/Reshape_2" -> "687 bert/encoder/layer_3/attention/self/transpose_2" [label="[]", style=solid]; @@ -2407,18 +2407,18 @@ strict digraph { "762 bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2" -> "763 bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "763 bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" -> "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "764 bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1" -> "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" 
[label="[]", style=solid]; -"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "808 bert/encoder/layer_4/attention/output/add" [label="[]", style=solid]; -"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "788 bert/encoder/layer_4/attention/self/key/MatMul" [label="[]", style=solid]; -"768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "780 bert/encoder/layer_4/attention/self/query/MatMul" [label="[]", style=solid]; -"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[]", style=solid]; -"772 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[]", style=solid]; +"768 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"771 
DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "780 bert/encoder/layer_4/attention/self/query/MatMul" [label="[]", style=solid]; +"772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "788 bert/encoder/layer_4/attention/self/key/MatMul" [label="[]", style=solid]; "774 bert/encoder/layer_4/attention/self/value/MatMul" -> "775 bert/encoder/layer_4/attention/self/value/BiasAdd" [label="[]", style=solid]; "775 bert/encoder/layer_4/attention/self/value/BiasAdd" -> "776 bert/encoder/layer_4/attention/self/Reshape_2" [label="[]", style=solid]; "776 bert/encoder/layer_4/attention/self/Reshape_2" -> "777 bert/encoder/layer_4/attention/self/transpose_2" [label="[]", style=solid]; @@ -2511,18 +2511,18 @@ strict digraph { "852 bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2" -> "853 bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "853 bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" -> "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "854 bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1" -> "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "898 bert/encoder/layer_5/attention/output/add" [label="[]", style=solid]; -"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "878 bert/encoder/layer_5/attention/self/key/MatMul" [label="[]", style=solid]; -"858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "870 bert/encoder/layer_5/attention/self/query/MatMul" [label="[]", style=solid]; -"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"861 
DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[]", style=solid]; -"862 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[]", style=solid]; +"858 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "870 bert/encoder/layer_5/attention/self/query/MatMul" [label="[]", style=solid]; +"862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "878 bert/encoder/layer_5/attention/self/key/MatMul" [label="[]", style=solid]; "864 bert/encoder/layer_5/attention/self/value/MatMul" -> "865 bert/encoder/layer_5/attention/self/value/BiasAdd" [label="[]", style=solid]; "865 bert/encoder/layer_5/attention/self/value/BiasAdd" -> "866 bert/encoder/layer_5/attention/self/Reshape_2" [label="[]", style=solid]; "866 bert/encoder/layer_5/attention/self/Reshape_2" -> "867 bert/encoder/layer_5/attention/self/transpose_2" [label="[]", style=solid]; @@ -2615,18 +2615,18 @@ strict digraph { "942 bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2" -> "943 bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "943 bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" -> "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "944 bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1" -> "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"945 
bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "988 bert/encoder/layer_6/attention/output/add" [label="[]", style=solid]; -"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "968 bert/encoder/layer_6/attention/self/key/MatMul" [label="[]", style=solid]; -"948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "960 bert/encoder/layer_6/attention/self/query/MatMul" [label="[]", style=solid]; -"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[]", style=solid]; -"952 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[]", style=solid]; +"948 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "960 bert/encoder/layer_6/attention/self/query/MatMul" [label="[]", style=solid]; +"952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "968 bert/encoder/layer_6/attention/self/key/MatMul" [label="[]", style=solid]; "954 bert/encoder/layer_6/attention/self/value/MatMul" -> "955 bert/encoder/layer_6/attention/self/value/BiasAdd" [label="[]", style=solid]; "955 bert/encoder/layer_6/attention/self/value/BiasAdd" -> "956 
bert/encoder/layer_6/attention/self/Reshape_2" [label="[]", style=solid]; "956 bert/encoder/layer_6/attention/self/Reshape_2" -> "957 bert/encoder/layer_6/attention/self/transpose_2" [label="[]", style=solid]; @@ -2719,18 +2719,18 @@ strict digraph { "1032 bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2" -> "1033 bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1033 bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" -> "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1034 bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1" -> "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1078 bert/encoder/layer_7/attention/output/add" [label="[]", style=solid]; -"1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1058 bert/encoder/layer_7/attention/self/key/MatMul" [label="[]", style=solid]; -"1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1050 bert/encoder/layer_7/attention/self/query/MatMul" [label="[]", style=solid]; -"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[]", style=solid]; -"1042 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1037 
DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[]", style=solid]; +"1038 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1050 bert/encoder/layer_7/attention/self/query/MatMul" [label="[]", style=solid]; +"1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1058 bert/encoder/layer_7/attention/self/key/MatMul" [label="[]", style=solid]; "1044 bert/encoder/layer_7/attention/self/value/MatMul" -> "1045 bert/encoder/layer_7/attention/self/value/BiasAdd" [label="[]", style=solid]; "1045 bert/encoder/layer_7/attention/self/value/BiasAdd" -> "1046 bert/encoder/layer_7/attention/self/Reshape_2" [label="[]", style=solid]; "1046 bert/encoder/layer_7/attention/self/Reshape_2" -> "1047 bert/encoder/layer_7/attention/self/transpose_2" [label="[]", style=solid]; @@ -2823,18 +2823,18 @@ strict digraph { "1122 bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2" -> "1123 bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1123 bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" -> "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1124 bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1" -> "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1168 bert/encoder/layer_8/attention/output/add" [label="[]", style=solid]; -"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1127 
DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1148 bert/encoder/layer_8/attention/self/key/MatMul" [label="[]", style=solid]; -"1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1140 bert/encoder/layer_8/attention/self/query/MatMul" [label="[]", style=solid]; -"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[]", style=solid]; -"1132 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[]", style=solid]; +"1128 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1140 bert/encoder/layer_8/attention/self/query/MatMul" [label="[]", style=solid]; +"1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1148 bert/encoder/layer_8/attention/self/key/MatMul" [label="[]", style=solid]; "1134 bert/encoder/layer_8/attention/self/value/MatMul" -> "1135 bert/encoder/layer_8/attention/self/value/BiasAdd" [label="[]", style=solid]; "1135 bert/encoder/layer_8/attention/self/value/BiasAdd" -> "1136 bert/encoder/layer_8/attention/self/Reshape_2" [label="[]", style=solid]; "1136 bert/encoder/layer_8/attention/self/Reshape_2" -> "1137 bert/encoder/layer_8/attention/self/transpose_2" [label="[]", style=solid]; @@ -2927,18 +2927,18 @@ strict digraph { "1212 bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2" -> "1213 bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1213 bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" -> "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1214 bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1" -> "1215 
bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1258 bert/encoder/layer_9/attention/output/add" [label="[]", style=solid]; -"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1238 bert/encoder/layer_9/attention/self/key/MatMul" [label="[]", style=solid]; -"1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1230 bert/encoder/layer_9/attention/self/query/MatMul" [label="[]", style=solid]; -"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[]", style=solid]; -"1222 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[]", style=solid]; +"1218 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1221 
DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1230 bert/encoder/layer_9/attention/self/query/MatMul" [label="[]", style=solid]; +"1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1238 bert/encoder/layer_9/attention/self/key/MatMul" [label="[]", style=solid]; "1224 bert/encoder/layer_9/attention/self/value/MatMul" -> "1225 bert/encoder/layer_9/attention/self/value/BiasAdd" [label="[]", style=solid]; "1225 bert/encoder/layer_9/attention/self/value/BiasAdd" -> "1226 bert/encoder/layer_9/attention/self/Reshape_2" [label="[]", style=solid]; "1226 bert/encoder/layer_9/attention/self/Reshape_2" -> "1227 bert/encoder/layer_9/attention/self/transpose_2" [label="[]", style=solid]; @@ -3031,18 +3031,18 @@ strict digraph { "1302 bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2" -> "1303 bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1303 bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" -> "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1304 bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1" -> "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1348 bert/encoder/layer_10/attention/output/add" [label="[]", style=solid]; -"1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1328 bert/encoder/layer_10/attention/self/key/MatMul" [label="[]", style=solid]; -"1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1320 bert/encoder/layer_10/attention/self/query/MatMul" [label="[]", style=solid]; -"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1311 
DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[]", style=solid]; -"1312 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[]", style=solid]; +"1308 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1320 bert/encoder/layer_10/attention/self/query/MatMul" [label="[]", style=solid]; +"1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1328 bert/encoder/layer_10/attention/self/key/MatMul" [label="[]", style=solid]; "1314 bert/encoder/layer_10/attention/self/value/MatMul" -> "1315 bert/encoder/layer_10/attention/self/value/BiasAdd" [label="[]", style=solid]; "1315 bert/encoder/layer_10/attention/self/value/BiasAdd" -> "1316 bert/encoder/layer_10/attention/self/Reshape_2" [label="[]", style=solid]; "1316 bert/encoder/layer_10/attention/self/Reshape_2" -> "1317 bert/encoder/layer_10/attention/self/transpose_2" [label="[]", style=solid]; @@ -3135,18 +3135,18 @@ strict digraph { "1392 bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2" -> "1393 bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1393 bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" -> "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1394 bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1" -> "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1395 
bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1438 bert/encoder/layer_11/attention/output/add" [label="[]", style=solid]; -"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1418 bert/encoder/layer_11/attention/self/key/MatMul" [label="[]", style=solid]; -"1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1410 bert/encoder/layer_11/attention/self/query/MatMul" [label="[]", style=solid]; -"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[]", style=solid]; -"1402 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[]", style=solid]; +"1398 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1410 bert/encoder/layer_11/attention/self/query/MatMul" [label="[]", style=solid]; +"1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1418 
bert/encoder/layer_11/attention/self/key/MatMul" [label="[]", style=solid]; "1404 bert/encoder/layer_11/attention/self/value/MatMul" -> "1405 bert/encoder/layer_11/attention/self/value/BiasAdd" [label="[]", style=solid]; "1405 bert/encoder/layer_11/attention/self/value/BiasAdd" -> "1406 bert/encoder/layer_11/attention/self/Reshape_2" [label="[]", style=solid]; "1406 bert/encoder/layer_11/attention/self/Reshape_2" -> "1407 bert/encoder/layer_11/attention/self/transpose_2" [label="[]", style=solid]; diff --git a/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot b/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot index 7aa64281d32..9d2f66780d5 100644 --- a/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot +++ b/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot @@ -855,10 +855,10 @@ strict digraph { "853 QuantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" [id=853, type=QuantizeLinear]; "854 DequantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" [id=854, type=DequantizeLinear]; "855 Conv_349" [id=855, type=Conv]; -"856 QuantizeLinear_1028_2" [id=856, type=QuantizeLinear]; -"857 DequantizeLinear_1028_2" [id=857, type=DequantizeLinear]; -"858 QuantizeLinear_1028_1" [id=858, type=QuantizeLinear]; -"859 DequantizeLinear_1028_1" [id=859, type=DequantizeLinear]; +"856 QuantizeLinear_1028_1" [id=856, type=QuantizeLinear]; +"857 DequantizeLinear_1028_1" [id=857, type=DequantizeLinear]; +"858 QuantizeLinear_1028_2" [id=858, type=QuantizeLinear]; +"859 DequantizeLinear_1028_2" [id=859, type=DequantizeLinear]; "860 Relu_350" [id=860, type=Relu]; "861 QuantizeLinear_1029_1" [id=861, type=QuantizeLinear]; "862 DequantizeLinear_1029_1" [id=862, type=DequantizeLinear]; @@ -1983,13 +1983,13 @@ strict digraph { "852 Add_348" -> "866 QuantizeLinear_1027_1" [label="[1, 256, 60, 80]", style=solid]; "853 QuantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" -> "854 DequantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" [label="[256, 2048, 3, 3]", style=dashed]; "854 DequantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" -> "855 Conv_349" [label="[256, 2048, 3, 3]", style=solid]; -"855 Conv_349" -> "856 QuantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=solid]; -"855 Conv_349" -> "858 QuantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=solid]; +"855 Conv_349" -> "856 QuantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=solid]; +"855 Conv_349" -> "858 QuantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=solid]; "855 Conv_349" -> "860 Relu_350" [label="[1, 256, 8, 10]", style=solid]; -"856 QuantizeLinear_1028_2" -> "857 DequantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=dashed]; -"857 DequantizeLinear_1028_2" -> "1041 Conv_427" [label="[1, 256, 8, 10]", style=solid]; -"858 QuantizeLinear_1028_1" -> "859 DequantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=dashed]; -"859 DequantizeLinear_1028_1" -> "944 Conv_382" [label="[1, 256, 8, 10]", style=solid]; +"856 QuantizeLinear_1028_1" -> "857 DequantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=dashed]; +"857 DequantizeLinear_1028_1" -> "944 Conv_382" [label="[1, 256, 8, 10]", style=solid]; +"858 QuantizeLinear_1028_2" -> "859 DequantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=dashed]; +"859 DequantizeLinear_1028_2" -> "1041 Conv_427" [label="[1, 256, 8, 10]", style=solid]; "860 Relu_350" -> "861 QuantizeLinear_1029_1" [label="[1, 256, 8, 10]", style=solid]; "861 QuantizeLinear_1029_1" -> "862 DequantizeLinear_1029_1" [label="[1, 256, 8, 10]", style=dashed]; "862 
DequantizeLinear_1029_1" -> "865 Conv_351" [label="[1, 256, 8, 10]", style=solid]; From 1a6cb0c404e02711d0d303c80d76cb7d05fdf001 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 4 Oct 2023 11:47:35 +0200 Subject: [PATCH 12/21] style improvement --- nncf/onnx/graph/metatypes/onnx_metatypes.py | 17 +++--- nncf/onnx/graph/model_transformer.py | 24 +++++--- nncf/onnx/graph/nncf_graph_builder.py | 68 +++++++++++++-------- nncf/onnx/graph/onnx_helper.py | 63 ++++++++++++------- tests/onnx/quantization/common.py | 6 +- 5 files changed, 111 insertions(+), 67 deletions(-) diff --git a/nncf/onnx/graph/metatypes/onnx_metatypes.py b/nncf/onnx/graph/metatypes/onnx_metatypes.py index f1d2fd8f6da..69a72e6947b 100644 --- a/nncf/onnx/graph/metatypes/onnx_metatypes.py +++ b/nncf/onnx/graph/metatypes/onnx_metatypes.py @@ -9,14 +9,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Optional, Type +from typing import Dict, List, Optional, Tuple, Type import onnx from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.operator_metatypes import OperatorMetatypeRegistry from nncf.common.hardware.opset import HWConfigOpName -from nncf.onnx.graph.onnx_helper import get_output_edge_node_mapping +from nncf.onnx.graph.onnx_helper import get_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_parent from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.onnx_helper import has_tensor @@ -646,7 +646,10 @@ def get_metatype(model: onnx.ModelProto, node: onnx.NodeProto) -> ONNXOpMetatype def get_tensor_edge_name( - model: onnx.ModelProto, node: onnx.NodeProto, port_id: int, output_edge_node_mapping + model: onnx.ModelProto, + node: onnx.NodeProto, + port_id: int, + edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], ) -> Optional[str]: """ Returns an edge name associated with a weight of a node laying on an input port_id. 
@@ -677,14 +680,14 @@ def get_tensor_edge_name( + ONNXDequantizeLinearMetatype.get_all_aliases() ) END_NODES = ONNXConstantMetatype.get_all_aliases() - parent = get_parent(node, port_id, output_edge_node_mapping) + parent = get_parent(node, port_id, edge_node_mapping) if not parent: if has_tensor(model, node.input[port_id]): return node.input[port_id] elif parent.op_type in END_NODES: return node.input[port_id] elif parent.op_type in PROPAGATING_NODES: - return get_tensor_edge_name(model, parent, 0, output_edge_node_mapping) + return get_tensor_edge_name(model, parent, 0, edge_node_mapping) return None @@ -734,8 +737,8 @@ def _is_embedding(model: onnx.ModelProto, node: onnx.NodeProto) -> bool: """ tensor_port_id = ONNXEmbeddingMetatype.weight_port_ids[0] allowed_types_list = ["TensorProto.FLOAT"] - output_edge_node_mapping = get_output_edge_node_mapping(model) - weight_edge_name = get_tensor_edge_name(model, node, tensor_port_id, output_edge_node_mapping) + edge_node_mapping = get_edge_node_mapping(model) + weight_edge_name = get_tensor_edge_name(model, node, tensor_port_id, edge_node_mapping) if weight_edge_name is not None: tensor_data_type = get_tensor(model, weight_edge_name).data_type diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py index 2cda5be9591..d06824ad1ca 100644 --- a/nncf/onnx/graph/model_transformer.py +++ b/nncf/onnx/graph/model_transformer.py @@ -21,8 +21,8 @@ from nncf.onnx.graph.node_utils import get_input_edge from nncf.onnx.graph.onnx_helper import get_children from nncf.onnx.graph.onnx_helper import get_edge_dtype -from nncf.onnx.graph.onnx_helper import get_edge_mapping -from nncf.onnx.graph.onnx_helper import get_input_edge_node_mapping +from nncf.onnx.graph.onnx_helper import get_edge_info_mapping +from nncf.onnx.graph.onnx_helper import get_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_node_index from nncf.onnx.graph.onnx_helper import get_node_mapping from nncf.onnx.graph.onnx_helper import get_tensor @@ -153,9 +153,9 @@ def _insert_outputs(model: onnx.ModelProto, outputs: Union[List[str], Set[str]]) :return: New model with inserted outputs. """ model_outputs = [] - edge_mapping = get_edge_mapping(model) + edge_info_mapping = get_edge_info_mapping(model) for output in outputs: - edge = edge_mapping[output] + edge = edge_info_mapping[output] onnx_dtype = get_edge_dtype(edge) type_proto = onnx.helper.make_tensor_type_proto(onnx_dtype, shape=None) model_outputs.append(onnx.helper.make_value_info(name=output, type_proto=type_proto)) @@ -198,9 +198,9 @@ def _apply_quantizer_insertion_transformations( :return: New model with inserted QuantizeLinear-DequantizeLinear nodes pairs. 
""" self._added_target_edges = Counter() - input_edge_node_mapping = get_input_edge_node_mapping(model) # TODO: maybe update after transformation for transformation in transformations: - model = self._insert_quantizer_dequantizer(model, transformation, input_edge_node_mapping) + edge_node_mapping = get_edge_node_mapping(model) + model = self._insert_quantizer_dequantizer(model, transformation, edge_node_mapping) return model def _get_quantize_dequantize_nodes( @@ -299,13 +299,17 @@ def _get_quantizer_dequantizer_edge_name( return target_edge_name def _insert_quantizer_dequantizer( - self, model: onnx.ModelProto, transformation: ONNXQuantizerInsertionCommand, input_edge_node_mapping + self, + model: onnx.ModelProto, + transformation: ONNXQuantizerInsertionCommand, + edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], ) -> onnx.ModelProto: """ Inserts QuantizeLinear-DequantizeLinear nodes pair. :param model: Model to insert new nodes. :param transformation: QuantizeLinear-DequantizeLinear insertion transformation. + :param edge_node_mapping: Mapping describing start and consumed nodes of the edges. :return: Updated model with inserted QuantizeLinear-DequantizeLinear pair. """ node_mapping = get_node_mapping(model) @@ -317,7 +321,7 @@ def _insert_quantizer_dequantizer( # If several nodes on one edge input_nodes = [] - input_nodes.extend(input_edge_node_mapping[target_edge_name]) + input_nodes.extend(edge_node_mapping[target_edge_name][1]) if not input_nodes: raise RuntimeError( f"Can not add the quantizer to the {target_edge_name} edge. This edge does not have end node." @@ -403,12 +407,12 @@ def _apply_qdq_node_removing_transformations( :param transformations: Nodes removing transformations. :return: Model with removed nodes. """ - input_edge_node_mapping = get_input_edge_node_mapping(model) for transformation in transformations: node_mapping = get_node_mapping(model) + edge_node_mapping = get_edge_node_mapping(model) node = node_mapping[transformation.target_point.target_node_name] - node_children = get_children(node, input_edge_node_mapping) + node_children = get_children(node, edge_node_mapping) for node_child in node_children: for input_id, input_obj in enumerate(node_child.input): if input_obj == node.output[0]: diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py index 94066583dd4..856c5d562b9 100644 --- a/nncf/onnx/graph/nncf_graph_builder.py +++ b/nncf/onnx/graph/nncf_graph_builder.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from collections import Counter -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Optional, Set, Tuple import onnx @@ -30,12 +30,11 @@ from nncf.onnx.graph.metatypes.onnx_metatypes import get_metatype from nncf.onnx.graph.metatypes.onnx_metatypes import get_tensor_edge_name from nncf.onnx.graph.onnx_helper import get_edge_dtype -from nncf.onnx.graph.onnx_helper import get_edge_mapping +from nncf.onnx.graph.onnx_helper import get_edge_info_mapping +from nncf.onnx.graph.onnx_helper import get_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_edge_shape -from nncf.onnx.graph.onnx_helper import get_input_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_input_port_id_for_node_after_input from nncf.onnx.graph.onnx_helper import get_model_inputs -from nncf.onnx.graph.onnx_helper import get_output_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_output_port_id_for_node_before_output from nncf.onnx.graph.onnx_helper import get_port_ids_between_nodes from nncf.onnx.graph.onnx_helper import is_node_has_shared_weight @@ -112,7 +111,11 @@ def get_bias_tensor_port_id(metatype: ONNXOpWithWeightsMetatype) -> Optional[int return None -def _get_weight_port_ids(node: onnx.NodeProto, model: onnx.ModelProto, output_edge_node_mapping) -> Set[int]: +def _get_weight_port_ids( + node: onnx.NodeProto, + model: onnx.ModelProto, + edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], +) -> Set[int]: """ Returns all weight input ports. First, add constant weight port ids from metatype. @@ -120,6 +123,7 @@ def _get_weight_port_ids(node: onnx.NodeProto, model: onnx.ModelProto, output_ed :param node: ONNX node. :param model: ONNX model. + :param edge_node_mapping: Mapping describing start and consumed nodes of the edges. :return: Port ids with weights. """ port_ids = set() @@ -128,7 +132,7 @@ def _get_weight_port_ids(node: onnx.NodeProto, model: onnx.ModelProto, output_ed port_ids.update(constant_port_ids) possible_port_ids = get_possible_weight_port_ids(metatype) for port_id in possible_port_ids: - if get_tensor_edge_name(model, node, port_id, output_edge_node_mapping): + if get_tensor_edge_name(model, node, port_id, edge_node_mapping): port_ids.add(port_id) return port_ids @@ -177,19 +181,24 @@ def _get_node_attrs(node: onnx.NodeProto, model: onnx.ModelProto) -> Dict[str, A return {} -def _get_bias_attr(node: onnx.NodeProto, model: onnx.ModelProto, output_edge_node_mapping) -> Dict[str, str]: +def _get_bias_attr( + node: onnx.NodeProto, + model: onnx.ModelProto, + edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], +) -> Dict[str, str]: """ Returns bias tensor attributes. :param node: ONNX node. :param model: ONNX model. + :param edge_node_mapping: Mapping describing start and consumed nodes of the edges. :return: Bias tensor attributes. 
""" bias_attrs = {} metatype = get_metatype(model, node) if _is_node_with_bias(node, model): bias_tensor_port_id = get_bias_tensor_port_id(metatype) - bias_edge_name = get_tensor_edge_name(model, node, bias_tensor_port_id, output_edge_node_mapping) + bias_edge_name = get_tensor_edge_name(model, node, bias_tensor_port_id, edge_node_mapping) bias_attrs["name"] = bias_edge_name return bias_attrs @@ -225,13 +234,18 @@ def _replace_empty_node_name(model: onnx.ModelProto) -> onnx.ModelProto: @staticmethod def _add_nncf_input_nodes( - model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_mapping, input_edge_node_mapping + model: onnx.ModelProto, + nncf_graph: NNCFGraph, + edge_info_mapping: Dict[str, onnx.ValueInfoProto], + edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], ) -> None: """ Adds special NNCF Input nodes to NNCFGraph. For all the ONNX model inputs, the special NNCF Input node is placed and then corresponding edges are added. :param model: ONNX model. :param nncf_graph: NNCFGraph, in which the new nodes will be added. + :param edge_info_mapping: Mapping from edge name to the edge info. + :param edge_node_mapping: Mapping describing start and consumed nodes of the edges. :return: None. """ for i, _input in enumerate(get_model_inputs(model)): @@ -243,10 +257,10 @@ def _add_nncf_input_nodes( node_metatype=InputNoopMetatype, layer_attributes=layer_attributes, ) - to_nodes = input_edge_node_mapping[input_name] + to_nodes = edge_node_mapping[input_name][1] input_node_node_id = input_node.node_id - edge = edge_mapping[input_name] + edge = edge_info_mapping[input_name] input_shape = get_edge_shape(edge) onnx_dtype = get_edge_dtype(edge) nncf_dtype = GraphConverter.convert_onnx_dtype_to_nncf_dtype(onnx_dtype) @@ -267,13 +281,18 @@ def _add_nncf_input_nodes( @staticmethod def _add_nncf_output_nodes( - model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_mapping, output_edge_node_mapping + model: onnx.ModelProto, + nncf_graph: NNCFGraph, + edge_info_mapping: Dict[str, onnx.ValueInfoProto], + edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], ) -> None: """ Adds special NNCF Output nodes to NNCFGraph. For all the ONNX model outputs, the special NNCF Output node is placed and then corresponding edges are added. :param model: ONNX model. :param nncf_graph: NNCFGraph, in which the new nodes will be added. + :param edge_info_mapping: Mapping from edge name to the edge info. + :param edge_node_mapping: Mapping describing start and consumed nodes of the edges. :return: None. 
""" for i, _output in enumerate(model.graph.output): @@ -285,10 +304,10 @@ def _add_nncf_output_nodes( node_metatype=OutputNoopMetatype, layer_attributes=layer_attributes, ) - from_node = output_edge_node_mapping[output_name] + from_node = edge_node_mapping[output_name][0] output_node_node_id = output_node.node_id - edge = edge_mapping[output_name] + edge = edge_info_mapping[output_name] output_shape = get_edge_shape(edge) onnx_dtype = get_edge_dtype(edge) nncf_dtype = GraphConverter.convert_onnx_dtype_to_nncf_dtype(onnx_dtype) @@ -327,26 +346,25 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: """ onnx_model = GraphConverter._replace_empty_node_name(onnx_model) onnx_model = onnx.shape_inference.infer_shapes(onnx_model) - edge_mapping = get_edge_mapping(onnx_model) - input_edge_node_mapping = get_input_edge_node_mapping(onnx_model) - output_edge_node_mapping = get_output_edge_node_mapping(onnx_model) + edge_info_mapping = get_edge_info_mapping(onnx_model) + edge_node_mapping = get_edge_node_mapping(onnx_model) nncf_graph = NNCFGraph() for node in onnx_model.graph.node: metatype = get_metatype(onnx_model, node) - weight_port_ids = _get_weight_port_ids(node, onnx_model, output_edge_node_mapping) + weight_port_ids = _get_weight_port_ids(node, metatype, edge_node_mapping) is_shared = None weight_attrs = {} node_attrs = _get_node_attrs(node, onnx_model) - bias_attrs = _get_bias_attr(node, onnx_model, output_edge_node_mapping) + bias_attrs = _get_bias_attr(node, onnx_model, edge_node_mapping) if weight_port_ids: # If node has weight weight_edge_names = [] for weight_port_id in weight_port_ids: weight_edge_name = node.input[weight_port_id] weight_edge_names.append(weight_edge_name) - edge = edge_mapping[weight_edge_name] + edge = edge_info_mapping[weight_edge_name] weight_shape = get_edge_shape(edge) weight_attrs[weight_port_id] = {"name": weight_edge_name, "shape": weight_shape} - if not is_shared and is_node_has_shared_weight(node, weight_port_id, input_edge_node_mapping): + if not is_shared and is_node_has_shared_weight(node, weight_port_id, edge_node_mapping): is_shared = True layer_attributes = ONNXLayerAttributes( @@ -363,7 +381,7 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: for output_node in onnx_model.graph.node: output_edges = output_node.output for output_edge in output_edges: - edge = edge_mapping.get(output_edge) + edge = edge_info_mapping.get(output_edge) if edge is None: # If the edge is None it means that the edge was not added during shape inference of ONNX model. # BatchNorm exported in Training mode has unused outputs edges: mean, var, saved_mean, saved_var. 
@@ -373,7 +391,7 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph:
                 onnx_dtype = get_edge_dtype(edge)
                 nncf_dtype = GraphConverter.convert_onnx_dtype_to_nncf_dtype(onnx_dtype)
                 output_node_id = nncf_graph.get_node_by_name(output_node.name).node_id
-                input_nodes = input_edge_node_mapping[output_edge]
+                input_nodes = edge_node_mapping[output_edge][1]
                 for input_node in input_nodes:
                     port_ids = get_port_ids_between_nodes(output_node, input_node)
                     input_port_id = port_ids["input_port_id"]
@@ -388,6 +406,6 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph:
             dtype=Dtype(nncf_dtype),
         )
 
-    GraphConverter._add_nncf_input_nodes(onnx_model, nncf_graph, edge_mapping, input_edge_node_mapping)
-    GraphConverter._add_nncf_output_nodes(onnx_model, nncf_graph, edge_mapping, output_edge_node_mapping)
+    GraphConverter._add_nncf_input_nodes(onnx_model, nncf_graph, edge_info_mapping, edge_node_mapping)
+    GraphConverter._add_nncf_output_nodes(onnx_model, nncf_graph, edge_info_mapping, edge_node_mapping)
     return nncf_graph
diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py
index ce0ed7a7aa5..d2362b0ff4b 100644
--- a/nncf/onnx/graph/onnx_helper.py
+++ b/nncf/onnx/graph/onnx_helper.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections import defaultdict
-from typing import Dict, Iterator, List, Optional, Union
+from typing import Dict, Iterator, List, Optional, Tuple, Union
 
 import numpy as np
 import onnx
@@ -26,7 +26,7 @@ def get_node_mapping(model: onnx.ModelProto) -> Dict[str, onnx.NodeProto]:
     return {node.name: node for node in model.graph.node}
 
 
-def get_edge_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoProto]:
+def get_edge_info_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoProto]:
     """
     Returns mapping from edge name to the edge info.
 
@@ -39,19 +39,27 @@ def get_edge_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoProto]:
     }
 
 
-def get_input_edge_node_mapping(model: onnx.ModelProto) -> Dict[str, List[onnx.ValueInfoProto]]:
-    output = defaultdict(list)
-    for node in model.graph.node:
-        for input_edge in node.input:
-            output[input_edge].append(node)
-    return output
-
+def get_edge_node_mapping(model: onnx.ModelProto) -> Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]]:
+    """
+    Returns mapping describing start and consumed nodes of the edges.
+    The mapping key is an edge name, while the value is a tuple
+    containing on 0-index the node from which the edge starts and on 1-index the nodes which consume this edge.
+    None on 0-index means that there is no start node (e.g. a model input edge).
+    An empty list on 1-index means that there are no consuming nodes.
 
-def get_output_edge_node_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoProto]:
-    output = defaultdict(list)
+    :param model: ONNX model from which the mapping is built.
+    :return: Mapping between an edge name and a tuple of its from-node and to-nodes.
+    """
+    output = {}
     for node in model.graph.node:
-        for input_edge in node.output:
-            output[input_edge] = node
+        for input_edge in node.input:
+            if input_edge not in output:
+                output[input_edge] = [None, []]
+            output[input_edge][1].append(node)  # To node
+        for output_edge in node.output:
+            if output_edge not in output:
+                output[output_edge] = [None, []]
+            output[output_edge][0] = node  # From node
     return output
 
 
@@ -228,42 +236,53 @@ def get_edge_dtype(edge: Union[onnx.ValueInfoProto, onnx.TensorProto]) -> int:
     return edge.data_type
 
 
-def get_parent(node: onnx.NodeProto, port_id: int, output_edge_node_mapping) -> Optional[onnx.NodeProto]:
+def get_parent(
+    node: onnx.NodeProto,
+    port_id: int,
+    edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]],
+) -> Optional[onnx.NodeProto]:
     """
     Returns the parent of the node. If there is no parent node, returns None.
 
-    :param model: ONNX model.
     :param node: The child node.
     :param port_id: Input port id on which the parent is sought.
+    :param edge_node_mapping: Mapping describing start and consumed nodes of the edges.
     :return: Parent node.
     """
     if port_id < len(node.input):
-        return output_edge_node_mapping[node.input[port_id]]
+        return edge_node_mapping[node.input[port_id]][0]
     return None
 
 
-def get_children(node: onnx.NodeProto, input_edge_node_mapping) -> List[onnx.NodeProto]:
+def get_children(
+    node: onnx.NodeProto, edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]]
+) -> List[onnx.NodeProto]:
     """
     Returns children of the node.
 
-    :param model: ONNX model.
     :param node: The parent node.
+    :param edge_node_mapping: Mapping describing start and consumed nodes of the edges.
    :return: All child nodes.
     """
     output = []
     for node_edge in node.output:
-        output.extend(input_edge_node_mapping[node_edge])
+        output.extend(edge_node_mapping[node_edge][1])
    return output


-def is_node_has_shared_weight(node: onnx.NodeProto, weight_port_id: int, input_edge_node_mapping) -> bool:
+def is_node_has_shared_weight(
+    node: onnx.NodeProto,
+    weight_port_id: int,
+    edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]],
+) -> bool:
     """
     Returns whether the node shares a weight.
 
-    :param model: ONNX model.
     :param node: Node.
+    :param weight_port_id: Port id on which there is a weight.
+    :param edge_node_mapping: Mapping describing start and consumed nodes of the edges.
     :return: True if the node shares a weight, otherwise False.
""" weight_tensor_edge = node.input[weight_port_id] - nodes = input_edge_node_mapping[weight_tensor_edge] + nodes = edge_node_mapping[weight_tensor_edge][1] return len(nodes) > 1 diff --git a/tests/onnx/quantization/common.py b/tests/onnx/quantization/common.py index 06bbc5ee34d..66880e627bf 100644 --- a/tests/onnx/quantization/common.py +++ b/tests/onnx/quantization/common.py @@ -18,7 +18,7 @@ from nncf import Dataset from nncf.onnx.graph.nncf_graph_builder import GraphConverter from nncf.onnx.graph.onnx_helper import get_edge_dtype -from nncf.onnx.graph.onnx_helper import get_edge_mapping +from nncf.onnx.graph.onnx_helper import get_edge_info_mapping from nncf.onnx.graph.onnx_helper import get_edge_shape from nncf.onnx.statistics.statistics import ONNXMinMaxTensorStatistic from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters @@ -55,12 +55,12 @@ def _get_input_keys(original_model: onnx.ModelProto) -> str: def get_random_dataset_for_test(model: onnx.ModelProto, has_batch_dim: bool, length: Optional[int] = 10): keys = _get_input_keys(model) - edge_mapping = get_edge_mapping(model) + edge_info_mapping = get_edge_info_mapping(model) def transform_fn(i): output = {} for key in keys: - edge = edge_mapping[key] + edge = edge_info_mapping[key] input_dtype = get_edge_dtype(edge) input_np_dtype = onnx.helper.tensor_dtype_to_np_dtype(input_dtype) shape = get_edge_shape(edge) From b4ba1b84e33eee8e777f0cd9bb4e526fb5dfe7fb Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 4 Oct 2023 13:03:06 +0200 Subject: [PATCH 13/21] typo --- nncf/onnx/graph/nncf_graph_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py index 856c5d562b9..7a381e461f6 100644 --- a/nncf/onnx/graph/nncf_graph_builder.py +++ b/nncf/onnx/graph/nncf_graph_builder.py @@ -351,7 +351,7 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: nncf_graph = NNCFGraph() for node in onnx_model.graph.node: metatype = get_metatype(onnx_model, node) - weight_port_ids = _get_weight_port_ids(node, metatype, edge_node_mapping) + weight_port_ids = _get_weight_port_ids(node, onnx_model, edge_node_mapping) is_shared = None weight_attrs = {} node_attrs = _get_node_attrs(node, onnx_model) From 2f87ebbe126621449f5cf693f9ea83d2ceab1785 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 4 Oct 2023 14:21:34 +0200 Subject: [PATCH 14/21] split edge_node_mapping into children_node_mapping and parents_node_mapping --- nncf/onnx/graph/metatypes/onnx_metatypes.py | 12 +++--- nncf/onnx/graph/model_transformer.py | 15 ++++---- nncf/onnx/graph/nncf_graph_builder.py | 34 +++++++++-------- nncf/onnx/graph/onnx_helper.py | 42 +++++++++------------ 4 files changed, 49 insertions(+), 54 deletions(-) diff --git a/nncf/onnx/graph/metatypes/onnx_metatypes.py b/nncf/onnx/graph/metatypes/onnx_metatypes.py index 69a72e6947b..48aabc87a85 100644 --- a/nncf/onnx/graph/metatypes/onnx_metatypes.py +++ b/nncf/onnx/graph/metatypes/onnx_metatypes.py @@ -16,8 +16,8 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.operator_metatypes import OperatorMetatypeRegistry from nncf.common.hardware.opset import HWConfigOpName -from nncf.onnx.graph.onnx_helper import get_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_parent +from nncf.onnx.graph.onnx_helper import get_parents_node_mapping from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.onnx_helper import 
has_tensor @@ -649,7 +649,7 @@ def get_tensor_edge_name( model: onnx.ModelProto, node: onnx.NodeProto, port_id: int, - edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + parents_node_mapping: Dict[str, List[onnx.ValueInfoProto]], ) -> Optional[str]: """ Returns an edge name associated with a weight of a node laying on an input port_id. @@ -680,14 +680,14 @@ def get_tensor_edge_name( + ONNXDequantizeLinearMetatype.get_all_aliases() ) END_NODES = ONNXConstantMetatype.get_all_aliases() - parent = get_parent(node, port_id, edge_node_mapping) + parent = get_parent(node, port_id, parents_node_mapping) if not parent: if has_tensor(model, node.input[port_id]): return node.input[port_id] elif parent.op_type in END_NODES: return node.input[port_id] elif parent.op_type in PROPAGATING_NODES: - return get_tensor_edge_name(model, parent, 0, edge_node_mapping) + return get_tensor_edge_name(model, parent, 0, parents_node_mapping) return None @@ -737,8 +737,8 @@ def _is_embedding(model: onnx.ModelProto, node: onnx.NodeProto) -> bool: """ tensor_port_id = ONNXEmbeddingMetatype.weight_port_ids[0] allowed_types_list = ["TensorProto.FLOAT"] - edge_node_mapping = get_edge_node_mapping(model) - weight_edge_name = get_tensor_edge_name(model, node, tensor_port_id, edge_node_mapping) + parents_node_mapping = get_parents_node_mapping(model) + weight_edge_name = get_tensor_edge_name(model, node, tensor_port_id, parents_node_mapping) if weight_edge_name is not None: tensor_data_type = get_tensor(model, weight_edge_name).data_type diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py index d06824ad1ca..91da8af6eec 100644 --- a/nncf/onnx/graph/model_transformer.py +++ b/nncf/onnx/graph/model_transformer.py @@ -20,11 +20,12 @@ from nncf.common.graph.transformations.layout import TransformationLayout from nncf.onnx.graph.node_utils import get_input_edge from nncf.onnx.graph.onnx_helper import get_children +from nncf.onnx.graph.onnx_helper import get_children_node_mapping from nncf.onnx.graph.onnx_helper import get_edge_dtype from nncf.onnx.graph.onnx_helper import get_edge_info_mapping -from nncf.onnx.graph.onnx_helper import get_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_node_index from nncf.onnx.graph.onnx_helper import get_node_mapping +from nncf.onnx.graph.onnx_helper import get_parents_node_mapping from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand from nncf.onnx.graph.transformations.commands import ONNXModelExtractionCommand @@ -199,8 +200,8 @@ def _apply_quantizer_insertion_transformations( """ self._added_target_edges = Counter() for transformation in transformations: - edge_node_mapping = get_edge_node_mapping(model) - model = self._insert_quantizer_dequantizer(model, transformation, edge_node_mapping) + children_node_mapping = get_children_node_mapping(model) + model = self._insert_quantizer_dequantizer(model, transformation, children_node_mapping) return model def _get_quantize_dequantize_nodes( @@ -302,7 +303,7 @@ def _insert_quantizer_dequantizer( self, model: onnx.ModelProto, transformation: ONNXQuantizerInsertionCommand, - edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + children_node_mapping: Dict[str, List[onnx.ValueInfoProto]], ) -> onnx.ModelProto: """ Inserts QuantizeLinear-DequantizeLinear nodes pair. 
@@ -321,7 +322,7 @@ def _insert_quantizer_dequantizer( # If several nodes on one edge input_nodes = [] - input_nodes.extend(edge_node_mapping[target_edge_name][1]) + input_nodes.extend(children_node_mapping[target_edge_name]) if not input_nodes: raise RuntimeError( f"Can not add the quantizer to the {target_edge_name} edge. This edge does not have end node." @@ -409,10 +410,10 @@ def _apply_qdq_node_removing_transformations( """ for transformation in transformations: node_mapping = get_node_mapping(model) - edge_node_mapping = get_edge_node_mapping(model) + children_node_mapping = get_children_node_mapping(model) node = node_mapping[transformation.target_point.target_node_name] - node_children = get_children(node, edge_node_mapping) + node_children = get_children(node, children_node_mapping) for node_child in node_children: for input_id, input_obj in enumerate(node_child.input): if input_obj == node.output[0]: diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py index 7a381e461f6..aa7f9e09201 100644 --- a/nncf/onnx/graph/nncf_graph_builder.py +++ b/nncf/onnx/graph/nncf_graph_builder.py @@ -29,13 +29,14 @@ from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXOpWithWeightsMetatype from nncf.onnx.graph.metatypes.onnx_metatypes import get_metatype from nncf.onnx.graph.metatypes.onnx_metatypes import get_tensor_edge_name +from nncf.onnx.graph.onnx_helper import get_children_node_mapping from nncf.onnx.graph.onnx_helper import get_edge_dtype from nncf.onnx.graph.onnx_helper import get_edge_info_mapping -from nncf.onnx.graph.onnx_helper import get_edge_node_mapping from nncf.onnx.graph.onnx_helper import get_edge_shape from nncf.onnx.graph.onnx_helper import get_input_port_id_for_node_after_input from nncf.onnx.graph.onnx_helper import get_model_inputs from nncf.onnx.graph.onnx_helper import get_output_port_id_for_node_before_output +from nncf.onnx.graph.onnx_helper import get_parents_node_mapping from nncf.onnx.graph.onnx_helper import get_port_ids_between_nodes from nncf.onnx.graph.onnx_helper import is_node_has_shared_weight @@ -114,7 +115,7 @@ def get_bias_tensor_port_id(metatype: ONNXOpWithWeightsMetatype) -> Optional[int def _get_weight_port_ids( node: onnx.NodeProto, model: onnx.ModelProto, - edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + parents_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], ) -> Set[int]: """ Returns all weight input ports. @@ -132,7 +133,7 @@ def _get_weight_port_ids( port_ids.update(constant_port_ids) possible_port_ids = get_possible_weight_port_ids(metatype) for port_id in possible_port_ids: - if get_tensor_edge_name(model, node, port_id, edge_node_mapping): + if get_tensor_edge_name(model, node, port_id, parents_node_mapping): port_ids.add(port_id) return port_ids @@ -184,7 +185,7 @@ def _get_node_attrs(node: onnx.NodeProto, model: onnx.ModelProto) -> Dict[str, A def _get_bias_attr( node: onnx.NodeProto, model: onnx.ModelProto, - edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + parents_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], ) -> Dict[str, str]: """ Returns bias tensor attributes. 
@@ -198,7 +199,7 @@ def _get_bias_attr( metatype = get_metatype(model, node) if _is_node_with_bias(node, model): bias_tensor_port_id = get_bias_tensor_port_id(metatype) - bias_edge_name = get_tensor_edge_name(model, node, bias_tensor_port_id, edge_node_mapping) + bias_edge_name = get_tensor_edge_name(model, node, bias_tensor_port_id, parents_node_mapping) bias_attrs["name"] = bias_edge_name return bias_attrs @@ -237,7 +238,7 @@ def _add_nncf_input_nodes( model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_info_mapping: Dict[str, onnx.ValueInfoProto], - edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + children_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], ) -> None: """ Adds special NNCF Input nodes to NNCFGraph. @@ -257,7 +258,7 @@ def _add_nncf_input_nodes( node_metatype=InputNoopMetatype, layer_attributes=layer_attributes, ) - to_nodes = edge_node_mapping[input_name][1] + to_nodes = children_node_mapping[input_name] input_node_node_id = input_node.node_id edge = edge_info_mapping[input_name] @@ -284,7 +285,7 @@ def _add_nncf_output_nodes( model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_info_mapping: Dict[str, onnx.ValueInfoProto], - edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + parents_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], ) -> None: """ Adds special NNCF Output nodes to NNCFGraph. @@ -304,7 +305,7 @@ def _add_nncf_output_nodes( node_metatype=OutputNoopMetatype, layer_attributes=layer_attributes, ) - from_node = edge_node_mapping[output_name][0] + from_node = parents_node_mapping[output_name] output_node_node_id = output_node.node_id edge = edge_info_mapping[output_name] @@ -347,15 +348,16 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: onnx_model = GraphConverter._replace_empty_node_name(onnx_model) onnx_model = onnx.shape_inference.infer_shapes(onnx_model) edge_info_mapping = get_edge_info_mapping(onnx_model) - edge_node_mapping = get_edge_node_mapping(onnx_model) + children_node_mapping = get_children_node_mapping(onnx_model) + parents_node_mapping = get_parents_node_mapping(onnx_model) nncf_graph = NNCFGraph() for node in onnx_model.graph.node: metatype = get_metatype(onnx_model, node) - weight_port_ids = _get_weight_port_ids(node, onnx_model, edge_node_mapping) + weight_port_ids = _get_weight_port_ids(node, onnx_model, parents_node_mapping) is_shared = None weight_attrs = {} node_attrs = _get_node_attrs(node, onnx_model) - bias_attrs = _get_bias_attr(node, onnx_model, edge_node_mapping) + bias_attrs = _get_bias_attr(node, onnx_model, parents_node_mapping) if weight_port_ids: # If node has weight weight_edge_names = [] for weight_port_id in weight_port_ids: @@ -364,7 +366,7 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: edge = edge_info_mapping[weight_edge_name] weight_shape = get_edge_shape(edge) weight_attrs[weight_port_id] = {"name": weight_edge_name, "shape": weight_shape} - if not is_shared and is_node_has_shared_weight(node, weight_port_id, edge_node_mapping): + if not is_shared and is_node_has_shared_weight(node, weight_port_id, children_node_mapping): is_shared = True layer_attributes = ONNXLayerAttributes( @@ -391,7 +393,7 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph: onnx_dtype = get_edge_dtype(edge) nncf_dtype = GraphConverter.convert_onnx_dtype_to_nncf_dtype(onnx_dtype) output_node_id = nncf_graph.get_node_by_name(output_node.name).node_id - input_nodes 
= edge_node_mapping[output_edge][1]
+                input_nodes = children_node_mapping[output_edge]
                 for input_node in input_nodes:
                     port_ids = get_port_ids_between_nodes(output_node, input_node)
                     input_port_id = port_ids["input_port_id"]
@@ -406,6 +408,6 @@ def create_nncf_graph(onnx_model: onnx.ModelProto) -> NNCFGraph:
             dtype=Dtype(nncf_dtype),
         )
 
-    GraphConverter._add_nncf_input_nodes(onnx_model, nncf_graph, edge_info_mapping, edge_node_mapping)
-    GraphConverter._add_nncf_output_nodes(onnx_model, nncf_graph, edge_info_mapping, edge_node_mapping)
+    GraphConverter._add_nncf_input_nodes(onnx_model, nncf_graph, edge_info_mapping, children_node_mapping)
+    GraphConverter._add_nncf_output_nodes(onnx_model, nncf_graph, edge_info_mapping, parents_node_mapping)
     return nncf_graph
diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py
index d2362b0ff4b..81c1eaf43bd 100644
--- a/nncf/onnx/graph/onnx_helper.py
+++ b/nncf/onnx/graph/onnx_helper.py
@@ -39,27 +39,19 @@ def get_edge_info_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoPro
 }
 
-def get_edge_node_mapping(model: onnx.ModelProto) -> Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]]:
-    """
-    Returns mapping describing start and consumed nodes of the edges.
-    The mapping key is an edge name, while the value is a tuple
-    containing on 0-index the node from which the edge starts and on 1-index the nodes which consume this edge.
-    None on 0-index means that there is no start node (e.g. a model input edge).
-    An empty list on 1-index means that there are no consuming nodes.
-
-    :param model: ONNX model from which the mapping is built.
-    :return: Mapping between an edge name and a tuple of its from-node and to-nodes.
-    """
-    output = {}
+def get_children_node_mapping(model: onnx.ModelProto) -> Dict[str, List[onnx.ValueInfoProto]]:
+    output = defaultdict(list)
     for node in model.graph.node:
         for input_edge in node.input:
-            if input_edge not in output:
-                output[input_edge] = [None, []]
-            output[input_edge][1].append(node)  # To node
-        for output_edge in node.output:
-            if output_edge not in output:
-                output[output_edge] = [None, []]
-            output[output_edge][0] = node  # From node
+            output[input_edge].append(node)
+    return output
+
+
+def get_parents_node_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoProto]:
+    output = defaultdict(list)
+    for node in model.graph.node:
+        for input_edge in node.output:
+            output[input_edge] = node
     return output
 
 
@@ -228,42 +236,53 @@ — no that's patch 12.
 def get_parent(
     node: onnx.NodeProto,
     port_id: int,
-    edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]],
+    parents_node_mapping: Dict[str, List[onnx.ValueInfoProto]],
 ) -> Optional[onnx.NodeProto]:
     """
     Returns the parent of the node. If there is no parent node, returns None.
 
     :param node: The child node.
     :param port_id: Input port id on which the parent is sought.
+    :param edge_node_mapping: Mapping describing start and consumed nodes of the edges.
     :return: Parent node.
     """
     if port_id < len(node.input):
-        return edge_node_mapping[node.input[port_id]][0]
+        return parents_node_mapping[node.input[port_id]]
     return None
 
 
 def get_children(
-    node: onnx.NodeProto, edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]]
+    node: onnx.NodeProto, children_node_mapping: Dict[str, List[onnx.ValueInfoProto]]
 ) -> List[onnx.NodeProto]:
     """
     Returns children of the node.
@@ -266,14 +258,14 @@ def get_children(
     """
     output = []
     for node_edge in node.output:
-        output.extend(edge_node_mapping[node_edge][1])
+        output.extend(children_node_mapping[node_edge])
     return output
 
 
 def is_node_has_shared_weight(
     node: onnx.NodeProto,
     weight_port_id: int,
-    edge_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]],
+    children_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]],
 ) -> bool:
     """
     Returns whether the node shares a weight.
@@ -284,5 +276,5 @@ def is_node_has_shared_weight(
     :return: True if the node shares a weight, otherwise False.
     """
     weight_tensor_edge = node.input[weight_port_id]
-    nodes = edge_node_mapping[weight_tensor_edge][1]
+    nodes = children_node_mapping[weight_tensor_edge]
     return len(nodes) > 1
From ccbc51fc518cb82b0cb6d86d776a3dfd72bf547d Mon Sep 17 00:00:00 2001
From: Aleksei Kashapov
Date: Wed, 4 Oct 2023 16:25:43 +0200
Subject: [PATCH 15/21] revert graph changes

---
 .../quantization/MaskRCNN-12.dot              | 100 ++--
 .../quantization/bertsquad-12.dot             | 456 +++++++++---------
 .../quantization/retinanet-9.dot              |  20 +-
 3 files changed, 288 insertions(+), 288 deletions(-)

diff --git a/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot b/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot
index e2a0350705f..0775c236c76 100644
--- a/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot
+++ b/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot
@@ -481,10 +481,10 @@ strict digraph {
 "479 QuantizeLinear_389_1" [id=479, type=QuantizeLinear];
 "480 DequantizeLinear_389_1" [id=480, type=DequantizeLinear];
 "481 390" [id=481, type=Conv];
-"482 QuantizeLinear_391_1" [id=482, type=QuantizeLinear];
-"483 DequantizeLinear_391_1" [id=483, type=DequantizeLinear];
-"484 QuantizeLinear_391_2" [id=484, type=QuantizeLinear];
-"485 DequantizeLinear_391_2" [id=485, type=DequantizeLinear];
+"482 QuantizeLinear_391_2" [id=482, type=QuantizeLinear];
+"483 DequantizeLinear_391_2" [id=483, type=DequantizeLinear];
+"484 QuantizeLinear_391_1" [id=484, type=QuantizeLinear];
+"485 DequantizeLinear_391_1" [id=485, type=DequantizeLinear];
 "486 487" [id=486, type=MaxPool];
 "487 QuantizeLinear_489_1" [id=487, type=QuantizeLinear];
 "488 DequantizeLinear_489_1" [id=488, type=DequantizeLinear];
@@ -1749,14 +1749,14 @@ strict digraph {
 "1747 1172" [id=1747, type=Gather];
 "1748 2479" [id=1748, type=Concat];
 "1749 2490" [id=1749, type=Gather];
-"1750 QuantizeLinear_2527_1" [id=1750, type=QuantizeLinear];
-"1751 DequantizeLinear_2527_1" [id=1751, type=DequantizeLinear];
-"1752 QuantizeLinear_2527_2" [id=1752, type=QuantizeLinear];
-"1753 DequantizeLinear_2527_2" [id=1753, type=DequantizeLinear];
-"1754 QuantizeLinear_2527_3" [id=1754, type=QuantizeLinear];
-"1755 DequantizeLinear_2527_3" [id=1755, type=DequantizeLinear];
-"1756 QuantizeLinear_2527_4" [id=1756, type=QuantizeLinear];
-"1757 DequantizeLinear_2527_4" [id=1757, type=DequantizeLinear];
+"1750 QuantizeLinear_2527_4" [id=1750, type=QuantizeLinear];
+"1751 DequantizeLinear_2527_4" [id=1751, type=DequantizeLinear];
+"1752 QuantizeLinear_2527_3" [id=1752, type=QuantizeLinear];
+"1753 DequantizeLinear_2527_3" [id=1753, type=DequantizeLinear];
+"1754 QuantizeLinear_2527_2" [id=1754, type=QuantizeLinear];
+"1755 DequantizeLinear_2527_2" [id=1755, type=DequantizeLinear];
+"1756 QuantizeLinear_2527_1" [id=1756, type=QuantizeLinear];
+"1757 DequantizeLinear_2527_1" [id=1757, type=DequantizeLinear];
 "1758 2532" [id=1758, type=Slice];
 "1759 2534" [id=1759, 
type=Gather]; "1760 2525" [id=1760, type=Slice]; @@ -3692,14 +3692,14 @@ strict digraph { "3690 3030" [id=3690, type=Gather]; "3691 6518" [id=3691, type=Concat]; "3692 6530" [id=3692, type=Gather]; -"3693 QuantizeLinear_6568_1" [id=3693, type=QuantizeLinear]; -"3694 DequantizeLinear_6568_1" [id=3694, type=DequantizeLinear]; -"3695 QuantizeLinear_6568_2" [id=3695, type=QuantizeLinear]; -"3696 DequantizeLinear_6568_2" [id=3696, type=DequantizeLinear]; -"3697 QuantizeLinear_6568_3" [id=3697, type=QuantizeLinear]; -"3698 DequantizeLinear_6568_3" [id=3698, type=DequantizeLinear]; -"3699 QuantizeLinear_6568_4" [id=3699, type=QuantizeLinear]; -"3700 DequantizeLinear_6568_4" [id=3700, type=DequantizeLinear]; +"3693 QuantizeLinear_6568_4" [id=3693, type=QuantizeLinear]; +"3694 DequantizeLinear_6568_4" [id=3694, type=DequantizeLinear]; +"3695 QuantizeLinear_6568_3" [id=3695, type=QuantizeLinear]; +"3696 DequantizeLinear_6568_3" [id=3696, type=DequantizeLinear]; +"3697 QuantizeLinear_6568_2" [id=3697, type=QuantizeLinear]; +"3698 DequantizeLinear_6568_2" [id=3698, type=DequantizeLinear]; +"3699 QuantizeLinear_6568_1" [id=3699, type=QuantizeLinear]; +"3700 DequantizeLinear_6568_1" [id=3700, type=DequantizeLinear]; "3701 6576" [id=3701, type=Slice]; "3702 6578" [id=3702, type=Gather]; "3703 6569" [id=3703, type=Slice]; @@ -4788,16 +4788,16 @@ strict digraph { "478 DequantizeLinear_388_1" -> "481 390" [label="[1, 256, -1, -1]", style=solid]; "479 QuantizeLinear_389_1" -> "480 DequantizeLinear_389_1" [label="[256, 256, 3, 3]", style=dashed]; "480 DequantizeLinear_389_1" -> "481 390" [label="[256, 256, 3, 3]", style=solid]; -"481 390" -> "482 QuantizeLinear_391_1" [label="[1, 256, -1, -1]", style=solid]; -"481 390" -> "484 QuantizeLinear_391_2" [label="[1, 256, -1, -1]", style=solid]; +"481 390" -> "482 QuantizeLinear_391_2" [label="[1, 256, -1, -1]", style=solid]; +"481 390" -> "484 QuantizeLinear_391_1" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "784 536" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "787 533" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "1929 2620" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "3872 6664" [label="[1, 256, -1, -1]", style=solid]; -"482 QuantizeLinear_391_1" -> "483 DequantizeLinear_391_1" [label="[1, 256, -1, -1]", style=dashed]; -"483 DequantizeLinear_391_1" -> "486 487" [label="[1, 256, -1, -1]", style=solid]; -"484 QuantizeLinear_391_2" -> "485 DequantizeLinear_391_2" [label="[1, 256, -1, -1]", style=dashed]; -"485 DequantizeLinear_391_2" -> "722 506" [label="[1, 256, -1, -1]", style=solid]; +"482 QuantizeLinear_391_2" -> "483 DequantizeLinear_391_2" [label="[1, 256, -1, -1]", style=dashed]; +"483 DequantizeLinear_391_2" -> "722 506" [label="[1, 256, -1, -1]", style=solid]; +"484 QuantizeLinear_391_1" -> "485 DequantizeLinear_391_1" [label="[1, 256, -1, -1]", style=dashed]; +"485 DequantizeLinear_391_1" -> "486 487" [label="[1, 256, -1, -1]", style=solid]; "486 487" -> "489 510" [label="[1, 256, -1, -1]", style=solid]; "486 487" -> "555 542" [label="[1, 256, -1, -1]", style=solid]; "486 487" -> "558 539" [label="[1, 256, -1, -1]", style=solid]; @@ -6341,21 +6341,21 @@ strict digraph { "1746 1171" -> "1747 1172" [label="[-1]", style=dashed]; "1747 1172" -> "1748 2479" [label="[-1, 4]", style=solid]; "1748 2479" -> "1749 2490" [label="[-1, 4]", style=solid]; -"1749 2490" -> "1750 QuantizeLinear_2527_1" [label="[]", style=solid]; -"1749 2490" -> "1752 QuantizeLinear_2527_2" [label="[]", style=solid]; -"1749 2490" -> "1754 
QuantizeLinear_2527_3" [label="[]", style=solid]; -"1749 2490" -> "1756 QuantizeLinear_2527_4" [label="[]", style=solid]; +"1749 2490" -> "1750 QuantizeLinear_2527_4" [label="[]", style=solid]; +"1749 2490" -> "1752 QuantizeLinear_2527_3" [label="[]", style=solid]; +"1749 2490" -> "1754 QuantizeLinear_2527_2" [label="[]", style=solid]; +"1749 2490" -> "1756 QuantizeLinear_2527_1" [label="[]", style=solid]; "1749 2490" -> "1803 2495" [label="[]", style=solid]; "1749 2490" -> "1807 2503" [label="[]", style=solid]; "1749 2490" -> "2009 2775" [label="[]", style=solid]; -"1750 QuantizeLinear_2527_1" -> "1751 DequantizeLinear_2527_1" [label="[]", style=dashed]; -"1751 DequantizeLinear_2527_1" -> "1758 2532" [label="[]", style=solid]; -"1752 QuantizeLinear_2527_2" -> "1753 DequantizeLinear_2527_2" [label="[]", style=dashed]; -"1753 DequantizeLinear_2527_2" -> "1760 2525" [label="[]", style=solid]; -"1754 QuantizeLinear_2527_3" -> "1755 DequantizeLinear_2527_3" [label="[]", style=dashed]; -"1755 DequantizeLinear_2527_3" -> "1766 2515" [label="[]", style=solid]; -"1756 QuantizeLinear_2527_4" -> "1757 DequantizeLinear_2527_4" [label="[]", style=dashed]; -"1757 DequantizeLinear_2527_4" -> "1768 2508" [label="[]", style=solid]; +"1750 QuantizeLinear_2527_4" -> "1751 DequantizeLinear_2527_4" [label="[]", style=dashed]; +"1751 DequantizeLinear_2527_4" -> "1768 2508" [label="[]", style=solid]; +"1752 QuantizeLinear_2527_3" -> "1753 DequantizeLinear_2527_3" [label="[]", style=dashed]; +"1753 DequantizeLinear_2527_3" -> "1766 2515" [label="[]", style=solid]; +"1754 QuantizeLinear_2527_2" -> "1755 DequantizeLinear_2527_2" [label="[]", style=dashed]; +"1755 DequantizeLinear_2527_2" -> "1760 2525" [label="[]", style=solid]; +"1756 QuantizeLinear_2527_1" -> "1757 DequantizeLinear_2527_1" [label="[]", style=dashed]; +"1757 DequantizeLinear_2527_1" -> "1758 2532" [label="[]", style=solid]; "1758 2532" -> "1759 2534" [label="[]", style=solid]; "1759 2534" -> "1762 2535" [label="[]", style=solid]; "1760 2525" -> "1761 2527" [label="[]", style=solid]; @@ -8988,21 +8988,21 @@ strict digraph { "3690 3030" -> "3691 6518" [label="[]", style=solid]; "3690 3030" -> "4259 3037" [label="[]", style=solid]; "3691 6518" -> "3692 6530" [label="[]", style=solid]; -"3692 6530" -> "3693 QuantizeLinear_6568_1" [label="[-1, 4]", style=solid]; -"3692 6530" -> "3695 QuantizeLinear_6568_2" [label="[-1, 4]", style=solid]; -"3692 6530" -> "3697 QuantizeLinear_6568_3" [label="[-1, 4]", style=solid]; -"3692 6530" -> "3699 QuantizeLinear_6568_4" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3693 QuantizeLinear_6568_4" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3695 QuantizeLinear_6568_3" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3697 QuantizeLinear_6568_2" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3699 QuantizeLinear_6568_1" [label="[-1, 4]", style=solid]; "3692 6530" -> "3746 6539" [label="[-1, 4]", style=solid]; "3692 6530" -> "3750 6547" [label="[-1, 4]", style=solid]; "3692 6530" -> "4281 nncf_model_output_0" [label="[-1, 4]", style=solid]; -"3693 QuantizeLinear_6568_1" -> "3694 DequantizeLinear_6568_1" [label="[-1, 4]", style=dashed]; -"3694 DequantizeLinear_6568_1" -> "3701 6576" [label="[-1, 4]", style=solid]; -"3695 QuantizeLinear_6568_2" -> "3696 DequantizeLinear_6568_2" [label="[-1, 4]", style=dashed]; -"3696 DequantizeLinear_6568_2" -> "3703 6569" [label="[-1, 4]", style=solid]; -"3697 QuantizeLinear_6568_3" -> "3698 DequantizeLinear_6568_3" [label="[-1, 4]", style=dashed]; -"3698 DequantizeLinear_6568_3" 
-> "3709 6559" [label="[-1, 4]", style=solid]; -"3699 QuantizeLinear_6568_4" -> "3700 DequantizeLinear_6568_4" [label="[-1, 4]", style=dashed]; -"3700 DequantizeLinear_6568_4" -> "3711 6552" [label="[-1, 4]", style=solid]; +"3693 QuantizeLinear_6568_4" -> "3694 DequantizeLinear_6568_4" [label="[-1, 4]", style=dashed]; +"3694 DequantizeLinear_6568_4" -> "3711 6552" [label="[-1, 4]", style=solid]; +"3695 QuantizeLinear_6568_3" -> "3696 DequantizeLinear_6568_3" [label="[-1, 4]", style=dashed]; +"3696 DequantizeLinear_6568_3" -> "3709 6559" [label="[-1, 4]", style=solid]; +"3697 QuantizeLinear_6568_2" -> "3698 DequantizeLinear_6568_2" [label="[-1, 4]", style=dashed]; +"3698 DequantizeLinear_6568_2" -> "3703 6569" [label="[-1, 4]", style=solid]; +"3699 QuantizeLinear_6568_1" -> "3700 DequantizeLinear_6568_1" [label="[-1, 4]", style=dashed]; +"3700 DequantizeLinear_6568_1" -> "3701 6576" [label="[-1, 4]", style=solid]; "3701 6576" -> "3702 6578" [label="[-1, 4]", style=solid]; "3702 6578" -> "3705 6579" [label="[-1]", style=solid]; "3703 6569" -> "3704 6571" [label="[-1, 4]", style=solid]; diff --git a/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot b/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot index 5e2502ba0c4..246765a6663 100644 --- a/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot +++ b/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot @@ -405,14 +405,14 @@ strict digraph { "403 bert/encoder/Reshape_13/shape_Concat__301" [id=403, type=Concat]; "404 bert/encoder/Reshape_13__471" [id=404, type=Cast]; "405 bert/encoder/Reshape_1" [id=405, type=Reshape]; -"406 QuantizeLinear_bert/encoder/Reshape_1^0_1" [id=406, label="406 QuantizeLinear_bert/encoder/Reshape_1:0_1", type=QuantizeLinear]; -"407 DequantizeLinear_bert/encoder/Reshape_1^0_1" [id=407, label="407 DequantizeLinear_bert/encoder/Reshape_1:0_1", type=DequantizeLinear]; -"408 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=408, label="408 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=409, label="409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"410 QuantizeLinear_bert/encoder/Reshape_1^0_2" [id=410, label="410 QuantizeLinear_bert/encoder/Reshape_1:0_2", type=QuantizeLinear]; -"411 DequantizeLinear_bert/encoder/Reshape_1^0_2" [id=411, label="411 DequantizeLinear_bert/encoder/Reshape_1:0_2", type=DequantizeLinear]; -"412 QuantizeLinear_bert/encoder/Reshape_1^0_3" [id=412, label="412 QuantizeLinear_bert/encoder/Reshape_1:0_3", type=QuantizeLinear]; -"413 DequantizeLinear_bert/encoder/Reshape_1^0_3" [id=413, label="413 DequantizeLinear_bert/encoder/Reshape_1:0_3", type=DequantizeLinear]; +"406 QuantizeLinear_bert/encoder/Reshape_1^0_3" [id=406, label="406 QuantizeLinear_bert/encoder/Reshape_1:0_3", type=QuantizeLinear]; +"407 DequantizeLinear_bert/encoder/Reshape_1^0_3" [id=407, label="407 DequantizeLinear_bert/encoder/Reshape_1:0_3", type=DequantizeLinear]; +"408 QuantizeLinear_bert/encoder/Reshape_1^0_2" [id=408, label="408 QuantizeLinear_bert/encoder/Reshape_1:0_2", type=QuantizeLinear]; +"409 DequantizeLinear_bert/encoder/Reshape_1^0_2" [id=409, label="409 DequantizeLinear_bert/encoder/Reshape_1:0_2", type=DequantizeLinear]; +"410 QuantizeLinear_bert/encoder/Reshape_1^0_1" [id=410, label="410 QuantizeLinear_bert/encoder/Reshape_1:0_1", type=QuantizeLinear]; +"411 
DequantizeLinear_bert/encoder/Reshape_1^0_1" [id=411, label="411 DequantizeLinear_bert/encoder/Reshape_1:0_1", type=DequantizeLinear]; +"412 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=412, label="412 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=413, label="413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=DequantizeLinear]; "414 bert/encoder/layer_0/attention/self/value/MatMul" [id=414, type=MatMul]; "415 bert/encoder/layer_0/attention/self/value/BiasAdd" [id=415, type=Add]; "416 bert/encoder/layer_0/attention/self/Reshape_2" [id=416, type=Reshape]; @@ -495,14 +495,14 @@ strict digraph { "493 bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" [id=493, type=Sub]; "494 bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1" [id=494, type=Mul]; "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" [id=495, type=Add]; -"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=496, label="496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=497, label="497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"498 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=498, label="498 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=499, label="499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=500, label="500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=501, label="501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=502, label="502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=503, label="503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=496, label="496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=497, label="497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=498, label="498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=499, label="499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=500, label="500 
QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=501, label="501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"502 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=502, label="502 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=503, label="503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=DequantizeLinear]; "504 bert/encoder/layer_1/attention/self/value/MatMul" [id=504, type=MatMul]; "505 bert/encoder/layer_1/attention/self/value/BiasAdd" [id=505, type=Add]; "506 bert/encoder/layer_1/attention/self/Reshape_2" [id=506, type=Reshape]; @@ -585,14 +585,14 @@ strict digraph { "583 bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" [id=583, type=Sub]; "584 bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1" [id=584, type=Mul]; "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" [id=585, type=Add]; -"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=586, label="586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=587, label="587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"588 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=588, label="588 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=589, label="589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=590, label="590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=591, label="591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=592, label="592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=593, label="593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=586, label="586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=587, label="587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=588, label="588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=589, label="589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", 
type=DequantizeLinear]; +"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=590, label="590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=591, label="591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"592 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=592, label="592 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=593, label="593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=DequantizeLinear]; "594 bert/encoder/layer_2/attention/self/value/MatMul" [id=594, type=MatMul]; "595 bert/encoder/layer_2/attention/self/value/BiasAdd" [id=595, type=Add]; "596 bert/encoder/layer_2/attention/self/Reshape_2" [id=596, type=Reshape]; @@ -675,14 +675,14 @@ strict digraph { "673 bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" [id=673, type=Sub]; "674 bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1" [id=674, type=Mul]; "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" [id=675, type=Add]; -"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=676, label="676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=677, label="677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"678 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=678, label="678 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=679, label="679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=680, label="680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=681, label="681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=682, label="682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=683, label="683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=676, label="676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=677, label="677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=678, label="678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"679 
DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=679, label="679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=680, label="680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=681, label="681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"682 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=682, label="682 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=683, label="683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=DequantizeLinear]; "684 bert/encoder/layer_3/attention/self/value/MatMul" [id=684, type=MatMul]; "685 bert/encoder/layer_3/attention/self/value/BiasAdd" [id=685, type=Add]; "686 bert/encoder/layer_3/attention/self/Reshape_2" [id=686, type=Reshape]; @@ -765,14 +765,14 @@ strict digraph { "763 bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" [id=763, type=Sub]; "764 bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1" [id=764, type=Mul]; "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" [id=765, type=Add]; -"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=766, label="766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=767, label="767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"768 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=768, label="768 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=769, label="769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=770, label="770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=771, label="771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=772, label="772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=773, label="773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=766, label="766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=767, label="767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=768, 
label="768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=769, label="769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=770, label="770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=771, label="771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"772 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=772, label="772 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=773, label="773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=DequantizeLinear]; "774 bert/encoder/layer_4/attention/self/value/MatMul" [id=774, type=MatMul]; "775 bert/encoder/layer_4/attention/self/value/BiasAdd" [id=775, type=Add]; "776 bert/encoder/layer_4/attention/self/Reshape_2" [id=776, type=Reshape]; @@ -855,14 +855,14 @@ strict digraph { "853 bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" [id=853, type=Sub]; "854 bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1" [id=854, type=Mul]; "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" [id=855, type=Add]; -"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=856, label="856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=857, label="857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"858 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=858, label="858 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=859, label="859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=860, label="860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=861, label="861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=862, label="862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=863, label="863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=856, label="856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=857, label="857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", 
type=DequantizeLinear]; +"858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=858, label="858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=859, label="859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=860, label="860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=861, label="861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"862 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=862, label="862 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=863, label="863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=DequantizeLinear]; "864 bert/encoder/layer_5/attention/self/value/MatMul" [id=864, type=MatMul]; "865 bert/encoder/layer_5/attention/self/value/BiasAdd" [id=865, type=Add]; "866 bert/encoder/layer_5/attention/self/Reshape_2" [id=866, type=Reshape]; @@ -945,14 +945,14 @@ strict digraph { "943 bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" [id=943, type=Sub]; "944 bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1" [id=944, type=Mul]; "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" [id=945, type=Add]; -"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=946, label="946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=947, label="947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"948 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=948, label="948 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=949, label="949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=950, label="950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=951, label="951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=952, label="952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=953, label="953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=946, label="946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"947 
DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=947, label="947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=948, label="948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=949, label="949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=950, label="950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=951, label="951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"952 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=952, label="952 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=953, label="953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=DequantizeLinear]; "954 bert/encoder/layer_6/attention/self/value/MatMul" [id=954, type=MatMul]; "955 bert/encoder/layer_6/attention/self/value/BiasAdd" [id=955, type=Add]; "956 bert/encoder/layer_6/attention/self/Reshape_2" [id=956, type=Reshape]; @@ -1035,14 +1035,14 @@ strict digraph { "1033 bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" [id=1033, type=Sub]; "1034 bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1" [id=1034, type=Mul]; "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" [id=1035, type=Add]; -"1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1036, label="1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1037, label="1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1038 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1038, label="1038 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1039, label="1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1040, label="1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1041, label="1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1042, label="1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1043, label="1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"1036 
QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1036, label="1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1037, label="1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1038, label="1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1039, label="1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1040, label="1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1041, label="1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1042 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1042, label="1042 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1043, label="1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=DequantizeLinear]; "1044 bert/encoder/layer_7/attention/self/value/MatMul" [id=1044, type=MatMul]; "1045 bert/encoder/layer_7/attention/self/value/BiasAdd" [id=1045, type=Add]; "1046 bert/encoder/layer_7/attention/self/Reshape_2" [id=1046, type=Reshape]; @@ -1125,14 +1125,14 @@ strict digraph { "1123 bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" [id=1123, type=Sub]; "1124 bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1" [id=1124, type=Mul]; "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" [id=1125, type=Add]; -"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1126, label="1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1127, label="1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1128 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1128, label="1128 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1129, label="1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1130, label="1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1131, label="1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1132, label="1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1133 
DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1133, label="1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1126, label="1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1127, label="1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1128, label="1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1129, label="1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1130, label="1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1131, label="1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1132 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1132, label="1132 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1133, label="1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=DequantizeLinear]; "1134 bert/encoder/layer_8/attention/self/value/MatMul" [id=1134, type=MatMul]; "1135 bert/encoder/layer_8/attention/self/value/BiasAdd" [id=1135, type=Add]; "1136 bert/encoder/layer_8/attention/self/Reshape_2" [id=1136, type=Reshape]; @@ -1215,14 +1215,14 @@ strict digraph { "1213 bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" [id=1213, type=Sub]; "1214 bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1" [id=1214, type=Mul]; "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" [id=1215, type=Add]; -"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1216, label="1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1217, label="1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1218 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1218, label="1218 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1219, label="1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1220, label="1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1221, label="1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1222 
QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1222, label="1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1223, label="1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1216, label="1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1217, label="1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1218, label="1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1219, label="1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1220, label="1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1221, label="1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1222 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1222, label="1222 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1223, label="1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=DequantizeLinear]; "1224 bert/encoder/layer_9/attention/self/value/MatMul" [id=1224, type=MatMul]; "1225 bert/encoder/layer_9/attention/self/value/BiasAdd" [id=1225, type=Add]; "1226 bert/encoder/layer_9/attention/self/Reshape_2" [id=1226, type=Reshape]; @@ -1305,14 +1305,14 @@ strict digraph { "1303 bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" [id=1303, type=Sub]; "1304 bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1" [id=1304, type=Mul]; "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" [id=1305, type=Add]; -"1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1306, label="1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1307, label="1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1308 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1308, label="1308 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1309, label="1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1310, label="1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1311 
DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1311, label="1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1312, label="1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1313, label="1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1306, label="1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1307, label="1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1308, label="1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1309, label="1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1310, label="1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1311, label="1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1312 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1312, label="1312 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1313, label="1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=DequantizeLinear]; "1314 bert/encoder/layer_10/attention/self/value/MatMul" [id=1314, type=MatMul]; "1315 bert/encoder/layer_10/attention/self/value/BiasAdd" [id=1315, type=Add]; "1316 bert/encoder/layer_10/attention/self/Reshape_2" [id=1316, type=Reshape]; @@ -1395,14 +1395,14 @@ strict digraph { "1393 bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" [id=1393, type=Sub]; "1394 bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1" [id=1394, type=Mul]; "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" [id=1395, type=Add]; -"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1396, label="1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1397, label="1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1398 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1398, label="1398 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1399, label="1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=DequantizeLinear]; -"1400 
QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1400, label="1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1401, label="1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1402, label="1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1403, label="1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1396, label="1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1397, label="1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; +"1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1398, label="1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1399, label="1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1400, label="1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1401, label="1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1402 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1402, label="1402 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1403, label="1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=DequantizeLinear]; "1404 bert/encoder/layer_11/attention/self/value/MatMul" [id=1404, type=MatMul]; "1405 bert/encoder/layer_11/attention/self/value/BiasAdd" [id=1405, type=Add]; "1406 bert/encoder/layer_11/attention/self/Reshape_2" [id=1406, type=Reshape]; @@ -1991,18 +1991,18 @@ strict digraph { "402 bert/encoder/Reshape_13/shape_Unsqueeze__298" -> "403 bert/encoder/Reshape_13/shape_Concat__301" [label="[1]", style=dashed]; "403 bert/encoder/Reshape_13/shape_Concat__301" -> "404 bert/encoder/Reshape_13__471" [label="[3]", style=dashed]; "404 bert/encoder/Reshape_13__471" -> "1488 bert/encoder/Reshape_13" [label="[3]", style=dashed]; -"405 bert/encoder/Reshape_1" -> "406 QuantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=solid]; -"405 bert/encoder/Reshape_1" -> "410 QuantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=solid]; -"405 bert/encoder/Reshape_1" -> "412 QuantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=solid]; +"405 bert/encoder/Reshape_1" -> "406 QuantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=solid]; +"405 bert/encoder/Reshape_1" -> "408 
QuantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=solid]; +"405 bert/encoder/Reshape_1" -> "410 QuantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=solid]; "405 bert/encoder/Reshape_1" -> "448 bert/encoder/layer_0/attention/output/add" [label="[]", style=solid]; -"406 QuantizeLinear_bert/encoder/Reshape_1^0_1" -> "407 DequantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=dashed]; -"407 DequantizeLinear_bert/encoder/Reshape_1^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[]", style=solid]; -"408 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"410 QuantizeLinear_bert/encoder/Reshape_1^0_2" -> "411 DequantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=dashed]; -"411 DequantizeLinear_bert/encoder/Reshape_1^0_2" -> "420 bert/encoder/layer_0/attention/self/query/MatMul" [label="[]", style=solid]; -"412 QuantizeLinear_bert/encoder/Reshape_1^0_3" -> "413 DequantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=dashed]; -"413 DequantizeLinear_bert/encoder/Reshape_1^0_3" -> "428 bert/encoder/layer_0/attention/self/key/MatMul" [label="[]", style=solid]; +"406 QuantizeLinear_bert/encoder/Reshape_1^0_3" -> "407 DequantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=dashed]; +"407 DequantizeLinear_bert/encoder/Reshape_1^0_3" -> "428 bert/encoder/layer_0/attention/self/key/MatMul" [label="[]", style=solid]; +"408 QuantizeLinear_bert/encoder/Reshape_1^0_2" -> "409 DequantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=dashed]; +"409 DequantizeLinear_bert/encoder/Reshape_1^0_2" -> "420 bert/encoder/layer_0/attention/self/query/MatMul" [label="[]", style=solid]; +"410 QuantizeLinear_bert/encoder/Reshape_1^0_1" -> "411 DequantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=dashed]; +"411 DequantizeLinear_bert/encoder/Reshape_1^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[]", style=solid]; +"412 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "414 bert/encoder/layer_0/attention/self/value/MatMul" -> "415 bert/encoder/layer_0/attention/self/value/BiasAdd" [label="[]", style=solid]; "415 bert/encoder/layer_0/attention/self/value/BiasAdd" -> "416 bert/encoder/layer_0/attention/self/Reshape_2" [label="[]", style=solid]; "416 bert/encoder/layer_0/attention/self/Reshape_2" -> "417 bert/encoder/layer_0/attention/self/transpose_2" [label="[]", style=solid]; @@ -2095,18 +2095,18 @@ strict digraph { "492 bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2" -> "493 bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "493 bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" -> "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "494 bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1" -> "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"495 
bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "538 bert/encoder/layer_1/attention/output/add" [label="[]", style=solid]; -"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[]", style=solid]; -"498 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "510 bert/encoder/layer_1/attention/self/query/MatMul" [label="[]", style=solid]; -"502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "518 bert/encoder/layer_1/attention/self/key/MatMul" [label="[]", style=solid]; +"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "518 bert/encoder/layer_1/attention/self/key/MatMul" [label="[]", style=solid]; +"498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "510 bert/encoder/layer_1/attention/self/query/MatMul" [label="[]", style=solid]; +"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"501 
DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[]", style=solid]; +"502 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "504 bert/encoder/layer_1/attention/self/value/MatMul" -> "505 bert/encoder/layer_1/attention/self/value/BiasAdd" [label="[]", style=solid]; "505 bert/encoder/layer_1/attention/self/value/BiasAdd" -> "506 bert/encoder/layer_1/attention/self/Reshape_2" [label="[]", style=solid]; "506 bert/encoder/layer_1/attention/self/Reshape_2" -> "507 bert/encoder/layer_1/attention/self/transpose_2" [label="[]", style=solid]; @@ -2199,18 +2199,18 @@ strict digraph { "582 bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2" -> "583 bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "583 bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" -> "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "584 bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1" -> "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "628 bert/encoder/layer_2/attention/output/add" [label="[]", style=solid]; -"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[]", style=solid]; -"588 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"591 
DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "600 bert/encoder/layer_2/attention/self/query/MatMul" [label="[]", style=solid]; -"592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "608 bert/encoder/layer_2/attention/self/key/MatMul" [label="[]", style=solid]; +"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "608 bert/encoder/layer_2/attention/self/key/MatMul" [label="[]", style=solid]; +"588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "600 bert/encoder/layer_2/attention/self/query/MatMul" [label="[]", style=solid]; +"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[]", style=solid]; +"592 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "594 bert/encoder/layer_2/attention/self/value/MatMul" -> "595 bert/encoder/layer_2/attention/self/value/BiasAdd" [label="[]", style=solid]; "595 bert/encoder/layer_2/attention/self/value/BiasAdd" -> "596 bert/encoder/layer_2/attention/self/Reshape_2" [label="[]", style=solid]; "596 bert/encoder/layer_2/attention/self/Reshape_2" -> "597 bert/encoder/layer_2/attention/self/transpose_2" [label="[]", style=solid]; @@ -2303,18 +2303,18 @@ strict digraph { "672 bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2" -> "673 bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "673 bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" -> "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "674 bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1" -> "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"675 
bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "718 bert/encoder/layer_3/attention/output/add" [label="[]", style=solid]; -"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[]", style=solid]; -"678 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "690 bert/encoder/layer_3/attention/self/query/MatMul" [label="[]", style=solid]; -"682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "698 bert/encoder/layer_3/attention/self/key/MatMul" [label="[]", style=solid]; +"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "698 bert/encoder/layer_3/attention/self/key/MatMul" [label="[]", style=solid]; +"678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "690 bert/encoder/layer_3/attention/self/query/MatMul" [label="[]", style=solid]; +"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[]", style=solid]; +"682 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "684 bert/encoder/layer_3/attention/self/value/MatMul" -> "685 bert/encoder/layer_3/attention/self/value/BiasAdd" [label="[]", style=solid]; "685 bert/encoder/layer_3/attention/self/value/BiasAdd" -> "686 
bert/encoder/layer_3/attention/self/Reshape_2" [label="[]", style=solid]; "686 bert/encoder/layer_3/attention/self/Reshape_2" -> "687 bert/encoder/layer_3/attention/self/transpose_2" [label="[]", style=solid]; @@ -2407,18 +2407,18 @@ strict digraph { "762 bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2" -> "763 bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "763 bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" -> "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "764 bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1" -> "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "808 bert/encoder/layer_4/attention/output/add" [label="[]", style=solid]; -"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[]", style=solid]; -"768 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "780 bert/encoder/layer_4/attention/self/query/MatMul" [label="[]", style=solid]; -"772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "788 bert/encoder/layer_4/attention/self/key/MatMul" [label="[]", style=solid]; +"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"767 
DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "788 bert/encoder/layer_4/attention/self/key/MatMul" [label="[]", style=solid]; +"768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "780 bert/encoder/layer_4/attention/self/query/MatMul" [label="[]", style=solid]; +"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[]", style=solid]; +"772 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "774 bert/encoder/layer_4/attention/self/value/MatMul" -> "775 bert/encoder/layer_4/attention/self/value/BiasAdd" [label="[]", style=solid]; "775 bert/encoder/layer_4/attention/self/value/BiasAdd" -> "776 bert/encoder/layer_4/attention/self/Reshape_2" [label="[]", style=solid]; "776 bert/encoder/layer_4/attention/self/Reshape_2" -> "777 bert/encoder/layer_4/attention/self/transpose_2" [label="[]", style=solid]; @@ -2511,18 +2511,18 @@ strict digraph { "852 bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2" -> "853 bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "853 bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" -> "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "854 bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1" -> "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "898 bert/encoder/layer_5/attention/output/add" [label="[]", style=solid]; -"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"857 
DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[]", style=solid]; -"858 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "870 bert/encoder/layer_5/attention/self/query/MatMul" [label="[]", style=solid]; -"862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "878 bert/encoder/layer_5/attention/self/key/MatMul" [label="[]", style=solid]; +"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "878 bert/encoder/layer_5/attention/self/key/MatMul" [label="[]", style=solid]; +"858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "870 bert/encoder/layer_5/attention/self/query/MatMul" [label="[]", style=solid]; +"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[]", style=solid]; +"862 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "864 bert/encoder/layer_5/attention/self/value/MatMul" -> "865 bert/encoder/layer_5/attention/self/value/BiasAdd" [label="[]", style=solid]; "865 bert/encoder/layer_5/attention/self/value/BiasAdd" -> "866 bert/encoder/layer_5/attention/self/Reshape_2" [label="[]", style=solid]; "866 bert/encoder/layer_5/attention/self/Reshape_2" -> "867 bert/encoder/layer_5/attention/self/transpose_2" [label="[]", style=solid]; @@ -2615,18 +2615,18 @@ strict digraph { "942 bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2" -> "943 bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "943 bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" -> "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "944 bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1" -> "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" 
[label="[]", style=solid]; -"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "988 bert/encoder/layer_6/attention/output/add" [label="[]", style=solid]; -"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[]", style=solid]; -"948 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "960 bert/encoder/layer_6/attention/self/query/MatMul" [label="[]", style=solid]; -"952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "968 bert/encoder/layer_6/attention/self/key/MatMul" [label="[]", style=solid]; +"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "968 bert/encoder/layer_6/attention/self/key/MatMul" [label="[]", style=solid]; +"948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "960 bert/encoder/layer_6/attention/self/query/MatMul" [label="[]", style=solid]; +"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"951 
DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[]", style=solid]; +"952 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "954 bert/encoder/layer_6/attention/self/value/MatMul" -> "955 bert/encoder/layer_6/attention/self/value/BiasAdd" [label="[]", style=solid]; "955 bert/encoder/layer_6/attention/self/value/BiasAdd" -> "956 bert/encoder/layer_6/attention/self/Reshape_2" [label="[]", style=solid]; "956 bert/encoder/layer_6/attention/self/Reshape_2" -> "957 bert/encoder/layer_6/attention/self/transpose_2" [label="[]", style=solid]; @@ -2719,18 +2719,18 @@ strict digraph { "1032 bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2" -> "1033 bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1033 bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" -> "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1034 bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1" -> "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1078 bert/encoder/layer_7/attention/output/add" [label="[]", style=solid]; -"1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[]", style=solid]; -"1038 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1041 
DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1050 bert/encoder/layer_7/attention/self/query/MatMul" [label="[]", style=solid]; -"1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1058 bert/encoder/layer_7/attention/self/key/MatMul" [label="[]", style=solid]; +"1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1058 bert/encoder/layer_7/attention/self/key/MatMul" [label="[]", style=solid]; +"1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1050 bert/encoder/layer_7/attention/self/query/MatMul" [label="[]", style=solid]; +"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[]", style=solid]; +"1042 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "1044 bert/encoder/layer_7/attention/self/value/MatMul" -> "1045 bert/encoder/layer_7/attention/self/value/BiasAdd" [label="[]", style=solid]; "1045 bert/encoder/layer_7/attention/self/value/BiasAdd" -> "1046 bert/encoder/layer_7/attention/self/Reshape_2" [label="[]", style=solid]; "1046 bert/encoder/layer_7/attention/self/Reshape_2" -> "1047 bert/encoder/layer_7/attention/self/transpose_2" [label="[]", style=solid]; @@ -2823,18 +2823,18 @@ strict digraph { "1122 bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2" -> "1123 bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1123 bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" -> "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1124 bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1" -> "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" 
[label="[]", style=solid]; +"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1168 bert/encoder/layer_8/attention/output/add" [label="[]", style=solid]; -"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[]", style=solid]; -"1128 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1140 bert/encoder/layer_8/attention/self/query/MatMul" [label="[]", style=solid]; -"1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1148 bert/encoder/layer_8/attention/self/key/MatMul" [label="[]", style=solid]; +"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1148 bert/encoder/layer_8/attention/self/key/MatMul" [label="[]", style=solid]; +"1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1140 bert/encoder/layer_8/attention/self/query/MatMul" [label="[]", style=solid]; +"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[]", style=solid]; +"1132 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "1134 bert/encoder/layer_8/attention/self/value/MatMul" -> "1135 bert/encoder/layer_8/attention/self/value/BiasAdd" [label="[]", 
style=solid]; "1135 bert/encoder/layer_8/attention/self/value/BiasAdd" -> "1136 bert/encoder/layer_8/attention/self/Reshape_2" [label="[]", style=solid]; "1136 bert/encoder/layer_8/attention/self/Reshape_2" -> "1137 bert/encoder/layer_8/attention/self/transpose_2" [label="[]", style=solid]; @@ -2927,18 +2927,18 @@ strict digraph { "1212 bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2" -> "1213 bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1213 bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" -> "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1214 bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1" -> "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1258 bert/encoder/layer_9/attention/output/add" [label="[]", style=solid]; -"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[]", style=solid]; -"1218 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1230 bert/encoder/layer_9/attention/self/query/MatMul" [label="[]", style=solid]; -"1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1238 bert/encoder/layer_9/attention/self/key/MatMul" [label="[]", style=solid]; +"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1217 
DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1238 bert/encoder/layer_9/attention/self/key/MatMul" [label="[]", style=solid]; +"1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1230 bert/encoder/layer_9/attention/self/query/MatMul" [label="[]", style=solid]; +"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[]", style=solid]; +"1222 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "1224 bert/encoder/layer_9/attention/self/value/MatMul" -> "1225 bert/encoder/layer_9/attention/self/value/BiasAdd" [label="[]", style=solid]; "1225 bert/encoder/layer_9/attention/self/value/BiasAdd" -> "1226 bert/encoder/layer_9/attention/self/Reshape_2" [label="[]", style=solid]; "1226 bert/encoder/layer_9/attention/self/Reshape_2" -> "1227 bert/encoder/layer_9/attention/self/transpose_2" [label="[]", style=solid]; @@ -3031,18 +3031,18 @@ strict digraph { "1302 bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2" -> "1303 bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1303 bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" -> "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1304 bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1" -> "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1348 bert/encoder/layer_10/attention/output/add" [label="[]", style=solid]; -"1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1307 
DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[]", style=solid]; -"1308 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1320 bert/encoder/layer_10/attention/self/query/MatMul" [label="[]", style=solid]; -"1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1328 bert/encoder/layer_10/attention/self/key/MatMul" [label="[]", style=solid]; +"1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1328 bert/encoder/layer_10/attention/self/key/MatMul" [label="[]", style=solid]; +"1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1320 bert/encoder/layer_10/attention/self/query/MatMul" [label="[]", style=solid]; +"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[]", style=solid]; +"1312 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "1314 bert/encoder/layer_10/attention/self/value/MatMul" -> "1315 bert/encoder/layer_10/attention/self/value/BiasAdd" [label="[]", style=solid]; "1315 bert/encoder/layer_10/attention/self/value/BiasAdd" -> "1316 bert/encoder/layer_10/attention/self/Reshape_2" [label="[]", style=solid]; "1316 bert/encoder/layer_10/attention/self/Reshape_2" -> "1317 bert/encoder/layer_10/attention/self/transpose_2" [label="[]", style=solid]; @@ -3135,18 +3135,18 @@ strict digraph { "1392 bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2" -> "1393 bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1393 bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" -> "1395 
bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1394 bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1" -> "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; -"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; +"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1438 bert/encoder/layer_11/attention/output/add" [label="[]", style=solid]; -"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[]", style=solid]; -"1398 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[768, 768]", style=solid]; -"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1410 bert/encoder/layer_11/attention/self/query/MatMul" [label="[]", style=solid]; -"1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1418 bert/encoder/layer_11/attention/self/key/MatMul" [label="[]", style=solid]; +"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1418 bert/encoder/layer_11/attention/self/key/MatMul" [label="[]", style=solid]; +"1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1410 
bert/encoder/layer_11/attention/self/query/MatMul" [label="[]", style=solid]; +"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[]", style=solid]; +"1402 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[768, 768]", style=solid]; "1404 bert/encoder/layer_11/attention/self/value/MatMul" -> "1405 bert/encoder/layer_11/attention/self/value/BiasAdd" [label="[]", style=solid]; "1405 bert/encoder/layer_11/attention/self/value/BiasAdd" -> "1406 bert/encoder/layer_11/attention/self/Reshape_2" [label="[]", style=solid]; "1406 bert/encoder/layer_11/attention/self/Reshape_2" -> "1407 bert/encoder/layer_11/attention/self/transpose_2" [label="[]", style=solid]; diff --git a/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot b/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot index 9d2f66780d5..7aa64281d32 100644 --- a/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot +++ b/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot @@ -855,10 +855,10 @@ strict digraph { "853 QuantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" [id=853, type=QuantizeLinear]; "854 DequantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" [id=854, type=DequantizeLinear]; "855 Conv_349" [id=855, type=Conv]; -"856 QuantizeLinear_1028_1" [id=856, type=QuantizeLinear]; -"857 DequantizeLinear_1028_1" [id=857, type=DequantizeLinear]; -"858 QuantizeLinear_1028_2" [id=858, type=QuantizeLinear]; -"859 DequantizeLinear_1028_2" [id=859, type=DequantizeLinear]; +"856 QuantizeLinear_1028_2" [id=856, type=QuantizeLinear]; +"857 DequantizeLinear_1028_2" [id=857, type=DequantizeLinear]; +"858 QuantizeLinear_1028_1" [id=858, type=QuantizeLinear]; +"859 DequantizeLinear_1028_1" [id=859, type=DequantizeLinear]; "860 Relu_350" [id=860, type=Relu]; "861 QuantizeLinear_1029_1" [id=861, type=QuantizeLinear]; "862 DequantizeLinear_1029_1" [id=862, type=DequantizeLinear]; @@ -1983,13 +1983,13 @@ strict digraph { "852 Add_348" -> "866 QuantizeLinear_1027_1" [label="[1, 256, 60, 80]", style=solid]; "853 QuantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" -> "854 DequantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" [label="[256, 2048, 3, 3]", style=dashed]; "854 DequantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" -> "855 Conv_349" [label="[256, 2048, 3, 3]", style=solid]; -"855 Conv_349" -> "856 QuantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=solid]; -"855 Conv_349" -> "858 QuantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=solid]; +"855 Conv_349" -> "856 QuantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=solid]; +"855 Conv_349" -> "858 QuantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=solid]; "855 Conv_349" -> "860 Relu_350" [label="[1, 256, 8, 10]", style=solid]; -"856 QuantizeLinear_1028_1" -> "857 DequantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=dashed]; -"857 DequantizeLinear_1028_1" -> "944 Conv_382" [label="[1, 256, 8, 10]", style=solid]; -"858 QuantizeLinear_1028_2" -> "859 
DequantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=dashed]; -"859 DequantizeLinear_1028_2" -> "1041 Conv_427" [label="[1, 256, 8, 10]", style=solid]; +"856 QuantizeLinear_1028_2" -> "857 DequantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=dashed]; +"857 DequantizeLinear_1028_2" -> "1041 Conv_427" [label="[1, 256, 8, 10]", style=solid]; +"858 QuantizeLinear_1028_1" -> "859 DequantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=dashed]; +"859 DequantizeLinear_1028_1" -> "944 Conv_382" [label="[1, 256, 8, 10]", style=solid]; "860 Relu_350" -> "861 QuantizeLinear_1029_1" [label="[1, 256, 8, 10]", style=solid]; "861 QuantizeLinear_1029_1" -> "862 DequantizeLinear_1029_1" [label="[1, 256, 8, 10]", style=dashed]; "862 DequantizeLinear_1029_1" -> "865 Conv_351" [label="[1, 256, 8, 10]", style=solid]; From 1c8313dc25581b8c1679f76f2f612278bb50ca18 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 5 Oct 2023 10:45:54 +0200 Subject: [PATCH 16/21] docstrings --- nncf/onnx/graph/model_transformer.py | 7 ++++--- nncf/onnx/graph/nncf_graph_builder.py | 8 ++++---- nncf/onnx/graph/onnx_helper.py | 18 +++++++++++++++--- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py index 91da8af6eec..8a987d55f17 100644 --- a/nncf/onnx/graph/model_transformer.py +++ b/nncf/onnx/graph/model_transformer.py @@ -56,7 +56,7 @@ def _get_target_edge( port_id: int, node_name: str, transform_type: TargetType, - node_mapping, + node_mapping: Dict[str, onnx.NodeProto], input_edges_mapping: Dict[str, str], ) -> str: """ @@ -65,8 +65,9 @@ def _get_target_edge( :param port_id: Edge number of port. :param node_name: Node name. :param transform_type: Type of transformation. + :param node_mapping: Mapping from a node name to the node. :param input_edges_mapping: Mapping between NNCF Input nodes and - the following ONNX nodes and corresponding input port id. + the following ONNX nodes and corresponding input port id. :return: Target edge name. """ if transform_type in [TargetType.PRE_LAYER_OPERATION, TargetType.OPERATION_WITH_WEIGHTS]: @@ -310,7 +311,7 @@ def _insert_quantizer_dequantizer( :param model: Model to insert new nodes. :param transformation: QuantizeLinear-DequantizeLinear insertion transformation. - :param edge_node_mapping: Mapping describing start and consumed nodes of the edges. + :param children_node_mapping: Mapping from edge name to nodes which consume this edge as an input. :return: Updated model with inserted QuantizeLinear-DequantizeLinear pair. """ node_mapping = get_node_mapping(model) diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py index aa7f9e09201..2327c7f3903 100644 --- a/nncf/onnx/graph/nncf_graph_builder.py +++ b/nncf/onnx/graph/nncf_graph_builder.py @@ -238,7 +238,7 @@ def _add_nncf_input_nodes( model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_info_mapping: Dict[str, onnx.ValueInfoProto], - children_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + children_node_mapping: Dict[str, List[onnx.NodeProto]], ) -> None: """ Adds special NNCF Input nodes to NNCFGraph. @@ -246,7 +246,7 @@ def _add_nncf_input_nodes( :param model: ONNX model. :param nncf_graph: NNCFGraph, in which the new nodes will be added. :param edge_info_mapping: Mapping from edge name to the edge info. - :param edge_node_mapping: Mapping describing start and consumed nodes of the edges. 
+ :param children_node_mapping: Mapping from edge name to nodes which consume this edge as an input. :return: None. """ for i, _input in enumerate(get_model_inputs(model)): @@ -285,7 +285,7 @@ def _add_nncf_output_nodes( model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_info_mapping: Dict[str, onnx.ValueInfoProto], - parents_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + parents_node_mapping: Dict[str, List[onnx.NodeProto]], ) -> None: """ Adds special NNCF Output nodes to NNCFGraph. @@ -293,7 +293,7 @@ def _add_nncf_output_nodes( :param model: ONNX model. :param nncf_graph: NNCFGraph, in which the new nodes will be added. :param edge_info_mapping: Mapping from edge name to the edge info. - :param edge_node_mapping: Mapping describing start and consumed nodes of the edges. + :param parents_node_mapping: Mapping from edge name to node which outputs this edge. :return: None. """ for i, _output in enumerate(model.graph.output): diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py index 81c1eaf43bd..5e026dbe523 100644 --- a/nncf/onnx/graph/onnx_helper.py +++ b/nncf/onnx/graph/onnx_helper.py @@ -39,7 +39,13 @@ def get_edge_info_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoPro } -def get_children_node_mapping(model: onnx.ModelProto) -> Dict[str, List[onnx.ValueInfoProto]]: +def get_children_node_mapping(model: onnx.ModelProto) -> Dict[str, List[onnx.NodeProto]]: + """ + Returns a mapping from edge name to nodes which consume this edge as an input. + + :param model: ONNX model. + :return: Mapping from edge name to nodes which consume this edge as an input. + """ output = defaultdict(list) for node in model.graph.node: for input_edge in node.input: @@ -47,8 +53,14 @@ def get_children_node_mapping(model: onnx.ModelProto) -> Dict[str, List[onnx.Val return output -def get_parents_node_mapping(model: onnx.ModelProto) -> Dict[str, onnx.ValueInfoProto]: - output = defaultdict(list) +def get_parents_node_mapping(model: onnx.ModelProto) -> Dict[str, onnx.NodeProto]: + """ + Returns a mapping from edge name to node which outputs this edge. + + :param model: ONNX model. + :return: Mapping from edge name to node which outputs this edge. + """ + output = {} for node in model.graph.node: for input_edge in node.output: output[input_edge] = node From cfefa322a9e8b28c4df9b9ea199eee6493cb77bf Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 5 Oct 2023 13:25:15 +0200 Subject: [PATCH 17/21] fix bug; add docstrings --- nncf/onnx/graph/metatypes/onnx_metatypes.py | 3 ++- nncf/onnx/graph/nncf_graph_builder.py | 10 +++++----- nncf/onnx/graph/onnx_helper.py | 18 ++++++++---------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/nncf/onnx/graph/metatypes/onnx_metatypes.py b/nncf/onnx/graph/metatypes/onnx_metatypes.py index 48aabc87a85..4657012eadf 100644 --- a/nncf/onnx/graph/metatypes/onnx_metatypes.py +++ b/nncf/onnx/graph/metatypes/onnx_metatypes.py @@ -649,7 +649,7 @@ def get_tensor_edge_name( model: onnx.ModelProto, node: onnx.NodeProto, port_id: int, - parents_node_mapping: Dict[str, List[onnx.ValueInfoProto]], + parents_node_mapping: Dict[str, onnx.NodeProto], ) -> Optional[str]: """ Returns an edge name associated with a weight of a node laying on an input port_id. @@ -670,6 +670,7 @@ def get_tensor_edge_name( :param model: ONNX model. :param node: Node. :param port_id: Port id on which a weight edge is seeking. + :param parents_node_mapping: Mapping from edge name to node which outputs this edge. 
:return: Edge name associated with a weight. """ PROPAGATING_NODES = ( diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py index 2327c7f3903..dd40b5a4a4a 100644 --- a/nncf/onnx/graph/nncf_graph_builder.py +++ b/nncf/onnx/graph/nncf_graph_builder.py @@ -115,7 +115,7 @@ def get_bias_tensor_port_id(metatype: ONNXOpWithWeightsMetatype) -> Optional[int def _get_weight_port_ids( node: onnx.NodeProto, model: onnx.ModelProto, - parents_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + parents_node_mapping: Dict[str, onnx.NodeProto], ) -> Set[int]: """ Returns all weight input ports. @@ -124,7 +124,7 @@ def _get_weight_port_ids( :param node: ONNX node. :param model: ONNX model. - :param edge_node_mapping: Mapping describing start and consumed nodes of the edges. + :param parents_node_mapping: Mapping from edge name to node which outputs this edge. :return: Port ids with weights. """ port_ids = set() @@ -185,14 +185,14 @@ def _get_node_attrs(node: onnx.NodeProto, model: onnx.ModelProto) -> Dict[str, A def _get_bias_attr( node: onnx.NodeProto, model: onnx.ModelProto, - parents_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + parents_node_mapping: Dict[str, onnx.NodeProto], ) -> Dict[str, str]: """ Returns bias tensor attributes. :param node: ONNX node. :param model: ONNX model. - :param edge_node_mapping: Mapping describing start and consumed nodes of the edges. + :param parents_node_mapping: Mapping from edge name to node which outputs this edge. :return: Bias tensor attributes. """ bias_attrs = {} @@ -285,7 +285,7 @@ def _add_nncf_output_nodes( model: onnx.ModelProto, nncf_graph: NNCFGraph, edge_info_mapping: Dict[str, onnx.ValueInfoProto], - parents_node_mapping: Dict[str, List[onnx.NodeProto]], + parents_node_mapping: Dict[str, onnx.NodeProto], ) -> None: """ Adds special NNCF Output nodes to NNCFGraph. diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py index 5e026dbe523..7438335d382 100644 --- a/nncf/onnx/graph/onnx_helper.py +++ b/nncf/onnx/graph/onnx_helper.py @@ -48,8 +48,8 @@ def get_children_node_mapping(model: onnx.ModelProto) -> Dict[str, List[onnx.Nod """ output = defaultdict(list) for node in model.graph.node: - for input_edge in node.input: - output[input_edge].append(node) + for edge in node.input: + output[edge].append(node) return output @@ -62,8 +62,8 @@ def get_parents_node_mapping(model: onnx.ModelProto) -> Dict[str, onnx.NodeProto """ output = {} for node in model.graph.node: - for input_edge in node.output: - output[input_edge] = node + for edge in node.output: + output[edge] = node return output @@ -243,7 +243,7 @@ def get_edge_dtype(edge: Union[onnx.ValueInfoProto, onnx.TensorProto]) -> int: def get_parent( node: onnx.NodeProto, port_id: int, - parents_node_mapping: Dict[str, List[onnx.ValueInfoProto]], + parents_node_mapping: Dict[str, onnx.NodeProto], ) -> Optional[onnx.NodeProto]: """ Returns parents of the node. If there is no parent node, returns None. @@ -254,13 +254,11 @@ def get_parent( :return: Parent node. """ if port_id < len(node.input): - return parents_node_mapping[node.input[port_id]] + return parents_node_mapping.get(node.input[port_id]) return None -def get_children( - node: onnx.NodeProto, children_node_mapping: Dict[str, List[onnx.ValueInfoProto]] -) -> List[onnx.NodeProto]: +def get_children(node: onnx.NodeProto, children_node_mapping: Dict[str, List[onnx.NodeProto]]) -> List[onnx.NodeProto]: """ Returns children of the node. 
@@ -277,7 +275,7 @@ def get_children( def is_node_has_shared_weight( node: onnx.NodeProto, weight_port_id: int, - children_node_mapping: Dict[str, Tuple[onnx.ValueInfoProto, List[onnx.ValueInfoProto]]], + children_node_mapping: Dict[str, List[onnx.NodeProto]], ) -> bool: """ Returns whether the node share a weight. From 59b8ad2eb72d90c8b4ffa703bf3b104b6ef6303c Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 5 Oct 2023 15:26:55 +0200 Subject: [PATCH 18/21] unused imports --- nncf/onnx/graph/metatypes/onnx_metatypes.py | 2 +- nncf/onnx/graph/model_transformer.py | 1 - nncf/onnx/graph/nncf_graph_builder.py | 2 +- nncf/onnx/graph/onnx_helper.py | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/nncf/onnx/graph/metatypes/onnx_metatypes.py b/nncf/onnx/graph/metatypes/onnx_metatypes.py index 4657012eadf..0148c99ab7e 100644 --- a/nncf/onnx/graph/metatypes/onnx_metatypes.py +++ b/nncf/onnx/graph/metatypes/onnx_metatypes.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Optional, Tuple, Type +from typing import Dict, List, Optional, Type import onnx diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py index 8a987d55f17..e89233102fd 100644 --- a/nncf/onnx/graph/model_transformer.py +++ b/nncf/onnx/graph/model_transformer.py @@ -25,7 +25,6 @@ from nncf.onnx.graph.onnx_helper import get_edge_info_mapping from nncf.onnx.graph.onnx_helper import get_node_index from nncf.onnx.graph.onnx_helper import get_node_mapping -from nncf.onnx.graph.onnx_helper import get_parents_node_mapping from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand from nncf.onnx.graph.transformations.commands import ONNXModelExtractionCommand diff --git a/nncf/onnx/graph/nncf_graph_builder.py b/nncf/onnx/graph/nncf_graph_builder.py index dd40b5a4a4a..148de756c30 100644 --- a/nncf/onnx/graph/nncf_graph_builder.py +++ b/nncf/onnx/graph/nncf_graph_builder.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from collections import Counter -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any, Dict, List, Optional, Set import onnx diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py index 7438335d382..157b458f48d 100644 --- a/nncf/onnx/graph/onnx_helper.py +++ b/nncf/onnx/graph/onnx_helper.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from collections import defaultdict -from typing import Dict, Iterator, List, Optional, Tuple, Union +from typing import Dict, Iterator, List, Optional, Union import numpy as np import onnx From 11f91f77bc2d2f346760e72a88c3d7987b52f0c8 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 9 Oct 2023 12:00:44 +0200 Subject: [PATCH 19/21] update method name --- nncf/onnx/graph/model_transformer.py | 12 ++++++------ nncf/onnx/graph/onnx_helper.py | 4 ++-- nncf/onnx/statistics/aggregator.py | 4 ++-- .../algorithms/bias_correction/onnx_backend.py | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py index e89233102fd..b6db3d36b0d 100644 --- a/nncf/onnx/graph/model_transformer.py +++ b/nncf/onnx/graph/model_transformer.py @@ -23,8 +23,8 @@ from nncf.onnx.graph.onnx_helper import get_children_node_mapping from nncf.onnx.graph.onnx_helper import get_edge_dtype from nncf.onnx.graph.onnx_helper import get_edge_info_mapping +from nncf.onnx.graph.onnx_helper import get_name_to_node_map from nncf.onnx.graph.onnx_helper import get_node_index -from nncf.onnx.graph.onnx_helper import get_node_mapping from nncf.onnx.graph.onnx_helper import get_tensor from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand from nncf.onnx.graph.transformations.commands import ONNXModelExtractionCommand @@ -131,7 +131,7 @@ def _apply_output_insertion_transformations( :return: New model with inserted outputs. """ model_outputs = set(output.name for output in self._model.graph.output) - node_mapping = get_node_mapping(self._model) + node_mapping = get_name_to_node_map(self._model) for transformation in transformations: port_id = transformation.target_point.port_id node_name = transformation.target_point.target_node_name @@ -313,7 +313,7 @@ def _insert_quantizer_dequantizer( :param children_node_mapping: Mapping from edge name to nodes which consume this edge as an input. :return: Updated model with inserted QuantizeLinear-DequantizeLinear pair. """ - node_mapping = get_node_mapping(model) + node_mapping = get_name_to_node_map(model) target_edge_name = self._get_quantizer_dequantizer_edge_name(transformation, node_mapping) quantizer, dequantizer = self._get_quantize_dequantize_nodes(transformation, target_edge_name) onnx_scale_tensor, onnx_zero_point_tensor = ONNXModelTransformer._get_scale_zero_point_tensors( @@ -363,7 +363,7 @@ def _apply_bias_correction_transformations( :param transformations: Bias correction transformations. :return: Copy of original model with updated biases. """ - node_mapping = get_node_mapping(model) + node_mapping = get_name_to_node_map(model) for transformation in transformations: bias_tensor_position = transformation.target_point.port_id node_name = transformation.target_point.target_node_name @@ -383,7 +383,7 @@ def _apply_model_extraction_transformation(self, transformation: ONNXModelExtrac :return: Extracted sub-model. """ input_tensor_names = [] - node_mapping = get_node_mapping(self._model) + node_mapping = get_name_to_node_map(self._model) for input_node_name in transformation.inputs: input_onnx_node = node_mapping[input_node_name] input_tensor_names.append(input_onnx_node.input[0]) @@ -409,7 +409,7 @@ def _apply_qdq_node_removing_transformations( :return: Model with removed nodes. 
""" for transformation in transformations: - node_mapping = get_node_mapping(model) + node_mapping = get_name_to_node_map(model) children_node_mapping = get_children_node_mapping(model) node = node_mapping[transformation.target_point.target_node_name] diff --git a/nncf/onnx/graph/onnx_helper.py b/nncf/onnx/graph/onnx_helper.py index 157b458f48d..f6b082050a0 100644 --- a/nncf/onnx/graph/onnx_helper.py +++ b/nncf/onnx/graph/onnx_helper.py @@ -16,9 +16,9 @@ from onnx import numpy_helper -def get_node_mapping(model: onnx.ModelProto) -> Dict[str, onnx.NodeProto]: +def get_name_to_node_map(model: onnx.ModelProto) -> Dict[str, onnx.NodeProto]: """ - Retuns mapping from node name to the node. + Returns mapping from node name to the node. :param model: Model from mapping is built. :return: Mapping. diff --git a/nncf/onnx/statistics/aggregator.py b/nncf/onnx/statistics/aggregator.py index 6c5c3c6aef8..a768a855258 100644 --- a/nncf/onnx/statistics/aggregator.py +++ b/nncf/onnx/statistics/aggregator.py @@ -22,7 +22,7 @@ from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.onnx.graph.node_utils import get_input_edge from nncf.onnx.graph.node_utils import get_input_edges_mapping -from nncf.onnx.graph.onnx_helper import get_node_mapping +from nncf.onnx.graph.onnx_helper import get_name_to_node_map from nncf.onnx.graph.transformations.commands import ONNXOutputInsertionCommand from nncf.onnx.tensor import ONNXNNCFTensor @@ -30,7 +30,7 @@ class ONNXStatisticsAggregator(StatisticsAggregator): def collect_statistics(self, model: onnx.ModelProto, graph: NNCFGraph) -> None: self.input_edges_mapping = get_input_edges_mapping(graph) - self.node_mapping = get_node_mapping(model) + self.node_mapping = get_name_to_node_map(model) self._registered_weights = set() super().collect_statistics(model, graph) diff --git a/nncf/quantization/algorithms/bias_correction/onnx_backend.py b/nncf/quantization/algorithms/bias_correction/onnx_backend.py index fea6163b984..d7f34936bfd 100644 --- a/nncf/quantization/algorithms/bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/bias_correction/onnx_backend.py @@ -22,7 +22,7 @@ from nncf.onnx.graph.node_utils import get_bias_value from nncf.onnx.graph.node_utils import is_any_weight_quantized from nncf.onnx.graph.node_utils import is_node_with_bias -from nncf.onnx.graph.onnx_helper import get_node_mapping +from nncf.onnx.graph.onnx_helper import get_name_to_node_map from nncf.onnx.graph.transformations.command_creation import create_bias_correction_command from nncf.onnx.graph.transformations.commands import ONNXBiasCorrectionCommand from nncf.onnx.graph.transformations.commands import ONNXModelExtractionCommand @@ -101,12 +101,12 @@ def get_bias_value(node: NNCFNode, model: onnx.ModelProto, nncf_graph: NNCFGraph @staticmethod def get_input_name(model: onnx.ModelProto, node_name: str) -> str: - node_mapping = get_node_mapping(model) + node_mapping = get_name_to_node_map(model) return node_mapping[node_name].input[0] @staticmethod def get_output_name(model: onnx.ModelProto, node_name: str, output_id: int) -> List[str]: - node_mapping = get_node_mapping(model) + node_mapping = get_name_to_node_map(model) return node_mapping[node_name].output[output_id] @staticmethod From 352a419ea07aeea5ff4aa89a28e12cb27a6abc1c Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 9 Oct 2023 14:09:59 +0200 Subject: [PATCH 20/21] reusing mapping inside _insert_quantizer_dequantizer --- nncf/onnx/graph/model_transformer.py | 8 +++++--- 1 file 
diff --git a/nncf/onnx/graph/model_transformer.py b/nncf/onnx/graph/model_transformer.py
index b6db3d36b0d..877edb6838b 100644
--- a/nncf/onnx/graph/model_transformer.py
+++ b/nncf/onnx/graph/model_transformer.py
@@ -199,9 +199,10 @@ def _apply_quantizer_insertion_transformations(
         :return: New model with inserted QuantizeLinear-DequantizeLinear nodes pairs.
         """
         self._added_target_edges = Counter()
+        node_mapping = get_name_to_node_map(model)
+        children_node_mapping = get_children_node_mapping(model)
         for transformation in transformations:
-            children_node_mapping = get_children_node_mapping(model)
-            model = self._insert_quantizer_dequantizer(model, transformation, children_node_mapping)
+            model = self._insert_quantizer_dequantizer(model, transformation, node_mapping, children_node_mapping)
         return model
 
     def _get_quantize_dequantize_nodes(
@@ -303,6 +304,7 @@ def _insert_quantizer_dequantizer(
         self,
         model: onnx.ModelProto,
         transformation: ONNXQuantizerInsertionCommand,
+        node_mapping: Dict[str, onnx.NodeProto],
         children_node_mapping: Dict[str, List[onnx.ValueInfoProto]],
     ) -> onnx.ModelProto:
         """
@@ -310,10 +312,10 @@
 
         :param model: Model to insert new nodes.
         :param transformation: QuantizeLinear-DequantizeLinear insertion transformation.
+        :param node_mapping: Mapping from node name to the node.
         :param children_node_mapping: Mapping from edge name to nodes which consume this edge as an input.
         :return: Updated model with inserted QuantizeLinear-DequantizeLinear pair.
         """
-        node_mapping = get_name_to_node_map(model)
         target_edge_name = self._get_quantizer_dequantizer_edge_name(transformation, node_mapping)
         quantizer, dequantizer = self._get_quantize_dequantize_nodes(transformation, target_edge_name)
         onnx_scale_tensor, onnx_zero_point_tensor = ONNXModelTransformer._get_scale_zero_point_tensors(

From 903a87c95f74cfcdc427ac36fb59dae202d7641b Mon Sep 17 00:00:00 2001
From: Aleksei Kashapov
Date: Tue, 10 Oct 2023 15:22:48 +0200
Subject: [PATCH 21/21] upd graphs

---
 .../quantization/MaskRCNN-12.dot              | 100 ++--
 .../quantization/bertsquad-12.dot             | 456 +++++++++---------
 .../quantization/retinanet-9.dot              |  20 +-
 3 files changed, 288 insertions(+), 288 deletions(-)

diff --git a/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot b/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot
index 0775c236c76..e2a0350705f 100644
--- a/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot
+++ b/tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot
@@ -481,10 +481,10 @@ strict digraph {
 "479 QuantizeLinear_389_1" [id=479, type=QuantizeLinear];
 "480 DequantizeLinear_389_1" [id=480, type=DequantizeLinear];
 "481 390" [id=481, type=Conv];
-"482 QuantizeLinear_391_2" [id=482, type=QuantizeLinear];
-"483 DequantizeLinear_391_2" [id=483, type=DequantizeLinear];
-"484 QuantizeLinear_391_1" [id=484, type=QuantizeLinear];
-"485 DequantizeLinear_391_1" [id=485, type=DequantizeLinear];
+"482 QuantizeLinear_391_1" [id=482, type=QuantizeLinear];
+"483 DequantizeLinear_391_1" [id=483, type=DequantizeLinear];
+"484 QuantizeLinear_391_2" [id=484, type=QuantizeLinear];
+"485 DequantizeLinear_391_2" [id=485, type=DequantizeLinear];
 "486 487" [id=486, type=MaxPool];
 "487 QuantizeLinear_489_1" [id=487, type=QuantizeLinear];
 "488 DequantizeLinear_489_1" [id=488, type=DequantizeLinear];
@@ -1749,14 +1749,14 @@ strict digraph {
 "1747 1172" [id=1747, type=Gather];
 "1748 2479" [id=1748, type=Concat];
 "1749 2490" [id=1749, type=Gather];
-"1750 QuantizeLinear_2527_4" [id=1750, type=QuantizeLinear]; -"1751 DequantizeLinear_2527_4" [id=1751, type=DequantizeLinear]; -"1752 QuantizeLinear_2527_3" [id=1752, type=QuantizeLinear]; -"1753 DequantizeLinear_2527_3" [id=1753, type=DequantizeLinear]; -"1754 QuantizeLinear_2527_2" [id=1754, type=QuantizeLinear]; -"1755 DequantizeLinear_2527_2" [id=1755, type=DequantizeLinear]; -"1756 QuantizeLinear_2527_1" [id=1756, type=QuantizeLinear]; -"1757 DequantizeLinear_2527_1" [id=1757, type=DequantizeLinear]; +"1750 QuantizeLinear_2527_1" [id=1750, type=QuantizeLinear]; +"1751 DequantizeLinear_2527_1" [id=1751, type=DequantizeLinear]; +"1752 QuantizeLinear_2527_2" [id=1752, type=QuantizeLinear]; +"1753 DequantizeLinear_2527_2" [id=1753, type=DequantizeLinear]; +"1754 QuantizeLinear_2527_3" [id=1754, type=QuantizeLinear]; +"1755 DequantizeLinear_2527_3" [id=1755, type=DequantizeLinear]; +"1756 QuantizeLinear_2527_4" [id=1756, type=QuantizeLinear]; +"1757 DequantizeLinear_2527_4" [id=1757, type=DequantizeLinear]; "1758 2532" [id=1758, type=Slice]; "1759 2534" [id=1759, type=Gather]; "1760 2525" [id=1760, type=Slice]; @@ -3692,14 +3692,14 @@ strict digraph { "3690 3030" [id=3690, type=Gather]; "3691 6518" [id=3691, type=Concat]; "3692 6530" [id=3692, type=Gather]; -"3693 QuantizeLinear_6568_4" [id=3693, type=QuantizeLinear]; -"3694 DequantizeLinear_6568_4" [id=3694, type=DequantizeLinear]; -"3695 QuantizeLinear_6568_3" [id=3695, type=QuantizeLinear]; -"3696 DequantizeLinear_6568_3" [id=3696, type=DequantizeLinear]; -"3697 QuantizeLinear_6568_2" [id=3697, type=QuantizeLinear]; -"3698 DequantizeLinear_6568_2" [id=3698, type=DequantizeLinear]; -"3699 QuantizeLinear_6568_1" [id=3699, type=QuantizeLinear]; -"3700 DequantizeLinear_6568_1" [id=3700, type=DequantizeLinear]; +"3693 QuantizeLinear_6568_1" [id=3693, type=QuantizeLinear]; +"3694 DequantizeLinear_6568_1" [id=3694, type=DequantizeLinear]; +"3695 QuantizeLinear_6568_2" [id=3695, type=QuantizeLinear]; +"3696 DequantizeLinear_6568_2" [id=3696, type=DequantizeLinear]; +"3697 QuantizeLinear_6568_3" [id=3697, type=QuantizeLinear]; +"3698 DequantizeLinear_6568_3" [id=3698, type=DequantizeLinear]; +"3699 QuantizeLinear_6568_4" [id=3699, type=QuantizeLinear]; +"3700 DequantizeLinear_6568_4" [id=3700, type=DequantizeLinear]; "3701 6576" [id=3701, type=Slice]; "3702 6578" [id=3702, type=Gather]; "3703 6569" [id=3703, type=Slice]; @@ -4788,16 +4788,16 @@ strict digraph { "478 DequantizeLinear_388_1" -> "481 390" [label="[1, 256, -1, -1]", style=solid]; "479 QuantizeLinear_389_1" -> "480 DequantizeLinear_389_1" [label="[256, 256, 3, 3]", style=dashed]; "480 DequantizeLinear_389_1" -> "481 390" [label="[256, 256, 3, 3]", style=solid]; -"481 390" -> "482 QuantizeLinear_391_2" [label="[1, 256, -1, -1]", style=solid]; -"481 390" -> "484 QuantizeLinear_391_1" [label="[1, 256, -1, -1]", style=solid]; +"481 390" -> "482 QuantizeLinear_391_1" [label="[1, 256, -1, -1]", style=solid]; +"481 390" -> "484 QuantizeLinear_391_2" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "784 536" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "787 533" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "1929 2620" [label="[1, 256, -1, -1]", style=solid]; "481 390" -> "3872 6664" [label="[1, 256, -1, -1]", style=solid]; -"482 QuantizeLinear_391_2" -> "483 DequantizeLinear_391_2" [label="[1, 256, -1, -1]", style=dashed]; -"483 DequantizeLinear_391_2" -> "722 506" [label="[1, 256, -1, -1]", style=solid]; -"484 QuantizeLinear_391_1" -> "485 DequantizeLinear_391_1" 
[label="[1, 256, -1, -1]", style=dashed]; -"485 DequantizeLinear_391_1" -> "486 487" [label="[1, 256, -1, -1]", style=solid]; +"482 QuantizeLinear_391_1" -> "483 DequantizeLinear_391_1" [label="[1, 256, -1, -1]", style=dashed]; +"483 DequantizeLinear_391_1" -> "486 487" [label="[1, 256, -1, -1]", style=solid]; +"484 QuantizeLinear_391_2" -> "485 DequantizeLinear_391_2" [label="[1, 256, -1, -1]", style=dashed]; +"485 DequantizeLinear_391_2" -> "722 506" [label="[1, 256, -1, -1]", style=solid]; "486 487" -> "489 510" [label="[1, 256, -1, -1]", style=solid]; "486 487" -> "555 542" [label="[1, 256, -1, -1]", style=solid]; "486 487" -> "558 539" [label="[1, 256, -1, -1]", style=solid]; @@ -6341,21 +6341,21 @@ strict digraph { "1746 1171" -> "1747 1172" [label="[-1]", style=dashed]; "1747 1172" -> "1748 2479" [label="[-1, 4]", style=solid]; "1748 2479" -> "1749 2490" [label="[-1, 4]", style=solid]; -"1749 2490" -> "1750 QuantizeLinear_2527_4" [label="[]", style=solid]; -"1749 2490" -> "1752 QuantizeLinear_2527_3" [label="[]", style=solid]; -"1749 2490" -> "1754 QuantizeLinear_2527_2" [label="[]", style=solid]; -"1749 2490" -> "1756 QuantizeLinear_2527_1" [label="[]", style=solid]; +"1749 2490" -> "1750 QuantizeLinear_2527_1" [label="[]", style=solid]; +"1749 2490" -> "1752 QuantizeLinear_2527_2" [label="[]", style=solid]; +"1749 2490" -> "1754 QuantizeLinear_2527_3" [label="[]", style=solid]; +"1749 2490" -> "1756 QuantizeLinear_2527_4" [label="[]", style=solid]; "1749 2490" -> "1803 2495" [label="[]", style=solid]; "1749 2490" -> "1807 2503" [label="[]", style=solid]; "1749 2490" -> "2009 2775" [label="[]", style=solid]; -"1750 QuantizeLinear_2527_4" -> "1751 DequantizeLinear_2527_4" [label="[]", style=dashed]; -"1751 DequantizeLinear_2527_4" -> "1768 2508" [label="[]", style=solid]; -"1752 QuantizeLinear_2527_3" -> "1753 DequantizeLinear_2527_3" [label="[]", style=dashed]; -"1753 DequantizeLinear_2527_3" -> "1766 2515" [label="[]", style=solid]; -"1754 QuantizeLinear_2527_2" -> "1755 DequantizeLinear_2527_2" [label="[]", style=dashed]; -"1755 DequantizeLinear_2527_2" -> "1760 2525" [label="[]", style=solid]; -"1756 QuantizeLinear_2527_1" -> "1757 DequantizeLinear_2527_1" [label="[]", style=dashed]; -"1757 DequantizeLinear_2527_1" -> "1758 2532" [label="[]", style=solid]; +"1750 QuantizeLinear_2527_1" -> "1751 DequantizeLinear_2527_1" [label="[]", style=dashed]; +"1751 DequantizeLinear_2527_1" -> "1758 2532" [label="[]", style=solid]; +"1752 QuantizeLinear_2527_2" -> "1753 DequantizeLinear_2527_2" [label="[]", style=dashed]; +"1753 DequantizeLinear_2527_2" -> "1760 2525" [label="[]", style=solid]; +"1754 QuantizeLinear_2527_3" -> "1755 DequantizeLinear_2527_3" [label="[]", style=dashed]; +"1755 DequantizeLinear_2527_3" -> "1766 2515" [label="[]", style=solid]; +"1756 QuantizeLinear_2527_4" -> "1757 DequantizeLinear_2527_4" [label="[]", style=dashed]; +"1757 DequantizeLinear_2527_4" -> "1768 2508" [label="[]", style=solid]; "1758 2532" -> "1759 2534" [label="[]", style=solid]; "1759 2534" -> "1762 2535" [label="[]", style=solid]; "1760 2525" -> "1761 2527" [label="[]", style=solid]; @@ -8988,21 +8988,21 @@ strict digraph { "3690 3030" -> "3691 6518" [label="[]", style=solid]; "3690 3030" -> "4259 3037" [label="[]", style=solid]; "3691 6518" -> "3692 6530" [label="[]", style=solid]; -"3692 6530" -> "3693 QuantizeLinear_6568_4" [label="[-1, 4]", style=solid]; -"3692 6530" -> "3695 QuantizeLinear_6568_3" [label="[-1, 4]", style=solid]; -"3692 6530" -> "3697 QuantizeLinear_6568_2" [label="[-1, 4]", 
style=solid]; -"3692 6530" -> "3699 QuantizeLinear_6568_1" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3693 QuantizeLinear_6568_1" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3695 QuantizeLinear_6568_2" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3697 QuantizeLinear_6568_3" [label="[-1, 4]", style=solid]; +"3692 6530" -> "3699 QuantizeLinear_6568_4" [label="[-1, 4]", style=solid]; "3692 6530" -> "3746 6539" [label="[-1, 4]", style=solid]; "3692 6530" -> "3750 6547" [label="[-1, 4]", style=solid]; "3692 6530" -> "4281 nncf_model_output_0" [label="[-1, 4]", style=solid]; -"3693 QuantizeLinear_6568_4" -> "3694 DequantizeLinear_6568_4" [label="[-1, 4]", style=dashed]; -"3694 DequantizeLinear_6568_4" -> "3711 6552" [label="[-1, 4]", style=solid]; -"3695 QuantizeLinear_6568_3" -> "3696 DequantizeLinear_6568_3" [label="[-1, 4]", style=dashed]; -"3696 DequantizeLinear_6568_3" -> "3709 6559" [label="[-1, 4]", style=solid]; -"3697 QuantizeLinear_6568_2" -> "3698 DequantizeLinear_6568_2" [label="[-1, 4]", style=dashed]; -"3698 DequantizeLinear_6568_2" -> "3703 6569" [label="[-1, 4]", style=solid]; -"3699 QuantizeLinear_6568_1" -> "3700 DequantizeLinear_6568_1" [label="[-1, 4]", style=dashed]; -"3700 DequantizeLinear_6568_1" -> "3701 6576" [label="[-1, 4]", style=solid]; +"3693 QuantizeLinear_6568_1" -> "3694 DequantizeLinear_6568_1" [label="[-1, 4]", style=dashed]; +"3694 DequantizeLinear_6568_1" -> "3701 6576" [label="[-1, 4]", style=solid]; +"3695 QuantizeLinear_6568_2" -> "3696 DequantizeLinear_6568_2" [label="[-1, 4]", style=dashed]; +"3696 DequantizeLinear_6568_2" -> "3703 6569" [label="[-1, 4]", style=solid]; +"3697 QuantizeLinear_6568_3" -> "3698 DequantizeLinear_6568_3" [label="[-1, 4]", style=dashed]; +"3698 DequantizeLinear_6568_3" -> "3709 6559" [label="[-1, 4]", style=solid]; +"3699 QuantizeLinear_6568_4" -> "3700 DequantizeLinear_6568_4" [label="[-1, 4]", style=dashed]; +"3700 DequantizeLinear_6568_4" -> "3711 6552" [label="[-1, 4]", style=solid]; "3701 6576" -> "3702 6578" [label="[-1, 4]", style=solid]; "3702 6578" -> "3705 6579" [label="[-1]", style=solid]; "3703 6569" -> "3704 6571" [label="[-1, 4]", style=solid]; diff --git a/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot b/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot index 246765a6663..5e2502ba0c4 100644 --- a/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot +++ b/tests/onnx/data/reference_graphs/quantization/bertsquad-12.dot @@ -405,14 +405,14 @@ strict digraph { "403 bert/encoder/Reshape_13/shape_Concat__301" [id=403, type=Concat]; "404 bert/encoder/Reshape_13__471" [id=404, type=Cast]; "405 bert/encoder/Reshape_1" [id=405, type=Reshape]; -"406 QuantizeLinear_bert/encoder/Reshape_1^0_3" [id=406, label="406 QuantizeLinear_bert/encoder/Reshape_1:0_3", type=QuantizeLinear]; -"407 DequantizeLinear_bert/encoder/Reshape_1^0_3" [id=407, label="407 DequantizeLinear_bert/encoder/Reshape_1:0_3", type=DequantizeLinear]; -"408 QuantizeLinear_bert/encoder/Reshape_1^0_2" [id=408, label="408 QuantizeLinear_bert/encoder/Reshape_1:0_2", type=QuantizeLinear]; -"409 DequantizeLinear_bert/encoder/Reshape_1^0_2" [id=409, label="409 DequantizeLinear_bert/encoder/Reshape_1:0_2", type=DequantizeLinear]; -"410 QuantizeLinear_bert/encoder/Reshape_1^0_1" [id=410, label="410 QuantizeLinear_bert/encoder/Reshape_1:0_1", type=QuantizeLinear]; -"411 DequantizeLinear_bert/encoder/Reshape_1^0_1" [id=411, label="411 DequantizeLinear_bert/encoder/Reshape_1:0_1", type=DequantizeLinear]; -"412 
QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=412, label="412 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=413, label="413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"406 QuantizeLinear_bert/encoder/Reshape_1^0_1" [id=406, label="406 QuantizeLinear_bert/encoder/Reshape_1:0_1", type=QuantizeLinear]; +"407 DequantizeLinear_bert/encoder/Reshape_1^0_1" [id=407, label="407 DequantizeLinear_bert/encoder/Reshape_1:0_1", type=DequantizeLinear]; +"408 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=408, label="408 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [id=409, label="409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"410 QuantizeLinear_bert/encoder/Reshape_1^0_2" [id=410, label="410 QuantizeLinear_bert/encoder/Reshape_1:0_2", type=QuantizeLinear]; +"411 DequantizeLinear_bert/encoder/Reshape_1^0_2" [id=411, label="411 DequantizeLinear_bert/encoder/Reshape_1:0_2", type=DequantizeLinear]; +"412 QuantizeLinear_bert/encoder/Reshape_1^0_3" [id=412, label="412 QuantizeLinear_bert/encoder/Reshape_1:0_3", type=QuantizeLinear]; +"413 DequantizeLinear_bert/encoder/Reshape_1^0_3" [id=413, label="413 DequantizeLinear_bert/encoder/Reshape_1:0_3", type=DequantizeLinear]; "414 bert/encoder/layer_0/attention/self/value/MatMul" [id=414, type=MatMul]; "415 bert/encoder/layer_0/attention/self/value/BiasAdd" [id=415, type=Add]; "416 bert/encoder/layer_0/attention/self/Reshape_2" [id=416, type=Reshape]; @@ -495,14 +495,14 @@ strict digraph { "493 bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" [id=493, type=Sub]; "494 bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1" [id=494, type=Mul]; "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" [id=495, type=Add]; -"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=496, label="496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=497, label="497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=498, label="498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=499, label="499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=500, label="500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=501, label="501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"502 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=502, label="502 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" 
[id=503, label="503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=496, label="496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [id=497, label="497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"498 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=498, label="498 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [id=499, label="499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=500, label="500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [id=501, label="501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=502, label="502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [id=503, label="503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "504 bert/encoder/layer_1/attention/self/value/MatMul" [id=504, type=MatMul]; "505 bert/encoder/layer_1/attention/self/value/BiasAdd" [id=505, type=Add]; "506 bert/encoder/layer_1/attention/self/Reshape_2" [id=506, type=Reshape]; @@ -585,14 +585,14 @@ strict digraph { "583 bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" [id=583, type=Sub]; "584 bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1" [id=584, type=Mul]; "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" [id=585, type=Add]; -"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=586, label="586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=587, label="587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=588, label="588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=589, label="589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=590, label="590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=591, label="591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"592 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=592, label="592 
QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=593, label="593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=586, label="586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [id=587, label="587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"588 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=588, label="588 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [id=589, label="589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=590, label="590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [id=591, label="591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=592, label="592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [id=593, label="593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "594 bert/encoder/layer_2/attention/self/value/MatMul" [id=594, type=MatMul]; "595 bert/encoder/layer_2/attention/self/value/BiasAdd" [id=595, type=Add]; "596 bert/encoder/layer_2/attention/self/Reshape_2" [id=596, type=Reshape]; @@ -675,14 +675,14 @@ strict digraph { "673 bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" [id=673, type=Sub]; "674 bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1" [id=674, type=Mul]; "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" [id=675, type=Add]; -"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=676, label="676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=677, label="677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=678, label="678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=679, label="679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=680, label="680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=681, label="681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", 
type=DequantizeLinear]; -"682 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=682, label="682 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=683, label="683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=676, label="676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [id=677, label="677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"678 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=678, label="678 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [id=679, label="679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=680, label="680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [id=681, label="681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=682, label="682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [id=683, label="683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "684 bert/encoder/layer_3/attention/self/value/MatMul" [id=684, type=MatMul]; "685 bert/encoder/layer_3/attention/self/value/BiasAdd" [id=685, type=Add]; "686 bert/encoder/layer_3/attention/self/Reshape_2" [id=686, type=Reshape]; @@ -765,14 +765,14 @@ strict digraph { "763 bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" [id=763, type=Sub]; "764 bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1" [id=764, type=Mul]; "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" [id=765, type=Add]; -"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=766, label="766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=767, label="767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=768, label="768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=769, label="769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=770, label="770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"771 
DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=771, label="771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"772 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=772, label="772 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=773, label="773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=766, label="766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [id=767, label="767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"768 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=768, label="768 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [id=769, label="769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=770, label="770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [id=771, label="771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=772, label="772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [id=773, label="773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "774 bert/encoder/layer_4/attention/self/value/MatMul" [id=774, type=MatMul]; "775 bert/encoder/layer_4/attention/self/value/BiasAdd" [id=775, type=Add]; "776 bert/encoder/layer_4/attention/self/Reshape_2" [id=776, type=Reshape]; @@ -855,14 +855,14 @@ strict digraph { "853 bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" [id=853, type=Sub]; "854 bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1" [id=854, type=Mul]; "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" [id=855, type=Add]; -"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=856, label="856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=857, label="857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=858, label="858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=859, label="859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=860, 
label="860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=861, label="861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"862 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=862, label="862 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=863, label="863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=856, label="856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [id=857, label="857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"858 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=858, label="858 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [id=859, label="859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=860, label="860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [id=861, label="861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=862, label="862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [id=863, label="863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "864 bert/encoder/layer_5/attention/self/value/MatMul" [id=864, type=MatMul]; "865 bert/encoder/layer_5/attention/self/value/BiasAdd" [id=865, type=Add]; "866 bert/encoder/layer_5/attention/self/Reshape_2" [id=866, type=Reshape]; @@ -945,14 +945,14 @@ strict digraph { "943 bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" [id=943, type=Sub]; "944 bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1" [id=944, type=Mul]; "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" [id=945, type=Add]; -"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=946, label="946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=947, label="947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=948, label="948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=949, label="949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", 
type=DequantizeLinear]; -"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=950, label="950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=951, label="951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"952 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=952, label="952 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=953, label="953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=946, label="946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [id=947, label="947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"948 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=948, label="948 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [id=949, label="949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=950, label="950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [id=951, label="951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=952, label="952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [id=953, label="953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "954 bert/encoder/layer_6/attention/self/value/MatMul" [id=954, type=MatMul]; "955 bert/encoder/layer_6/attention/self/value/BiasAdd" [id=955, type=Add]; "956 bert/encoder/layer_6/attention/self/Reshape_2" [id=956, type=Reshape]; @@ -1035,14 +1035,14 @@ strict digraph { "1033 bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" [id=1033, type=Sub]; "1034 bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1" [id=1034, type=Mul]; "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" [id=1035, type=Add]; -"1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1036, label="1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1037, label="1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1038, label="1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1039 
DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1039, label="1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1040, label="1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1041, label="1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1042 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1042, label="1042 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1043, label="1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1036, label="1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [id=1037, label="1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1038 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1038, label="1038 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [id=1039, label="1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1040, label="1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [id=1041, label="1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1042, label="1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [id=1043, label="1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "1044 bert/encoder/layer_7/attention/self/value/MatMul" [id=1044, type=MatMul]; "1045 bert/encoder/layer_7/attention/self/value/BiasAdd" [id=1045, type=Add]; "1046 bert/encoder/layer_7/attention/self/Reshape_2" [id=1046, type=Reshape]; @@ -1125,14 +1125,14 @@ strict digraph { "1123 bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" [id=1123, type=Sub]; "1124 bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1" [id=1124, type=Mul]; "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" [id=1125, type=Add]; -"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1126, label="1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1127, label="1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"1128 
QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1128, label="1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1129, label="1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1130, label="1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1131, label="1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1132 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1132, label="1132 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1133, label="1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1126, label="1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [id=1127, label="1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1128 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1128, label="1128 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [id=1129, label="1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1130, label="1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [id=1131, label="1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1132, label="1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [id=1133, label="1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "1134 bert/encoder/layer_8/attention/self/value/MatMul" [id=1134, type=MatMul]; "1135 bert/encoder/layer_8/attention/self/value/BiasAdd" [id=1135, type=Add]; "1136 bert/encoder/layer_8/attention/self/Reshape_2" [id=1136, type=Reshape]; @@ -1215,14 +1215,14 @@ strict digraph { "1213 bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" [id=1213, type=Sub]; "1214 bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1" [id=1214, type=Mul]; "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" [id=1215, type=Add]; -"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1216, label="1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1217 
DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1217, label="1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1218, label="1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1219, label="1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1220, label="1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1221, label="1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1222 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1222, label="1222 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1223, label="1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1216, label="1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [id=1217, label="1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1218 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1218, label="1218 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [id=1219, label="1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1220, label="1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [id=1221, label="1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1222, label="1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [id=1223, label="1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "1224 bert/encoder/layer_9/attention/self/value/MatMul" [id=1224, type=MatMul]; "1225 bert/encoder/layer_9/attention/self/value/BiasAdd" [id=1225, type=Add]; "1226 bert/encoder/layer_9/attention/self/Reshape_2" [id=1226, type=Reshape]; @@ -1305,14 +1305,14 @@ strict digraph { "1303 bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" [id=1303, type=Sub]; "1304 bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1" [id=1304, type=Mul]; "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" [id=1305, type=Add]; -"1306 
QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1306, label="1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1307, label="1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1308, label="1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1309, label="1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1310, label="1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1311, label="1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1312 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1312, label="1312 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1313, label="1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1306, label="1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [id=1307, label="1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1308 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1308, label="1308 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [id=1309, label="1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1310, label="1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [id=1311, label="1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1312, label="1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [id=1313, label="1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "1314 bert/encoder/layer_10/attention/self/value/MatMul" [id=1314, type=MatMul]; "1315 bert/encoder/layer_10/attention/self/value/BiasAdd" [id=1315, type=Add]; "1316 bert/encoder/layer_10/attention/self/Reshape_2" [id=1316, type=Reshape]; @@ -1395,14 +1395,14 @@ strict digraph { "1393 bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" 
[id=1393, type=Sub]; "1394 bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1" [id=1394, type=Mul]; "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" [id=1395, type=Add]; -"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1396, label="1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; -"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1397, label="1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; -"1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1398, label="1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; -"1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1399, label="1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; -"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1400, label="1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; -"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1401, label="1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; -"1402 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1402, label="1402 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=QuantizeLinear]; -"1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1403, label="1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1396, label="1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=QuantizeLinear]; +"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [id=1397, label="1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_1", type=DequantizeLinear]; +"1398 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1398, label="1398 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=QuantizeLinear]; +"1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [id=1399, label="1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel:0_1", type=DequantizeLinear]; +"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1400, label="1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=QuantizeLinear]; +"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [id=1401, label="1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_2", type=DequantizeLinear]; +"1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1402, label="1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=QuantizeLinear]; +"1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [id=1403, label="1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0_3", type=DequantizeLinear]; "1404 bert/encoder/layer_11/attention/self/value/MatMul" [id=1404, type=MatMul]; "1405 
bert/encoder/layer_11/attention/self/value/BiasAdd" [id=1405, type=Add]; "1406 bert/encoder/layer_11/attention/self/Reshape_2" [id=1406, type=Reshape]; @@ -1991,18 +1991,18 @@ strict digraph { "402 bert/encoder/Reshape_13/shape_Unsqueeze__298" -> "403 bert/encoder/Reshape_13/shape_Concat__301" [label="[1]", style=dashed]; "403 bert/encoder/Reshape_13/shape_Concat__301" -> "404 bert/encoder/Reshape_13__471" [label="[3]", style=dashed]; "404 bert/encoder/Reshape_13__471" -> "1488 bert/encoder/Reshape_13" [label="[3]", style=dashed]; -"405 bert/encoder/Reshape_1" -> "406 QuantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=solid]; -"405 bert/encoder/Reshape_1" -> "408 QuantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=solid]; -"405 bert/encoder/Reshape_1" -> "410 QuantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=solid]; +"405 bert/encoder/Reshape_1" -> "406 QuantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=solid]; +"405 bert/encoder/Reshape_1" -> "410 QuantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=solid]; +"405 bert/encoder/Reshape_1" -> "412 QuantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=solid]; "405 bert/encoder/Reshape_1" -> "448 bert/encoder/layer_0/attention/output/add" [label="[]", style=solid]; -"406 QuantizeLinear_bert/encoder/Reshape_1^0_3" -> "407 DequantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=dashed]; -"407 DequantizeLinear_bert/encoder/Reshape_1^0_3" -> "428 bert/encoder/layer_0/attention/self/key/MatMul" [label="[]", style=solid]; -"408 QuantizeLinear_bert/encoder/Reshape_1^0_2" -> "409 DequantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=dashed]; -"409 DequantizeLinear_bert/encoder/Reshape_1^0_2" -> "420 bert/encoder/layer_0/attention/self/query/MatMul" [label="[]", style=solid]; -"410 QuantizeLinear_bert/encoder/Reshape_1^0_1" -> "411 DequantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=dashed]; -"411 DequantizeLinear_bert/encoder/Reshape_1^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[]", style=solid]; -"412 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"413 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"406 QuantizeLinear_bert/encoder/Reshape_1^0_1" -> "407 DequantizeLinear_bert/encoder/Reshape_1^0_1" [label="[]", style=dashed]; +"407 DequantizeLinear_bert/encoder/Reshape_1^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[]", style=solid]; +"408 QuantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"409 DequantizeLinear_bert/encoder/layer_0/attention/self/value/kernel^0_1" -> "414 bert/encoder/layer_0/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"410 QuantizeLinear_bert/encoder/Reshape_1^0_2" -> "411 DequantizeLinear_bert/encoder/Reshape_1^0_2" [label="[]", style=dashed]; +"411 DequantizeLinear_bert/encoder/Reshape_1^0_2" -> "420 bert/encoder/layer_0/attention/self/query/MatMul" [label="[]", style=solid]; +"412 QuantizeLinear_bert/encoder/Reshape_1^0_3" -> "413 DequantizeLinear_bert/encoder/Reshape_1^0_3" [label="[]", style=dashed]; +"413 DequantizeLinear_bert/encoder/Reshape_1^0_3" -> "428 
bert/encoder/layer_0/attention/self/key/MatMul" [label="[]", style=solid]; "414 bert/encoder/layer_0/attention/self/value/MatMul" -> "415 bert/encoder/layer_0/attention/self/value/BiasAdd" [label="[]", style=solid]; "415 bert/encoder/layer_0/attention/self/value/BiasAdd" -> "416 bert/encoder/layer_0/attention/self/Reshape_2" [label="[]", style=solid]; "416 bert/encoder/layer_0/attention/self/Reshape_2" -> "417 bert/encoder/layer_0/attention/self/transpose_2" [label="[]", style=solid]; @@ -2095,18 +2095,18 @@ strict digraph { "492 bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2" -> "493 bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "493 bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" -> "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "494 bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1" -> "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "495 bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" -> "538 bert/encoder/layer_1/attention/output/add" [label="[]", style=solid]; -"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "518 bert/encoder/layer_1/attention/self/key/MatMul" [label="[]", style=solid]; -"498 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"499 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "510 bert/encoder/layer_1/attention/self/query/MatMul" [label="[]", style=solid]; -"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[]", style=solid]; -"502 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"503 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[768, 
768]", style=solid]; +"496 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"497 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[]", style=solid]; +"498 QuantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"499 DequantizeLinear_bert/encoder/layer_1/attention/self/value/kernel^0_1" -> "504 bert/encoder/layer_1/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"500 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"501 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_2" -> "510 bert/encoder/layer_1/attention/self/query/MatMul" [label="[]", style=solid]; +"502 QuantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"503 DequantizeLinear_bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1^0_3" -> "518 bert/encoder/layer_1/attention/self/key/MatMul" [label="[]", style=solid]; "504 bert/encoder/layer_1/attention/self/value/MatMul" -> "505 bert/encoder/layer_1/attention/self/value/BiasAdd" [label="[]", style=solid]; "505 bert/encoder/layer_1/attention/self/value/BiasAdd" -> "506 bert/encoder/layer_1/attention/self/Reshape_2" [label="[]", style=solid]; "506 bert/encoder/layer_1/attention/self/Reshape_2" -> "507 bert/encoder/layer_1/attention/self/transpose_2" [label="[]", style=solid]; @@ -2199,18 +2199,18 @@ strict digraph { "582 bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2" -> "583 bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "583 bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" -> "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "584 bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1" -> "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "585 bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" -> "628 bert/encoder/layer_2/attention/output/add" [label="[]", style=solid]; -"586 
QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "608 bert/encoder/layer_2/attention/self/key/MatMul" [label="[]", style=solid]; -"588 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"589 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "600 bert/encoder/layer_2/attention/self/query/MatMul" [label="[]", style=solid]; -"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[]", style=solid]; -"592 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"593 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"586 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"587 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[]", style=solid]; +"588 QuantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"589 DequantizeLinear_bert/encoder/layer_2/attention/self/value/kernel^0_1" -> "594 bert/encoder/layer_2/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"590 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"591 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_2" -> "600 bert/encoder/layer_2/attention/self/query/MatMul" [label="[]", style=solid]; +"592 QuantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"593 DequantizeLinear_bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1^0_3" -> "608 bert/encoder/layer_2/attention/self/key/MatMul" [label="[]", style=solid]; "594 bert/encoder/layer_2/attention/self/value/MatMul" -> "595 bert/encoder/layer_2/attention/self/value/BiasAdd" [label="[]", style=solid]; "595 bert/encoder/layer_2/attention/self/value/BiasAdd" -> "596 bert/encoder/layer_2/attention/self/Reshape_2" [label="[]", style=solid]; "596 bert/encoder/layer_2/attention/self/Reshape_2" -> "597 bert/encoder/layer_2/attention/self/transpose_2" [label="[]", style=solid]; @@ -2303,18 +2303,18 @@ strict digraph { "672 bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2" -> "673 bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "673 bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" -> "675 
bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "674 bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1" -> "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "675 bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" -> "718 bert/encoder/layer_3/attention/output/add" [label="[]", style=solid]; -"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "698 bert/encoder/layer_3/attention/self/key/MatMul" [label="[]", style=solid]; -"678 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"679 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "690 bert/encoder/layer_3/attention/self/query/MatMul" [label="[]", style=solid]; -"680 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[]", style=solid]; -"682 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"683 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"676 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"677 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[]", style=solid]; +"678 QuantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"679 DequantizeLinear_bert/encoder/layer_3/attention/self/value/kernel^0_1" -> "684 bert/encoder/layer_3/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"680 
QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"681 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_2" -> "690 bert/encoder/layer_3/attention/self/query/MatMul" [label="[]", style=solid]; +"682 QuantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"683 DequantizeLinear_bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1^0_3" -> "698 bert/encoder/layer_3/attention/self/key/MatMul" [label="[]", style=solid]; "684 bert/encoder/layer_3/attention/self/value/MatMul" -> "685 bert/encoder/layer_3/attention/self/value/BiasAdd" [label="[]", style=solid]; "685 bert/encoder/layer_3/attention/self/value/BiasAdd" -> "686 bert/encoder/layer_3/attention/self/Reshape_2" [label="[]", style=solid]; "686 bert/encoder/layer_3/attention/self/Reshape_2" -> "687 bert/encoder/layer_3/attention/self/transpose_2" [label="[]", style=solid]; @@ -2407,18 +2407,18 @@ strict digraph { "762 bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2" -> "763 bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "763 bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" -> "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "764 bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1" -> "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "765 bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" -> "808 bert/encoder/layer_4/attention/output/add" [label="[]", style=solid]; -"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "788 bert/encoder/layer_4/attention/self/key/MatMul" [label="[]", style=solid]; -"768 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"769 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "780 bert/encoder/layer_4/attention/self/query/MatMul" [label="[]", style=solid]; -"770 
QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[]", style=solid]; -"772 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"773 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"766 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"767 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[]", style=solid]; +"768 QuantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"769 DequantizeLinear_bert/encoder/layer_4/attention/self/value/kernel^0_1" -> "774 bert/encoder/layer_4/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"770 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"771 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_2" -> "780 bert/encoder/layer_4/attention/self/query/MatMul" [label="[]", style=solid]; +"772 QuantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"773 DequantizeLinear_bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1^0_3" -> "788 bert/encoder/layer_4/attention/self/key/MatMul" [label="[]", style=solid]; "774 bert/encoder/layer_4/attention/self/value/MatMul" -> "775 bert/encoder/layer_4/attention/self/value/BiasAdd" [label="[]", style=solid]; "775 bert/encoder/layer_4/attention/self/value/BiasAdd" -> "776 bert/encoder/layer_4/attention/self/Reshape_2" [label="[]", style=solid]; "776 bert/encoder/layer_4/attention/self/Reshape_2" -> "777 bert/encoder/layer_4/attention/self/transpose_2" [label="[]", style=solid]; @@ -2511,18 +2511,18 @@ strict digraph { "852 bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2" -> "853 bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "853 bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" -> "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "854 bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1" -> "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", 
style=solid]; +"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "855 bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" -> "898 bert/encoder/layer_5/attention/output/add" [label="[]", style=solid]; -"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "878 bert/encoder/layer_5/attention/self/key/MatMul" [label="[]", style=solid]; -"858 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"859 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "870 bert/encoder/layer_5/attention/self/query/MatMul" [label="[]", style=solid]; -"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[]", style=solid]; -"862 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"863 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"856 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"857 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[]", style=solid]; +"858 QuantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"859 DequantizeLinear_bert/encoder/layer_5/attention/self/value/kernel^0_1" -> "864 bert/encoder/layer_5/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"860 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"861 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_2" -> "870 bert/encoder/layer_5/attention/self/query/MatMul" [label="[]", style=solid]; +"862 QuantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"863 DequantizeLinear_bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1^0_3" -> "878 bert/encoder/layer_5/attention/self/key/MatMul" [label="[]", style=solid]; "864 
bert/encoder/layer_5/attention/self/value/MatMul" -> "865 bert/encoder/layer_5/attention/self/value/BiasAdd" [label="[]", style=solid]; "865 bert/encoder/layer_5/attention/self/value/BiasAdd" -> "866 bert/encoder/layer_5/attention/self/Reshape_2" [label="[]", style=solid]; "866 bert/encoder/layer_5/attention/self/Reshape_2" -> "867 bert/encoder/layer_5/attention/self/transpose_2" [label="[]", style=solid]; @@ -2615,18 +2615,18 @@ strict digraph { "942 bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2" -> "943 bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "943 bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" -> "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "944 bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1" -> "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "945 bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" -> "988 bert/encoder/layer_6/attention/output/add" [label="[]", style=solid]; -"946 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "968 bert/encoder/layer_6/attention/self/key/MatMul" [label="[]", style=solid]; -"948 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"949 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "960 bert/encoder/layer_6/attention/self/query/MatMul" [label="[]", style=solid]; -"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[]", style=solid]; -"952 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"953 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"946 
QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"947 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[]", style=solid]; +"948 QuantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"949 DequantizeLinear_bert/encoder/layer_6/attention/self/value/kernel^0_1" -> "954 bert/encoder/layer_6/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"950 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"951 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_2" -> "960 bert/encoder/layer_6/attention/self/query/MatMul" [label="[]", style=solid]; +"952 QuantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"953 DequantizeLinear_bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1^0_3" -> "968 bert/encoder/layer_6/attention/self/key/MatMul" [label="[]", style=solid]; "954 bert/encoder/layer_6/attention/self/value/MatMul" -> "955 bert/encoder/layer_6/attention/self/value/BiasAdd" [label="[]", style=solid]; "955 bert/encoder/layer_6/attention/self/value/BiasAdd" -> "956 bert/encoder/layer_6/attention/self/Reshape_2" [label="[]", style=solid]; "956 bert/encoder/layer_6/attention/self/Reshape_2" -> "957 bert/encoder/layer_6/attention/self/transpose_2" [label="[]", style=solid]; @@ -2719,18 +2719,18 @@ strict digraph { "1032 bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2" -> "1033 bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1033 bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" -> "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1034 bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1" -> "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "1035 bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" -> "1078 bert/encoder/layer_7/attention/output/add" [label="[]", style=solid]; -"1036 
QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1058 bert/encoder/layer_7/attention/self/key/MatMul" [label="[]", style=solid]; -"1038 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1039 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1050 bert/encoder/layer_7/attention/self/query/MatMul" [label="[]", style=solid]; -"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[]", style=solid]; -"1042 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1043 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1036 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1037 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[]", style=solid]; +"1038 QuantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1039 DequantizeLinear_bert/encoder/layer_7/attention/self/value/kernel^0_1" -> "1044 bert/encoder/layer_7/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1040 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1041 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_2" -> "1050 bert/encoder/layer_7/attention/self/query/MatMul" [label="[]", style=solid]; +"1042 QuantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1043 DequantizeLinear_bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1^0_3" -> "1058 bert/encoder/layer_7/attention/self/key/MatMul" [label="[]", style=solid]; "1044 bert/encoder/layer_7/attention/self/value/MatMul" -> "1045 bert/encoder/layer_7/attention/self/value/BiasAdd" [label="[]", style=solid]; "1045 bert/encoder/layer_7/attention/self/value/BiasAdd" -> "1046 bert/encoder/layer_7/attention/self/Reshape_2" [label="[]", style=solid]; "1046 bert/encoder/layer_7/attention/self/Reshape_2" -> "1047 bert/encoder/layer_7/attention/self/transpose_2" [label="[]", style=solid]; @@ -2823,18 +2823,18 @@ strict digraph { "1122 bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2" -> "1123 bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1123 
bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" -> "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1124 bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1" -> "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "1125 bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" -> "1168 bert/encoder/layer_8/attention/output/add" [label="[]", style=solid]; -"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1148 bert/encoder/layer_8/attention/self/key/MatMul" [label="[]", style=solid]; -"1128 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1129 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1140 bert/encoder/layer_8/attention/self/query/MatMul" [label="[]", style=solid]; -"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[]", style=solid]; -"1132 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1133 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1126 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1127 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_1" -> "1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[]", style=solid]; +"1128 QuantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> "1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1129 DequantizeLinear_bert/encoder/layer_8/attention/self/value/kernel^0_1" -> 
"1134 bert/encoder/layer_8/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1130 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1131 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_2" -> "1140 bert/encoder/layer_8/attention/self/query/MatMul" [label="[]", style=solid]; +"1132 QuantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1133 DequantizeLinear_bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1^0_3" -> "1148 bert/encoder/layer_8/attention/self/key/MatMul" [label="[]", style=solid]; "1134 bert/encoder/layer_8/attention/self/value/MatMul" -> "1135 bert/encoder/layer_8/attention/self/value/BiasAdd" [label="[]", style=solid]; "1135 bert/encoder/layer_8/attention/self/value/BiasAdd" -> "1136 bert/encoder/layer_8/attention/self/Reshape_2" [label="[]", style=solid]; "1136 bert/encoder/layer_8/attention/self/Reshape_2" -> "1137 bert/encoder/layer_8/attention/self/transpose_2" [label="[]", style=solid]; @@ -2927,18 +2927,18 @@ strict digraph { "1212 bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2" -> "1213 bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1213 bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" -> "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1214 bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1" -> "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "1215 bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" -> "1258 bert/encoder/layer_9/attention/output/add" [label="[]", style=solid]; -"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1238 bert/encoder/layer_9/attention/self/key/MatMul" [label="[]", style=solid]; -"1218 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1219 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1230 
bert/encoder/layer_9/attention/self/query/MatMul" [label="[]", style=solid]; -"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[]", style=solid]; -"1222 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1223 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1216 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1217 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[]", style=solid]; +"1218 QuantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1219 DequantizeLinear_bert/encoder/layer_9/attention/self/value/kernel^0_1" -> "1224 bert/encoder/layer_9/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1220 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1221 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_2" -> "1230 bert/encoder/layer_9/attention/self/query/MatMul" [label="[]", style=solid]; +"1222 QuantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1223 DequantizeLinear_bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1^0_3" -> "1238 bert/encoder/layer_9/attention/self/key/MatMul" [label="[]", style=solid]; "1224 bert/encoder/layer_9/attention/self/value/MatMul" -> "1225 bert/encoder/layer_9/attention/self/value/BiasAdd" [label="[]", style=solid]; "1225 bert/encoder/layer_9/attention/self/value/BiasAdd" -> "1226 bert/encoder/layer_9/attention/self/Reshape_2" [label="[]", style=solid]; "1226 bert/encoder/layer_9/attention/self/Reshape_2" -> "1227 bert/encoder/layer_9/attention/self/transpose_2" [label="[]", style=solid]; @@ -3031,18 +3031,18 @@ strict digraph { "1302 bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2" -> "1303 bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1303 bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" -> "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1304 bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1" -> "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1305 
bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "1305 bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" -> "1348 bert/encoder/layer_10/attention/output/add" [label="[]", style=solid]; -"1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1328 bert/encoder/layer_10/attention/self/key/MatMul" [label="[]", style=solid]; -"1308 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1309 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1320 bert/encoder/layer_10/attention/self/query/MatMul" [label="[]", style=solid]; -"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[]", style=solid]; -"1312 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; -"1313 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1306 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1307 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[]", style=solid]; +"1308 QuantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1309 DequantizeLinear_bert/encoder/layer_10/attention/self/value/kernel^0_1" -> "1314 bert/encoder/layer_10/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1310 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1311 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_2" -> "1320 bert/encoder/layer_10/attention/self/query/MatMul" [label="[]", style=solid]; +"1312 QuantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", 
style=dashed]; +"1313 DequantizeLinear_bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1^0_3" -> "1328 bert/encoder/layer_10/attention/self/key/MatMul" [label="[]", style=solid]; "1314 bert/encoder/layer_10/attention/self/value/MatMul" -> "1315 bert/encoder/layer_10/attention/self/value/BiasAdd" [label="[]", style=solid]; "1315 bert/encoder/layer_10/attention/self/value/BiasAdd" -> "1316 bert/encoder/layer_10/attention/self/Reshape_2" [label="[]", style=solid]; "1316 bert/encoder/layer_10/attention/self/Reshape_2" -> "1317 bert/encoder/layer_10/attention/self/transpose_2" [label="[]", style=solid]; @@ -3135,18 +3135,18 @@ strict digraph { "1392 bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2" -> "1393 bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" [label="[]", style=solid]; "1393 bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" -> "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; "1394 bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1" -> "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" [label="[]", style=solid]; -"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; -"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; -"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=solid]; +"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=solid]; +"1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=solid]; "1395 bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" -> "1438 bert/encoder/layer_11/attention/output/add" [label="[]", style=solid]; -"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; -"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1418 bert/encoder/layer_11/attention/self/key/MatMul" [label="[]", style=solid]; -"1398 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; -"1399 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1410 bert/encoder/layer_11/attention/self/query/MatMul" [label="[]", style=solid]; -"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; -"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[]", style=solid]; -"1402 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" 
[label="[768, 768]", style=dashed]; -"1403 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1396 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" [label="[]", style=dashed]; +"1397 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[]", style=solid]; +"1398 QuantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" [label="[768, 768]", style=dashed]; +"1399 DequantizeLinear_bert/encoder/layer_11/attention/self/value/kernel^0_1" -> "1404 bert/encoder/layer_11/attention/self/value/MatMul" [label="[768, 768]", style=solid]; +"1400 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" [label="[]", style=dashed]; +"1401 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_2" -> "1410 bert/encoder/layer_11/attention/self/query/MatMul" [label="[]", style=solid]; +"1402 QuantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" [label="[]", style=dashed]; +"1403 DequantizeLinear_bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1^0_3" -> "1418 bert/encoder/layer_11/attention/self/key/MatMul" [label="[]", style=solid]; "1404 bert/encoder/layer_11/attention/self/value/MatMul" -> "1405 bert/encoder/layer_11/attention/self/value/BiasAdd" [label="[]", style=solid]; "1405 bert/encoder/layer_11/attention/self/value/BiasAdd" -> "1406 bert/encoder/layer_11/attention/self/Reshape_2" [label="[]", style=solid]; "1406 bert/encoder/layer_11/attention/self/Reshape_2" -> "1407 bert/encoder/layer_11/attention/self/transpose_2" [label="[]", style=solid]; diff --git a/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot b/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot index 7aa64281d32..9d2f66780d5 100644 --- a/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot +++ b/tests/onnx/data/reference_graphs/quantization/retinanet-9.dot @@ -855,10 +855,10 @@ strict digraph { "853 QuantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" [id=853, type=QuantizeLinear]; "854 DequantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" [id=854, type=DequantizeLinear]; "855 Conv_349" [id=855, type=Conv]; -"856 QuantizeLinear_1028_2" [id=856, type=QuantizeLinear]; -"857 DequantizeLinear_1028_2" [id=857, type=DequantizeLinear]; -"858 QuantizeLinear_1028_1" [id=858, type=QuantizeLinear]; -"859 DequantizeLinear_1028_1" [id=859, type=DequantizeLinear]; +"856 QuantizeLinear_1028_1" [id=856, type=QuantizeLinear]; +"857 DequantizeLinear_1028_1" [id=857, type=DequantizeLinear]; +"858 QuantizeLinear_1028_2" [id=858, type=QuantizeLinear]; +"859 DequantizeLinear_1028_2" [id=859, type=DequantizeLinear]; "860 Relu_350" [id=860, type=Relu]; "861 QuantizeLinear_1029_1" [id=861, type=QuantizeLinear]; "862 DequantizeLinear_1029_1" [id=862, type=DequantizeLinear]; @@ -1983,13 +1983,13 @@ strict digraph { "852 Add_348" -> "866 QuantizeLinear_1027_1" [label="[1, 256, 60, 80]", style=solid]; "853 QuantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" -> "854 
DequantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" [label="[256, 2048, 3, 3]", style=dashed]; "854 DequantizeLinear_backbones.ResNet101FPN.pyramid6.weight_1" -> "855 Conv_349" [label="[256, 2048, 3, 3]", style=solid]; -"855 Conv_349" -> "856 QuantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=solid]; -"855 Conv_349" -> "858 QuantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=solid]; +"855 Conv_349" -> "856 QuantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=solid]; +"855 Conv_349" -> "858 QuantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=solid]; "855 Conv_349" -> "860 Relu_350" [label="[1, 256, 8, 10]", style=solid]; -"856 QuantizeLinear_1028_2" -> "857 DequantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=dashed]; -"857 DequantizeLinear_1028_2" -> "1041 Conv_427" [label="[1, 256, 8, 10]", style=solid]; -"858 QuantizeLinear_1028_1" -> "859 DequantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=dashed]; -"859 DequantizeLinear_1028_1" -> "944 Conv_382" [label="[1, 256, 8, 10]", style=solid]; +"856 QuantizeLinear_1028_1" -> "857 DequantizeLinear_1028_1" [label="[1, 256, 8, 10]", style=dashed]; +"857 DequantizeLinear_1028_1" -> "944 Conv_382" [label="[1, 256, 8, 10]", style=solid]; +"858 QuantizeLinear_1028_2" -> "859 DequantizeLinear_1028_2" [label="[1, 256, 8, 10]", style=dashed]; +"859 DequantizeLinear_1028_2" -> "1041 Conv_427" [label="[1, 256, 8, 10]", style=solid]; "860 Relu_350" -> "861 QuantizeLinear_1029_1" [label="[1, 256, 8, 10]", style=solid]; "861 QuantizeLinear_1029_1" -> "862 DequantizeLinear_1029_1" [label="[1, 256, 8, 10]", style=dashed]; "862 DequantizeLinear_1029_1" -> "865 Conv_351" [label="[1, 256, 8, 10]", style=solid];