Skip to content

Commit

Permalink
[ONNX] Create mapping once inside Q/DQ insertion transformation (#2185)
Browse files Browse the repository at this point in the history
### Changes

Create the mappings for the quantizer insertion transformation once.
The new approach relies on the assumption that there are no quantizers
that should be inserted on the same branch.
This assumption is aligned with MinMaxAlgorithm.

### Reason for changes

Improve performance of Insert Quantizers transformation.


### Related tickets

Continuation of #2173

### Tests

Performance speedup measured locally:

Model | PR time | develop | SpeedUp
-- | -- | -- | --
swin | 65.935 | 67.925 | 3.02%
visformer | 49.854 | 50.771 | 1.84%
deit3 | 35.53 | 36.156 | 1.8%
  • Loading branch information
kshpv authored Oct 11, 2023
1 parent 8c05883 commit 9e42617
Show file tree
Hide file tree
Showing 4 changed files with 293 additions and 291 deletions.
8 changes: 5 additions & 3 deletions nncf/onnx/graph/model_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,10 @@ def _apply_quantizer_insertion_transformations(
:return: New model with inserted QuantizeLinear-DequantizeLinear nodes pairs.
"""
self._added_target_edges = Counter()
node_mapping = get_name_to_node_map(model)
children_node_mapping = get_children_node_mapping(model)
for transformation in transformations:
children_node_mapping = get_children_node_mapping(model)
model = self._insert_quantizer_dequantizer(model, transformation, children_node_mapping)
model = self._insert_quantizer_dequantizer(model, transformation, node_mapping, children_node_mapping)
return model

def _get_quantize_dequantize_nodes(
Expand Down Expand Up @@ -303,17 +304,18 @@ def _insert_quantizer_dequantizer(
self,
model: onnx.ModelProto,
transformation: ONNXQuantizerInsertionCommand,
node_mapping: Dict[str, onnx.NodeProto],
children_node_mapping: Dict[str, List[onnx.ValueInfoProto]],
) -> onnx.ModelProto:
"""
Inserts QuantizeLinear-DequantizeLinear nodes pair.
:param model: Model to insert new nodes.
:param transformation: QuantizeLinear-DequantizeLinear insertion transformation.
:param node_mapping: Mapping from node name to the node.
:param children_node_mapping: Mapping from edge name to nodes which consume this edge as an input.
:return: Updated model with inserted QuantizeLinear-DequantizeLinear pair.
"""
node_mapping = get_name_to_node_map(model)
target_edge_name = self._get_quantizer_dequantizer_edge_name(transformation, node_mapping)
quantizer, dequantizer = self._get_quantize_dequantize_nodes(transformation, target_edge_name)
onnx_scale_tensor, onnx_zero_point_tensor = ONNXModelTransformer._get_scale_zero_point_tensors(
Expand Down
100 changes: 50 additions & 50 deletions tests/onnx/data/reference_graphs/quantization/MaskRCNN-12.dot
Original file line number Diff line number Diff line change
Expand Up @@ -481,10 +481,10 @@ strict digraph {
"479 QuantizeLinear_389_1" [id=479, type=QuantizeLinear];
"480 DequantizeLinear_389_1" [id=480, type=DequantizeLinear];
"481 390" [id=481, type=Conv];
"482 QuantizeLinear_391_2" [id=482, type=QuantizeLinear];
"483 DequantizeLinear_391_2" [id=483, type=DequantizeLinear];
"484 QuantizeLinear_391_1" [id=484, type=QuantizeLinear];
"485 DequantizeLinear_391_1" [id=485, type=DequantizeLinear];
"482 QuantizeLinear_391_1" [id=482, type=QuantizeLinear];
"483 DequantizeLinear_391_1" [id=483, type=DequantizeLinear];
"484 QuantizeLinear_391_2" [id=484, type=QuantizeLinear];
"485 DequantizeLinear_391_2" [id=485, type=DequantizeLinear];
"486 487" [id=486, type=MaxPool];
"487 QuantizeLinear_489_1" [id=487, type=QuantizeLinear];
"488 DequantizeLinear_489_1" [id=488, type=DequantizeLinear];
Expand Down Expand Up @@ -1749,14 +1749,14 @@ strict digraph {
"1747 1172" [id=1747, type=Gather];
"1748 2479" [id=1748, type=Concat];
"1749 2490" [id=1749, type=Gather];
"1750 QuantizeLinear_2527_4" [id=1750, type=QuantizeLinear];
"1751 DequantizeLinear_2527_4" [id=1751, type=DequantizeLinear];
"1752 QuantizeLinear_2527_3" [id=1752, type=QuantizeLinear];
"1753 DequantizeLinear_2527_3" [id=1753, type=DequantizeLinear];
"1754 QuantizeLinear_2527_2" [id=1754, type=QuantizeLinear];
"1755 DequantizeLinear_2527_2" [id=1755, type=DequantizeLinear];
"1756 QuantizeLinear_2527_1" [id=1756, type=QuantizeLinear];
"1757 DequantizeLinear_2527_1" [id=1757, type=DequantizeLinear];
"1750 QuantizeLinear_2527_1" [id=1750, type=QuantizeLinear];
"1751 DequantizeLinear_2527_1" [id=1751, type=DequantizeLinear];
"1752 QuantizeLinear_2527_2" [id=1752, type=QuantizeLinear];
"1753 DequantizeLinear_2527_2" [id=1753, type=DequantizeLinear];
"1754 QuantizeLinear_2527_3" [id=1754, type=QuantizeLinear];
"1755 DequantizeLinear_2527_3" [id=1755, type=DequantizeLinear];
"1756 QuantizeLinear_2527_4" [id=1756, type=QuantizeLinear];
"1757 DequantizeLinear_2527_4" [id=1757, type=DequantizeLinear];
"1758 2532" [id=1758, type=Slice];
"1759 2534" [id=1759, type=Gather];
"1760 2525" [id=1760, type=Slice];
Expand Down Expand Up @@ -3692,14 +3692,14 @@ strict digraph {
"3690 3030" [id=3690, type=Gather];
"3691 6518" [id=3691, type=Concat];
"3692 6530" [id=3692, type=Gather];
"3693 QuantizeLinear_6568_4" [id=3693, type=QuantizeLinear];
"3694 DequantizeLinear_6568_4" [id=3694, type=DequantizeLinear];
"3695 QuantizeLinear_6568_3" [id=3695, type=QuantizeLinear];
"3696 DequantizeLinear_6568_3" [id=3696, type=DequantizeLinear];
"3697 QuantizeLinear_6568_2" [id=3697, type=QuantizeLinear];
"3698 DequantizeLinear_6568_2" [id=3698, type=DequantizeLinear];
"3699 QuantizeLinear_6568_1" [id=3699, type=QuantizeLinear];
"3700 DequantizeLinear_6568_1" [id=3700, type=DequantizeLinear];
"3693 QuantizeLinear_6568_1" [id=3693, type=QuantizeLinear];
"3694 DequantizeLinear_6568_1" [id=3694, type=DequantizeLinear];
"3695 QuantizeLinear_6568_2" [id=3695, type=QuantizeLinear];
"3696 DequantizeLinear_6568_2" [id=3696, type=DequantizeLinear];
"3697 QuantizeLinear_6568_3" [id=3697, type=QuantizeLinear];
"3698 DequantizeLinear_6568_3" [id=3698, type=DequantizeLinear];
"3699 QuantizeLinear_6568_4" [id=3699, type=QuantizeLinear];
"3700 DequantizeLinear_6568_4" [id=3700, type=DequantizeLinear];
"3701 6576" [id=3701, type=Slice];
"3702 6578" [id=3702, type=Gather];
"3703 6569" [id=3703, type=Slice];
Expand Down Expand Up @@ -4788,16 +4788,16 @@ strict digraph {
"478 DequantizeLinear_388_1" -> "481 390" [label="[1, 256, -1, -1]", style=solid];
"479 QuantizeLinear_389_1" -> "480 DequantizeLinear_389_1" [label="[256, 256, 3, 3]", style=dashed];
"480 DequantizeLinear_389_1" -> "481 390" [label="[256, 256, 3, 3]", style=solid];
"481 390" -> "482 QuantizeLinear_391_2" [label="[1, 256, -1, -1]", style=solid];
"481 390" -> "484 QuantizeLinear_391_1" [label="[1, 256, -1, -1]", style=solid];
"481 390" -> "482 QuantizeLinear_391_1" [label="[1, 256, -1, -1]", style=solid];
"481 390" -> "484 QuantizeLinear_391_2" [label="[1, 256, -1, -1]", style=solid];
"481 390" -> "784 536" [label="[1, 256, -1, -1]", style=solid];
"481 390" -> "787 533" [label="[1, 256, -1, -1]", style=solid];
"481 390" -> "1929 2620" [label="[1, 256, -1, -1]", style=solid];
"481 390" -> "3872 6664" [label="[1, 256, -1, -1]", style=solid];
"482 QuantizeLinear_391_2" -> "483 DequantizeLinear_391_2" [label="[1, 256, -1, -1]", style=dashed];
"483 DequantizeLinear_391_2" -> "722 506" [label="[1, 256, -1, -1]", style=solid];
"484 QuantizeLinear_391_1" -> "485 DequantizeLinear_391_1" [label="[1, 256, -1, -1]", style=dashed];
"485 DequantizeLinear_391_1" -> "486 487" [label="[1, 256, -1, -1]", style=solid];
"482 QuantizeLinear_391_1" -> "483 DequantizeLinear_391_1" [label="[1, 256, -1, -1]", style=dashed];
"483 DequantizeLinear_391_1" -> "486 487" [label="[1, 256, -1, -1]", style=solid];
"484 QuantizeLinear_391_2" -> "485 DequantizeLinear_391_2" [label="[1, 256, -1, -1]", style=dashed];
"485 DequantizeLinear_391_2" -> "722 506" [label="[1, 256, -1, -1]", style=solid];
"486 487" -> "489 510" [label="[1, 256, -1, -1]", style=solid];
"486 487" -> "555 542" [label="[1, 256, -1, -1]", style=solid];
"486 487" -> "558 539" [label="[1, 256, -1, -1]", style=solid];
Expand Down Expand Up @@ -6341,21 +6341,21 @@ strict digraph {
"1746 1171" -> "1747 1172" [label="[-1]", style=dashed];
"1747 1172" -> "1748 2479" [label="[-1, 4]", style=solid];
"1748 2479" -> "1749 2490" [label="[-1, 4]", style=solid];
"1749 2490" -> "1750 QuantizeLinear_2527_4" [label="[]", style=solid];
"1749 2490" -> "1752 QuantizeLinear_2527_3" [label="[]", style=solid];
"1749 2490" -> "1754 QuantizeLinear_2527_2" [label="[]", style=solid];
"1749 2490" -> "1756 QuantizeLinear_2527_1" [label="[]", style=solid];
"1749 2490" -> "1750 QuantizeLinear_2527_1" [label="[]", style=solid];
"1749 2490" -> "1752 QuantizeLinear_2527_2" [label="[]", style=solid];
"1749 2490" -> "1754 QuantizeLinear_2527_3" [label="[]", style=solid];
"1749 2490" -> "1756 QuantizeLinear_2527_4" [label="[]", style=solid];
"1749 2490" -> "1803 2495" [label="[]", style=solid];
"1749 2490" -> "1807 2503" [label="[]", style=solid];
"1749 2490" -> "2009 2775" [label="[]", style=solid];
"1750 QuantizeLinear_2527_4" -> "1751 DequantizeLinear_2527_4" [label="[]", style=dashed];
"1751 DequantizeLinear_2527_4" -> "1768 2508" [label="[]", style=solid];
"1752 QuantizeLinear_2527_3" -> "1753 DequantizeLinear_2527_3" [label="[]", style=dashed];
"1753 DequantizeLinear_2527_3" -> "1766 2515" [label="[]", style=solid];
"1754 QuantizeLinear_2527_2" -> "1755 DequantizeLinear_2527_2" [label="[]", style=dashed];
"1755 DequantizeLinear_2527_2" -> "1760 2525" [label="[]", style=solid];
"1756 QuantizeLinear_2527_1" -> "1757 DequantizeLinear_2527_1" [label="[]", style=dashed];
"1757 DequantizeLinear_2527_1" -> "1758 2532" [label="[]", style=solid];
"1750 QuantizeLinear_2527_1" -> "1751 DequantizeLinear_2527_1" [label="[]", style=dashed];
"1751 DequantizeLinear_2527_1" -> "1758 2532" [label="[]", style=solid];
"1752 QuantizeLinear_2527_2" -> "1753 DequantizeLinear_2527_2" [label="[]", style=dashed];
"1753 DequantizeLinear_2527_2" -> "1760 2525" [label="[]", style=solid];
"1754 QuantizeLinear_2527_3" -> "1755 DequantizeLinear_2527_3" [label="[]", style=dashed];
"1755 DequantizeLinear_2527_3" -> "1766 2515" [label="[]", style=solid];
"1756 QuantizeLinear_2527_4" -> "1757 DequantizeLinear_2527_4" [label="[]", style=dashed];
"1757 DequantizeLinear_2527_4" -> "1768 2508" [label="[]", style=solid];
"1758 2532" -> "1759 2534" [label="[]", style=solid];
"1759 2534" -> "1762 2535" [label="[]", style=solid];
"1760 2525" -> "1761 2527" [label="[]", style=solid];
Expand Down Expand Up @@ -8988,21 +8988,21 @@ strict digraph {
"3690 3030" -> "3691 6518" [label="[]", style=solid];
"3690 3030" -> "4259 3037" [label="[]", style=solid];
"3691 6518" -> "3692 6530" [label="[]", style=solid];
"3692 6530" -> "3693 QuantizeLinear_6568_4" [label="[-1, 4]", style=solid];
"3692 6530" -> "3695 QuantizeLinear_6568_3" [label="[-1, 4]", style=solid];
"3692 6530" -> "3697 QuantizeLinear_6568_2" [label="[-1, 4]", style=solid];
"3692 6530" -> "3699 QuantizeLinear_6568_1" [label="[-1, 4]", style=solid];
"3692 6530" -> "3693 QuantizeLinear_6568_1" [label="[-1, 4]", style=solid];
"3692 6530" -> "3695 QuantizeLinear_6568_2" [label="[-1, 4]", style=solid];
"3692 6530" -> "3697 QuantizeLinear_6568_3" [label="[-1, 4]", style=solid];
"3692 6530" -> "3699 QuantizeLinear_6568_4" [label="[-1, 4]", style=solid];
"3692 6530" -> "3746 6539" [label="[-1, 4]", style=solid];
"3692 6530" -> "3750 6547" [label="[-1, 4]", style=solid];
"3692 6530" -> "4281 nncf_model_output_0" [label="[-1, 4]", style=solid];
"3693 QuantizeLinear_6568_4" -> "3694 DequantizeLinear_6568_4" [label="[-1, 4]", style=dashed];
"3694 DequantizeLinear_6568_4" -> "3711 6552" [label="[-1, 4]", style=solid];
"3695 QuantizeLinear_6568_3" -> "3696 DequantizeLinear_6568_3" [label="[-1, 4]", style=dashed];
"3696 DequantizeLinear_6568_3" -> "3709 6559" [label="[-1, 4]", style=solid];
"3697 QuantizeLinear_6568_2" -> "3698 DequantizeLinear_6568_2" [label="[-1, 4]", style=dashed];
"3698 DequantizeLinear_6568_2" -> "3703 6569" [label="[-1, 4]", style=solid];
"3699 QuantizeLinear_6568_1" -> "3700 DequantizeLinear_6568_1" [label="[-1, 4]", style=dashed];
"3700 DequantizeLinear_6568_1" -> "3701 6576" [label="[-1, 4]", style=solid];
"3693 QuantizeLinear_6568_1" -> "3694 DequantizeLinear_6568_1" [label="[-1, 4]", style=dashed];
"3694 DequantizeLinear_6568_1" -> "3701 6576" [label="[-1, 4]", style=solid];
"3695 QuantizeLinear_6568_2" -> "3696 DequantizeLinear_6568_2" [label="[-1, 4]", style=dashed];
"3696 DequantizeLinear_6568_2" -> "3703 6569" [label="[-1, 4]", style=solid];
"3697 QuantizeLinear_6568_3" -> "3698 DequantizeLinear_6568_3" [label="[-1, 4]", style=dashed];
"3698 DequantizeLinear_6568_3" -> "3709 6559" [label="[-1, 4]", style=solid];
"3699 QuantizeLinear_6568_4" -> "3700 DequantizeLinear_6568_4" [label="[-1, 4]", style=dashed];
"3700 DequantizeLinear_6568_4" -> "3711 6552" [label="[-1, 4]", style=solid];
"3701 6576" -> "3702 6578" [label="[-1, 4]", style=solid];
"3702 6578" -> "3705 6579" [label="[-1]", style=solid];
"3703 6569" -> "3704 6571" [label="[-1, 4]", style=solid];
Expand Down
Loading

0 comments on commit 9e42617

Please sign in to comment.