add tf_utils

Signed-off-by: zehao-intel <[email protected]> add pre_optimize for sq Signed-off-by: zehao-intel <[email protected]> fix ut issues Signed-off-by: zehao-intel <[email protected]> fix disable_random Signed-off-by: zehao-intel <[email protected]> [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci fix pylint Signed-off-by: zehao-intel <[email protected]> fix import Signed-off-by: zehao-intel <[email protected]> [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci modify requirements.txt Signed-off-by: zehao-intel <[email protected]> [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci fix import Signed-off-by: zehao-intel <[email protected]> [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
intel · Feb 2, 2024 · 60aadb7 · 60aadb7
1 parent 0181734
commit 60aadb7
Show file tree

Hide file tree

Showing 53 changed files with 6,875 additions and 65 deletions.
diff --git a/neural_compressor/tensorflow/algorithms/__init__.py b/neural_compressor/tensorflow/algorithms/__init__.py
@@ -14,4 +14,4 @@
 
 
 from neural_compressor.tensorflow.algorithms.smooth_quant import SmoothQuant
-from neural_compressor.tensorflow.algorithms.static_quant import KerasAdaptor
+from neural_compressor.tensorflow.algorithms.static_quant import KerasAdaptor
diff --git a/neural_compressor/tensorflow/algorithms/smooth_quant/smooth_quant.py b/neural_compressor/tensorflow/algorithms/smooth_quant/smooth_quant.py
@@ -21,7 +21,6 @@
 
 from neural_compressor.common import logger
 from neural_compressor.common.utils import DEFAULT_WORKSPACE
-from neural_compressor.tensorflow.quantization.config import SmoothQuantConfig
 from neural_compressor.tensorflow.algorithms.smooth_quant.smooth_quant_calibration import (
     SmoothQuantCalibration,
     SmoothQuantCalibrationLLM,
@@ -30,12 +29,9 @@
     SmoothQuantScaler,
     SmoothQuantScalerLLM,
 )
-from neural_compressor.tensorflow.utils import (
-    BaseModel, 
-    TensorflowLLMModel, 
-    SPR_BASE_VERSIONS,
-    framework_specific_info,
-)
+from neural_compressor.tensorflow.quantization.config import SmoothQuantConfig
+from neural_compressor.tensorflow.quantization.tf_utils.graph_util import GraphAnalyzer
+from neural_compressor.tensorflow.utils import SPR_BASE_VERSIONS, BaseModel, TensorflowLLMModel, framework_specific_info
 
 
 class SmoothQuant:
@@ -126,13 +122,13 @@ def apply_smooth_quant(self, model: BaseModel):
         logger.info("Start Smoothing process for Smooth Quantization.")
 
         # Do a pre-optimization before smooth quant
-        # from neural_compressor.tensorflow.quantization.tf_utils.graph_rewriter.generic.pre_optimize import (
-        #     PreOptimization,
-        # )
+        from neural_compressor.tensorflow.quantization.tf_utils.graph_rewriter.generic.pre_optimize import (
+            PreOptimization,
+        )
 
-        # pre_optimizer_handle = PreOptimization(model, self.new_api, self.device)
-        # pre_optimized_model = pre_optimizer_handle.get_optimized_model(self.itex_mode)
-        # model.graph_def = pre_optimized_model.graph_def
+        pre_optimizer_handle = PreOptimization(model, self.new_api, self.device)
+        pre_optimized_model = pre_optimizer_handle.get_optimized_model(self.itex_mode)
+        model.graph_def = pre_optimized_model.graph_def
 
         # Run calibration to get max values per channel
 
@@ -142,9 +138,7 @@ def apply_smooth_quant(self, model: BaseModel):
         max_vals_per_channel, sq_weight_node_names = calibration()
 
         # Get weight tensors and weight nodes based on the input tensor
-        sq_weight_tensors, sq_weights_nodes = self.get_weight_from_input_tensor(
-            model, max_vals_per_channel.keys(), self.op_types
-        )
+        sq_weight_tensors, sq_weights_nodes = self.get_weight_from_input_tensor(model, max_vals_per_channel.keys())
 
         # Calculate the smooth quant scaler and insert Mul op into the graph
         scaler = SmoothQuantScaler(model, self.calib_dataloader, self.alpha, self.scales_per_op)
@@ -157,13 +151,13 @@ def apply_smooth_quant(self, model: BaseModel):
     def apply_smooth_quant_LLM(self, model: BaseModel):
         """Apply smooth quant to the LLM model."""
         # Do a pre-optimization before smooth quant
-        # from neural_compressor.tensorflow.quantization.tf_utils.graph_rewriter.generic.pre_optimize import (
-        #     PreOptimization,
-        # )
+        from neural_compressor.tensorflow.quantization.tf_utils.graph_rewriter.generic.pre_optimize import (
+            PreOptimization,
+        )
 
-        # pre_optimizer_handle = PreOptimization(model, self.new_api, self.device)
-        # pre_optimized_model = pre_optimizer_handle.get_optimized_model(self.itex_mode)
-        # model.graph_def = pre_optimized_model.graph_def
+        pre_optimizer_handle = PreOptimization(model, self.new_api, self.device)
+        pre_optimized_model = pre_optimizer_handle.get_optimized_model(self.itex_mode)
+        model.graph_def = pre_optimized_model.graph_def
 
         llm_temp_dir = DEFAULT_WORKSPACE + "/temp_saved_model"
         # Run calibration to get max values per channel
@@ -199,8 +193,6 @@ def __call__(self, model: BaseModel):
         Returns:
             model: A smoothed Tensorflow model
         """
-        apply_func = (
-            self.apply_smooth_quant_LLM if isinstance(model, TensorflowLLMModel) else self.apply_smooth_quant
-        )
+        apply_func = self.apply_smooth_quant_LLM if isinstance(model, TensorflowLLMModel) else self.apply_smooth_quant
 
         return apply_func(model)
diff --git a/neural_compressor/tensorflow/algorithms/smooth_quant/smooth_quant_scaler.py b/neural_compressor/tensorflow/algorithms/smooth_quant/smooth_quant_scaler.py
@@ -57,7 +57,7 @@ def _adjust_activation(self, scale, input_node_name, output_node_name, w_i):
             output_node_name: the concrete output weight node name
             w_i: distinguish between different output weight nodes on different branches when naming
         """
-        from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper
+        from neural_compressor.tensorflow.quantization.tf_utils.graph_util import GraphRewriterHelper as Helper
 
         node_suffix = str(w_i)
         mul_const_node = Helper.create_constant_node(input_node_name + "/scale_mul" + node_suffix, scale, tf.float32)

diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py
@@ -581,6 +581,8 @@ def query_fw_capability(self, model):
         Args:
             model (object): The model to query quantization tuning capability.
         """
+        if not isinstance(model, tf.keras.Model):
+            model = model.model
         fp32_config = {"weight": {"dtype": "fp32"}, "activation": {"dtype": "fp32"}}
         bf16_config = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
         int8_type = self.query_handler.get_op_types_by_precision(precision="int8")

diff --git a/neural_compressor/tensorflow/quantization/__init__.py b/neural_compressor/tensorflow/quantization/__init__.py
@@ -19,4 +19,4 @@
     SmoothQuantConfig,
     get_default_sq_config,
     get_default_static_quant_config,
-)
+)
diff --git a/neural_compressor/tensorflow/quantization/algorithm_entry.py b/neural_compressor/tensorflow/quantization/algorithm_entry.py
@@ -21,12 +21,7 @@
 from neural_compressor.tensorflow.algorithms import KerasAdaptor
 from neural_compressor.tensorflow.quantization.auto_tune import ParseKerasConfig
 from neural_compressor.tensorflow.quantization.config import SmoothQuantConfig, StaticQuantConfig
-from neural_compressor.tensorflow.utils import (
-    BaseModel, 
-    KerasModel,
-    register_algo, 
-    framework_specific_info,
-)
+from neural_compressor.tensorflow.utils import BaseModel, KerasModel, framework_specific_info, register_algo
 
 
 @register_algo(name=STATIC_QUANT)
@@ -68,6 +63,5 @@ def smooth_quant_entry(
 
     converter = SmoothQuant(smooth_quant_config, calib_dataloader, calib_iteration)
     sq_model = converter(model)
-
-    return sq_model
 
+    return sq_model
diff --git a/neural_compressor/tensorflow/quantization/auto_tune.py b/neural_compressor/tensorflow/quantization/auto_tune.py
@@ -12,20 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, Dict
 from collections import OrderedDict
+from typing import Callable, Dict
 
 import tensorflow as tf
 
 from neural_compressor.tensorflow.quantization.config import StaticQuantConfig
 
+
 class ParseKerasConfig:
-    """The class that parse StaticQuantConfig to tunning config"""
+    """The class that parse StaticQuantConfig to tuning config."""
+
     support_int8_weight = {"Dense", "Conv2d", "DepthwiseConv2D", "SeparableConv2D"}
 
-    def __init__(self,
-                 quant_config: StaticQuantConfig, 
-                 calib_iteration: int):
+    def __init__(self, quant_config: StaticQuantConfig, calib_iteration: int):
         """Init parser for keras static quant config.
 
         Args:
@@ -37,10 +37,10 @@ def __init__(self,
 
     def update_config(self, quant_config, op_key):
         """Update op-wise config.
-        
-            Args:
-                quant_config: the keras static quant config.
-                op_key: a tuple such as (layer type, layer name).
+
+        Args:
+            quant_config: the keras static quant config.
+            op_key: a tuple such as (layer type, layer name).
         """
         op_value = {"activation": {}}
         op_value["activation"].update(
@@ -53,7 +53,8 @@ def update_config(self, quant_config, op_key):
             }
         )
         if op_key[1] not in self.support_int8_weight:
-            return
+            return op_value
+
         op_value["weight"] = {
             "dtype": quant_config.weight_dtype,
             "scheme": "sym" if quant_config.weight_sym else "asym",

diff --git a/neural_compressor/tensorflow/quantization/config.py b/neural_compressor/tensorflow/quantization/config.py
@@ -23,14 +23,14 @@
 import tensorflow as tf
 
 from neural_compressor.common import logger
-from neural_compressor.common.utils import SMOOTH_QUANT, STATIC_QUANT
 from neural_compressor.common.base_config import (
     DEFAULT_WHITE_LIST,
     OP_NAME_OR_MODULE_TYPE,
     BaseConfig,
     config_registry,
     register_config,
 )
+from neural_compressor.common.utils import SMOOTH_QUANT, STATIC_QUANT
 from neural_compressor.tensorflow.utils import DEFAULT_SQ_ALPHA_ARGS
 
 
@@ -145,6 +145,7 @@ def get_all_registered_configs() -> Dict[str, BaseConfig]:
     registered_configs = config_registry.get_cls_configs()
     return registered_configs.get("keras", {})
 
+
 def get_default_static_quant_config() -> StaticQuantConfig:
     """Generate the default static quant config.
 

diff --git a/neural_compressor/tensorflow/quantization/tf_utils/__init__.py b/neural_compressor/tensorflow/quantization/tf_utils/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/neural_compressor/tensorflow/quantization/tf_utils/graph_rewriter/generic/__init__.py b/neural_compressor/tensorflow/quantization/tf_utils/graph_rewriter/generic/__init__.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tensorflow Generic Graph Rewriters."""
diff --git a/...pressor/tensorflow/quantization/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py b/...pressor/tensorflow/quantization/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Convert Add OP to BiasAdd OP Graph Rewriter."""
+
+import numpy as np
+from tensorflow.python.framework import dtypes, tensor_util
+
+from neural_compressor.tensorflow.quantization.tf_utils.graph_util import GraphAnalyzer
+from neural_compressor.tensorflow.quantization.tf_utils.graph_util import GraphRewriterHelper as Helper
+from neural_compressor.tensorflow.quantization.tf_utils.util import TF_SPR_BASE_VERSIONS
+from neural_compressor.tensorflow.utils import dump_elapsed_time
+
+from ..graph_base import GraphRewriterBase
+
+
+class ConvertAddToBiasAddOptimizer(GraphRewriterBase):
+    """Convert MatMul/Conv2D + Add(AddV2) to MatMul + BiasAdd."""
+
+    @dump_elapsed_time("Pass ConvertAddToBiasAddOptimizer")
+    def do_transformation(self):
+        """Execute conversion Add to BiasAdd."""
+        g = GraphAnalyzer()
+        g.graph = self.model
+        graph_info = g.parse_graph()
+
+        import tensorflow as tf
+
+        if tf.version.VERSION not in TF_SPR_BASE_VERSIONS:  # Conv2D is matched only when the TF version is not listed
+            target_nodes = g.query_fusion_pattern_nodes([["MatMul", "Conv2D"], ["Add", "AddV2"]])
+        else:
+            target_nodes = g.query_fusion_pattern_nodes([["MatMul"], ["Add", "AddV2"]])
+        for i in target_nodes:  # i[0] is used as the MatMul/Conv2D node name, i[1] as the Add/AddV2 node name
+            successor_node_names = graph_info[i[1]].outputs
+            matmul_input_name = graph_info[i[0]].node.input[0]
+            matmul_input_node = graph_info[Helper.node_name_from_input(matmul_input_name)].node
+            # FIXME: the two lines below were added because of a MatMul kernel limitation: the MatMul
+            # input type should be quint8.
+            if matmul_input_node.op == "Const":
+                continue
+            add_second_input_name = graph_info[i[1]].node.input[1]
+            add_second_const_node = graph_info[add_second_input_name].node  # NOTE(review): raw input name is used as the key, unlike node_name_from_input() above - confirm it never carries a ":port" suffix
+            if add_second_const_node.op != "Const":  # only a constant second operand is turned into a bias
+                continue
+            bias_tensor = tensor_util.MakeNdarray(add_second_const_node.attr["value"].tensor)
+
+            if bias_tensor.ndim > 2:  # constants with more than two dimensions are left untouched
+                continue
+
+            new_bias_tensor = np.ravel(bias_tensor)  # flatten the constant to 1-D
+
+            g.remove_node(i[1])
+
+            bias_node_name = i[1]  # the new BiasAdd node reuses the removed Add node's name
+            bias_const_node_name = add_second_const_node.name + "_flattern"
+
+            bias_const_node = Helper.create_constant_node(bias_const_node_name, new_bias_tensor, dtypes.float32)
+
+            bias_node = Helper.create_node("BiasAdd", bias_node_name, [i[0], bias_const_node_name])
+            Helper.set_attr_dtype(bias_node, "T", dtypes.float32)
+
+            g.add_node(bias_const_node, None, [bias_node_name])
+            g.replace_single_node(bias_node, [i[0]], i[1], successor_node_names, i[1])
+
+        return g.dump_graph()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -14,4 +14,4 @@


		from neural_compressor.tensorflow.algorithms.smooth_quant import SmoothQuant
		from neural_compressor.tensorflow.algorithms.static_quant import KerasAdaptor
		from neural_compressor.tensorflow.algorithms.static_quant import KerasAdaptor