Skip to content

Commit

Permalink
add tf_utils
Browse files Browse the repository at this point in the history
Signed-off-by: zehao-intel <[email protected]>

add pre_optimize for sq

Signed-off-by: zehao-intel <[email protected]>

fix ut issues

Signed-off-by: zehao-intel <[email protected]>

fix disable_random

Signed-off-by: zehao-intel <[email protected]>

[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

fix pylint

Signed-off-by: zehao-intel <[email protected]>

fix import

Signed-off-by: zehao-intel <[email protected]>

[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

modify requirements.txt

Signed-off-by: zehao-intel <[email protected]>

[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

fix import

Signed-off-by: zehao-intel <[email protected]>

[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
  • Loading branch information
zehao-intel committed Feb 2, 2024
1 parent 0181734 commit 60aadb7
Show file tree
Hide file tree
Showing 53 changed files with 6,875 additions and 65 deletions.
2 changes: 1 addition & 1 deletion neural_compressor/tensorflow/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@


from neural_compressor.tensorflow.algorithms.smooth_quant import SmoothQuant
from neural_compressor.tensorflow.algorithms.static_quant import KerasAdaptor
from neural_compressor.tensorflow.algorithms.static_quant import KerasAdaptor
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

from neural_compressor.common import logger
from neural_compressor.common.utils import DEFAULT_WORKSPACE
from neural_compressor.tensorflow.quantization.config import SmoothQuantConfig
from neural_compressor.tensorflow.algorithms.smooth_quant.smooth_quant_calibration import (
SmoothQuantCalibration,
SmoothQuantCalibrationLLM,
Expand All @@ -30,12 +29,9 @@
SmoothQuantScaler,
SmoothQuantScalerLLM,
)
from neural_compressor.tensorflow.utils import (
BaseModel,
TensorflowLLMModel,
SPR_BASE_VERSIONS,
framework_specific_info,
)
from neural_compressor.tensorflow.quantization.config import SmoothQuantConfig
from neural_compressor.tensorflow.quantization.tf_utils.graph_util import GraphAnalyzer
from neural_compressor.tensorflow.utils import SPR_BASE_VERSIONS, BaseModel, TensorflowLLMModel, framework_specific_info


class SmoothQuant:
Expand Down Expand Up @@ -126,13 +122,13 @@ def apply_smooth_quant(self, model: BaseModel):
logger.info("Start Smoothing process for Smooth Quantization.")

# Do a pre-optimization before smooth quant
# from neural_compressor.tensorflow.quantization.tf_utils.graph_rewriter.generic.pre_optimize import (
# PreOptimization,
# )
from neural_compressor.tensorflow.quantization.tf_utils.graph_rewriter.generic.pre_optimize import (
PreOptimization,
)

# pre_optimizer_handle = PreOptimization(model, self.new_api, self.device)
# pre_optimized_model = pre_optimizer_handle.get_optimized_model(self.itex_mode)
# model.graph_def = pre_optimized_model.graph_def
pre_optimizer_handle = PreOptimization(model, self.new_api, self.device)
pre_optimized_model = pre_optimizer_handle.get_optimized_model(self.itex_mode)
model.graph_def = pre_optimized_model.graph_def

# Run calibration to get max values per channel

Expand All @@ -142,9 +138,7 @@ def apply_smooth_quant(self, model: BaseModel):
max_vals_per_channel, sq_weight_node_names = calibration()

# Get weight tensors and weight nodes based on the input tensor
sq_weight_tensors, sq_weights_nodes = self.get_weight_from_input_tensor(
model, max_vals_per_channel.keys(), self.op_types
)
sq_weight_tensors, sq_weights_nodes = self.get_weight_from_input_tensor(model, max_vals_per_channel.keys())

# Calculate the smooth quant scaler and insert Mul op into the graph
scaler = SmoothQuantScaler(model, self.calib_dataloader, self.alpha, self.scales_per_op)
Expand All @@ -157,13 +151,13 @@ def apply_smooth_quant(self, model: BaseModel):
def apply_smooth_quant_LLM(self, model: BaseModel):
"""Apply smooth quant to the LLM model."""
# Do a pre-optimization before smooth quant
# from neural_compressor.tensorflow.quantization.tf_utils.graph_rewriter.generic.pre_optimize import (
# PreOptimization,
# )
from neural_compressor.tensorflow.quantization.tf_utils.graph_rewriter.generic.pre_optimize import (
PreOptimization,
)

# pre_optimizer_handle = PreOptimization(model, self.new_api, self.device)
# pre_optimized_model = pre_optimizer_handle.get_optimized_model(self.itex_mode)
# model.graph_def = pre_optimized_model.graph_def
pre_optimizer_handle = PreOptimization(model, self.new_api, self.device)
pre_optimized_model = pre_optimizer_handle.get_optimized_model(self.itex_mode)
model.graph_def = pre_optimized_model.graph_def

llm_temp_dir = DEFAULT_WORKSPACE + "/temp_saved_model"
# Run calibration to get max values per channel
Expand Down Expand Up @@ -199,8 +193,6 @@ def __call__(self, model: BaseModel):
Returns:
model: A smoothed Tensorflow model
"""
apply_func = (
self.apply_smooth_quant_LLM if isinstance(model, TensorflowLLMModel) else self.apply_smooth_quant
)
apply_func = self.apply_smooth_quant_LLM if isinstance(model, TensorflowLLMModel) else self.apply_smooth_quant

return apply_func(model)
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def _adjust_activation(self, scale, input_node_name, output_node_name, w_i):
output_node_name: the concrete output weight node name
w_i: distinguish between different output weight nodes on different branches when naming
"""
from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper
from neural_compressor.tensorflow.quantization.tf_utils.graph_util import GraphRewriterHelper as Helper

node_suffix = str(w_i)
mul_const_node = Helper.create_constant_node(input_node_name + "/scale_mul" + node_suffix, scale, tf.float32)
Expand Down
2 changes: 2 additions & 0 deletions neural_compressor/tensorflow/algorithms/static_quant/keras.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,8 @@ def query_fw_capability(self, model):
Args:
model (object): The model to query quantization tuning capability.
"""
if not isinstance(model, tf.keras.Model):
model = model.model
fp32_config = {"weight": {"dtype": "fp32"}, "activation": {"dtype": "fp32"}}
bf16_config = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
int8_type = self.query_handler.get_op_types_by_precision(precision="int8")
Expand Down
2 changes: 1 addition & 1 deletion neural_compressor/tensorflow/quantization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
SmoothQuantConfig,
get_default_sq_config,
get_default_static_quant_config,
)
)
10 changes: 2 additions & 8 deletions neural_compressor/tensorflow/quantization/algorithm_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,7 @@
from neural_compressor.tensorflow.algorithms import KerasAdaptor
from neural_compressor.tensorflow.quantization.auto_tune import ParseKerasConfig
from neural_compressor.tensorflow.quantization.config import SmoothQuantConfig, StaticQuantConfig
from neural_compressor.tensorflow.utils import (
BaseModel,
KerasModel,
register_algo,
framework_specific_info,
)
from neural_compressor.tensorflow.utils import BaseModel, KerasModel, framework_specific_info, register_algo


@register_algo(name=STATIC_QUANT)
Expand Down Expand Up @@ -68,6 +63,5 @@ def smooth_quant_entry(

converter = SmoothQuant(smooth_quant_config, calib_dataloader, calib_iteration)
sq_model = converter(model)

return sq_model

return sq_model
21 changes: 11 additions & 10 deletions neural_compressor/tensorflow/quantization/auto_tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable, Dict
from collections import OrderedDict
from typing import Callable, Dict

import tensorflow as tf

from neural_compressor.tensorflow.quantization.config import StaticQuantConfig


class ParseKerasConfig:
"""The class that parse StaticQuantConfig to tunning config"""
"""The class that parse StaticQuantConfig to tuning config."""

support_int8_weight = {"Dense", "Conv2d", "DepthwiseConv2D", "SeparableConv2D"}

def __init__(self,
quant_config: StaticQuantConfig,
calib_iteration: int):
def __init__(self, quant_config: StaticQuantConfig, calib_iteration: int):
"""Init parser for keras static quant config.
Args:
Expand All @@ -37,10 +37,10 @@ def __init__(self,

def update_config(self, quant_config, op_key):
"""Update op-wise config.
Args:
quant_config: the keras static quant config.
op_key: a tuple such as (layer type, layer name).
Args:
quant_config: the keras static quant config.
op_key: a tuple such as (layer type, layer name).
"""
op_value = {"activation": {}}
op_value["activation"].update(
Expand All @@ -53,7 +53,8 @@ def update_config(self, quant_config, op_key):
}
)
if op_key[1] not in self.support_int8_weight:
return
return op_value

op_value["weight"] = {
"dtype": quant_config.weight_dtype,
"scheme": "sym" if quant_config.weight_sym else "asym",
Expand Down
3 changes: 2 additions & 1 deletion neural_compressor/tensorflow/quantization/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@
import tensorflow as tf

from neural_compressor.common import logger
from neural_compressor.common.utils import SMOOTH_QUANT, STATIC_QUANT
from neural_compressor.common.base_config import (
DEFAULT_WHITE_LIST,
OP_NAME_OR_MODULE_TYPE,
BaseConfig,
config_registry,
register_config,
)
from neural_compressor.common.utils import SMOOTH_QUANT, STATIC_QUANT
from neural_compressor.tensorflow.utils import DEFAULT_SQ_ALPHA_ARGS


Expand Down Expand Up @@ -145,6 +145,7 @@ def get_all_registered_configs() -> Dict[str, BaseConfig]:
registered_configs = config_registry.get_cls_configs()
return registered_configs.get("keras", {})


def get_default_static_quant_config() -> StaticQuantConfig:
"""Generate the default static quant config.
Expand Down
13 changes: 13 additions & 0 deletions neural_compressor/tensorflow/quantization/tf_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tensorflow Generic Graph Rewriters."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert Add OP to BiasAdd OP Graph Rewriter."""

import numpy as np
from tensorflow.python.framework import dtypes, tensor_util

from neural_compressor.tensorflow.quantization.tf_utils.graph_util import GraphAnalyzer
from neural_compressor.tensorflow.quantization.tf_utils.graph_util import GraphRewriterHelper as Helper
from neural_compressor.tensorflow.quantization.tf_utils.util import TF_SPR_BASE_VERSIONS
from neural_compressor.tensorflow.utils import dump_elapsed_time

from ..graph_base import GraphRewriterBase


class ConvertAddToBiasAddOptimizer(GraphRewriterBase):
    """Convert MatMul/Conv2D + Add(AddV2) to MatMul + BiasAdd.

    The rewrite is applied only when the second input of the Add/AddV2 node is a
    ``Const`` whose tensor has at most two dimensions; that constant is flattened
    to 1-D and fed to a new ``BiasAdd`` node that takes the place of the Add node.
    """

    @dump_elapsed_time("Pass ConvertAddToBiasAddOptimizer")
    def do_transformation(self):
        """Execute conversion Add to BiasAdd.

        Reads the graph from ``self.model``, rewrites every eligible
        ``MatMul/Conv2D -> Add/AddV2`` pair in place and returns the result of
        ``GraphAnalyzer.dump_graph()``.

        Returns:
            The graph dumped by the analyzer after all rewrites were applied.
        """
        g = GraphAnalyzer()
        g.graph = self.model
        graph_info = g.parse_graph()

        import tensorflow as tf

        # Conv2D + Add is only matched when the installed TF version is not one
        # of the TF_SPR_BASE_VERSIONS; otherwise only MatMul + Add is considered.
        if tf.version.VERSION not in TF_SPR_BASE_VERSIONS:
            target_nodes = g.query_fusion_pattern_nodes([["MatMul", "Conv2D"], ["Add", "AddV2"]])
        else:
            target_nodes = g.query_fusion_pattern_nodes([["MatMul"], ["Add", "AddV2"]])

        for i in target_nodes:
            # Element 0 of a match names the MatMul/Conv2D node, element 1 the Add node.
            matmul_name = i[0]
            add_name = i[1]

            successor_node_names = graph_info[add_name].outputs
            matmul_input_name = graph_info[matmul_name].node.input[0]
            matmul_input_node = graph_info[Helper.node_name_from_input(matmul_input_name)].node
            # FIXME: this skip was added because of a MatMul kernel limitation
            # (the matmul input type should be quint8).
            if matmul_input_node.op == "Const":
                continue

            # Normalize the input reference the same way as for the MatMul input
            # above, so an input spelled with an output port (e.g. "node:1") is
            # resolved to its node and skipped below instead of raising KeyError.
            add_second_input_name = Helper.node_name_from_input(graph_info[add_name].node.input[1])
            add_second_const_node = graph_info[add_second_input_name].node
            if add_second_const_node.op != "Const":
                continue
            bias_tensor = tensor_util.MakeNdarray(add_second_const_node.attr["value"].tensor)

            # Only constants with at most two dimensions are flattened into a bias.
            if bias_tensor.ndim > 2:
                continue

            new_bias_tensor = np.ravel(bias_tensor)

            g.remove_node(add_name)

            # The BiasAdd node reuses the removed Add node's name; the flattened
            # constant gets a new name derived from the original constant's name.
            bias_node_name = add_name
            bias_const_node_name = add_second_const_node.name + "_flattern"

            bias_const_node = Helper.create_constant_node(bias_const_node_name, new_bias_tensor, dtypes.float32)

            bias_node = Helper.create_node("BiasAdd", bias_node_name, [matmul_name, bias_const_node_name])
            Helper.set_attr_dtype(bias_node, "T", dtypes.float32)

            g.add_node(bias_const_node, None, [bias_node_name])
            g.replace_single_node(bias_node, [matmul_name], add_name, successor_node_names, add_name)

        return g.dump_graph()
Loading

0 comments on commit 60aadb7

Please sign in to comment.