add docstring for static quant and smooth quant (#1936)
* add docstring for static quant and smooth quant

Signed-off-by: violetch24 <[email protected]>

* format fix

Signed-off-by: violetch24 <[email protected]>

* update scan path

Signed-off-by: violetch24 <[email protected]>

* Update utility.py

---------

Signed-off-by: violetch24 <[email protected]>
Co-authored-by: violetch24 <[email protected]>
violetch24 authored Jul 19, 2024
1 parent 296c5d4 commit 1ebf698
Showing 10 changed files with 783 additions and 175 deletions.
4 changes: 3 additions & 1 deletion .azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt
@@ -15,7 +15,9 @@
/neural-compressor/neural_compressor/strategy
/neural-compressor/neural_compressor/training.py
/neural-compressor/neural_compressor/utils
/neural-compressor/neural_compressor/torch/algorithms/static_quant
/neural-compressor/neural_compressor/torch/algorithms/smooth_quant
/neural_compressor/torch/algorithms/pt2e_quant
/neural_compressor/torch/export
/neural_compressor/common
/neural_compressor/torch/algorithms/weight_only/hqq
/neural_compressor/torch/algorithms/weight_only/hqq
2 changes: 2 additions & 0 deletions neural_compressor/torch/algorithms/smooth_quant/__init__.py
@@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The SmoothQuant-related modules."""


from .utility import *
from .smooth_quant import SmoothQuantQuantizer
4 changes: 3 additions & 1 deletion neural_compressor/torch/algorithms/smooth_quant/save_load.py
@@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Save and load the quantized model."""


# pylint:disable=import-error
import torch
@@ -32,7 +34,7 @@ def recover_model_from_json(model, json_file_path, example_inputs):  # pragma: no cover
example_inputs (tuple or torch.Tensor or dict): example inputs that will be passed to the ipex function.
Returns:
(object): quantized model
model (object): quantized model
"""
from torch.ao.quantization.observer import MinMaxObserver

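A minimal usage sketch of recover_model_from_json (the import path follows the file location above; the model, JSON path, and inputs are placeholders, and the JSON file is assumed to have been dumped earlier by the IPEX smooth-quant flow):

    import torch
    from neural_compressor.torch.algorithms.smooth_quant.save_load import recover_model_from_json

    # Placeholder fp32 model and example inputs; substitute your own.
    fp32_model = torch.nn.Linear(4, 4).eval()
    example_inputs = (torch.randn(1, 4),)

    # "qconfig.json" is a hypothetical path to a previously saved quantization config.
    q_model = recover_model_from_json(fp32_model, "qconfig.json", example_inputs)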
62 changes: 45 additions & 17 deletions neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py
@@ -14,6 +14,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The quantizer using SmoothQuant path."""


import json
import os
@@ -49,6 +51,8 @@


class SmoothQuantQuantizer(Quantizer):
"""SmoothQuantQuantizer Class."""

def __init__(self, quant_config: OrderedDict = {}): # pragma: no cover
"""Init a SmoothQuantQuantizer object.
@@ -61,9 +65,9 @@ def prepare(self, model, example_inputs, inplace=True, *args, **kwargs):
"""Prepares a given model for quantization.
Args:
model: A float model to be quantized.
example_inputs: Used to trace torch model.
inplace: Whether to carry out model transformations in-place. Defaults to True.
model (torch.nn.Module): raw fp32 model or prepared model.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.
Returns:
A prepared model.
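A rough usage sketch of prepare (the empty quant_config below is purely illustrative; a real config is produced by the neural-compressor framework layer):

    import torch
    from collections import OrderedDict
    from neural_compressor.torch.algorithms.smooth_quant import SmoothQuantQuantizer

    fp32_model = torch.nn.Linear(4, 4).eval()   # placeholder model
    example_inputs = (torch.randn(1, 4),)

    quantizer = SmoothQuantQuantizer(quant_config=OrderedDict())  # illustrative empty config
    prepared_model = quantizer.prepare(fp32_model, example_inputs=example_inputs)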
@@ -128,9 +132,9 @@ def convert(self, model, example_inputs, inplace=True, *args, **kwargs):
"""Converts a prepared model to a quantized model.
Args:
model: The prepared model to be converted.
example_inputs: Used to trace torch model.
inplace: Whether to carry out model transformations in-place. Defaults to True.
model (QuantizationInterceptionModule): the prepared model to be converted.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.
Returns:
A quantized model.
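Continuing the sketch above: after calibration data has been run through the prepared model, convert finalizes it into a quantized model.

    # Calibration: feed a few representative batches through the prepared model.
    with torch.no_grad():
        prepared_model(*example_inputs)

    q_model = quantizer.convert(prepared_model, example_inputs=example_inputs)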
@@ -153,14 +157,14 @@ def convert(self, model, example_inputs, inplace=True, *args, **kwargs):
return model

def quantize(self, model, tune_cfg, run_fn, example_inputs, inplace=True, *args, **kwargs):
"""Execute the quantize process on the specified model.
"""Executes the quantize process on the specified model.
Args:
model: a float model to be quantized.
tune_cfg: quantization config for ops.
run_fn: a calibration function for calibrating the model.
example_inputs: used to trace torch model.
inplace: whether to carry out model transformations in-place.
model (torch.nn.Module): raw fp32 model or prepared model.
tune_cfg (OrderedDict): quantization config for ops.
run_fn (Callable): a calibration function for calibrating the model.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.
Returns:
A quantized model.
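quantize wraps the prepare, calibrate, and convert steps into one call; continuing the same sketch (the empty tune_cfg is a placeholder, and run_fn is any callable that feeds calibration data):

    def run_fn(model):
        # Calibration callback: run representative inputs through the model.
        with torch.no_grad():
            model(*example_inputs)

    q_model = quantizer.quantize(
        fp32_model,
        tune_cfg=OrderedDict(),   # placeholder; real configs come from the framework layer
        run_fn=run_fn,
        example_inputs=example_inputs,
    )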
@@ -255,6 +259,22 @@ def quantize(self, model, tune_cfg, run_fn, example_inputs, inplace=True, *args, **kwargs):
def qdq_quantize(
model, tune_cfg, run_fn, example_inputs, inplace, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, sq
):
"""Executes the smooth quantize process.
Args:
model (torch.nn.Module): raw fp32 model or prepared model.
tune_cfg (OrderedDict): quantization config for ops.
run_fn (Callable): a calibration function for calibrating the model.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool): whether to carry out model transformations in-place.
cfgs (dict): configs loaded from ipex config path.
op_infos_from_cfgs (dict): dict containing configs that have been parsed for each op.
output_tensor_id_op_name (dict): dict containing op names corresponding to 'op_infos_from_cfgs'.
sq (TorchSmoothQuant): TorchSmoothQuant class containing sq infos.
Returns:
A quantized model.
"""
smoothquant_scale_info = sq.sq_scale_info
sq_minmax_init = True if tune_cfg.get("act_algo", "kl") == "minmax" else False

@@ -325,6 +345,14 @@ def qdq_quantize(


def _apply_pre_optimization(model, tune_cfg, sq, recover=False):
"""Retrieves sq info to absorb the scale to the layer at output channel.
Args:
model (QuantizationInterceptionModule): a prepared model.
tune_cfg (OrderedDict): quantization config for ops.
sq (TorchSmoothQuant): TorchSmoothQuant class containing sq infos.
recover (bool, optional): whether to recover the scale. Defaults to False.
"""
sq_max_info = {}
if sq.record_max_info:
sq_max_info = sq.max_value_info
@@ -354,13 +382,13 @@ def _apply_pre_optimization(model, tune_cfg, sq, recover=False):


def _ipex_post_quant_process(model, example_inputs, use_bf16, inplace=False):
"""Convert to a jit model.
"""Converts to a jit model.
Args:
model: a prepared model.
example_inputs: used to trace torch model.
use_bf16: whether to use bf16 for mixed precision.
inplace: whether to carry out model transformations in-place.
model (QuantizationInterceptionModule): a prepared model.
example_inputs (tensor/tuple/dict): used to trace torch model.
use_bf16 (bool): whether to use bf16 for mixed precision.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to False.
Returns:
A converted jit model.
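The conversion follows the standard torch.jit trace-and-freeze recipe; a generic sketch of that pattern (not the helper's exact body):

    import torch

    def to_jit(model, example_inputs):
        # Trace with example inputs, then freeze the result for inference.
        with torch.no_grad():
            traced = torch.jit.trace(model, example_inputs)
            frozen = torch.jit.freeze(traced.eval())
        return frozen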
