add docstring for static quant and smooth quant (#1936)
* add docstring for static quant and smooth quant

Signed-off-by: violetch24 <[email protected]>

* format fix

Signed-off-by: violetch24 <[email protected]>

* update scan path

Signed-off-by: violetch24 <[email protected]>

* Update utility.py

---------

Signed-off-by: violetch24 <[email protected]>
Co-authored-by: violetch24 <[email protected]>
violetch24 authored Jul 19, 2024
1 parent 296c5d4 commit 1ebf698
Showing 10 changed files with 783 additions and 175 deletions.
4 changes: 3 additions & 1 deletion .azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt
@@ -15,7 +15,9 @@
/neural-compressor/neural_compressor/strategy
/neural-compressor/neural_compressor/training.py
/neural-compressor/neural_compressor/utils
/neural-compressor/neural_compressor/torch/algorithms/static_quant
/neural-compressor/neural_compressor/torch/algorithms/smooth_quant
/neural_compressor/torch/algorithms/pt2e_quant
/neural_compressor/torch/export
/neural_compressor/common
/neural_compressor/torch/algorithms/weight_only/hqq
/neural_compressor/torch/algorithms/weight_only/hqq
2 changes: 2 additions & 0 deletions neural_compressor/torch/algorithms/smooth_quant/__init__.py
@@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The SmoothQuant-related modules."""


from .utility import *
from .smooth_quant import SmoothQuantQuantizer
4 changes: 3 additions & 1 deletion neural_compressor/torch/algorithms/smooth_quant/save_load.py
@@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Save and load the quantized model."""


# pylint:disable=import-error
import torch
@@ -32,7 +34,7 @@ def recover_model_from_json(model, json_file_path, example_inputs):  # pragma: no cover
example_inputs (tuple or torch.Tensor or dict): example inputs that will be passed to the ipex function.
Returns:
(object): quantized model
model (object): quantized model
"""
from torch.ao.quantization.observer import MinMaxObserver

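A minimal usage sketch of recover_model_from_json (the import path follows the file location above; the model, JSON path, and inputs are placeholders, and the JSON file is assumed to have been dumped earlier by the IPEX smooth-quant flow):

    import torch
    from neural_compressor.torch.algorithms.smooth_quant.save_load import recover_model_from_json

    # Placeholder fp32 model and example inputs; substitute your own.
    fp32_model = torch.nn.Linear(4, 4).eval()
    example_inputs = (torch.randn(1, 4),)

    # "qconfig.json" is a hypothetical path to a previously saved quantization config.
    q_model = recover_model_from_json(fp32_model, "qconfig.json", example_inputs)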
62 changes: 45 additions & 17 deletions neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py
@@ -14,6 +14,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The quantizer using SmoothQuant path."""


import json
import os
@@ -49,6 +51,8 @@


class SmoothQuantQuantizer(Quantizer):
"""SmoothQuantQuantizer Class."""

def __init__(self, quant_config: OrderedDict = {}): # pragma: no cover
"""Init a SmoothQuantQuantizer object.
@@ -61,9 +65,9 @@ def prepare(self, model, example_inputs, inplace=True, *args, **kwargs):
"""Prepares a given model for quantization.
Args:
model: A float model to be quantized.
example_inputs: Used to trace torch model.
inplace: Whether to carry out model transformations in-place. Defaults to True.
model (torch.nn.Module): raw fp32 model or prepared model.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.
Returns:
A prepared model.
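A rough usage sketch of prepare (the empty quant_config below is purely illustrative; a real config is produced by the neural-compressor framework layer):

    import torch
    from collections import OrderedDict
    from neural_compressor.torch.algorithms.smooth_quant import SmoothQuantQuantizer

    fp32_model = torch.nn.Linear(4, 4).eval()   # placeholder model
    example_inputs = (torch.randn(1, 4),)

    quantizer = SmoothQuantQuantizer(quant_config=OrderedDict())  # illustrative empty config
    prepared_model = quantizer.prepare(fp32_model, example_inputs=example_inputs)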
@@ -128,9 +132,9 @@ def convert(self, model, example_inputs, inplace=True, *args, **kwargs):
"""Converts a prepared model to a quantized model.
Args:
model: The prepared model to be converted.
example_inputs: Used to trace torch model.
inplace: Whether to carry out model transformations in-place. Defaults to True.
model (QuantizationInterceptionModule): the prepared model to be converted.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.
Returns:
A quantized model.
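Continuing the sketch above: after calibration data has been run through the prepared model, convert finalizes it into a quantized model.

    # Calibration: feed a few representative batches through the prepared model.
    with torch.no_grad():
        prepared_model(*example_inputs)

    q_model = quantizer.convert(prepared_model, example_inputs=example_inputs)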
@@ -153,14 +157,14 @@ def convert(self, model, example_inputs, inplace=True, *args, **kwargs):
return model

def quantize(self, model, tune_cfg, run_fn, example_inputs, inplace=True, *args, **kwargs):
"""Execute the quantize process on the specified model.
"""Executes the quantize process on the specified model.
Args:
model: a float model to be quantized.
tune_cfg: quantization config for ops.
run_fn: a calibration function for calibrating the model.
example_inputs: used to trace torch model.
inplace: whether to carry out model transformations in-place.
model (torch.nn.Module): raw fp32 model or prepared model.
tune_cfg (OrderedDict): quantization config for ops.
run_fn (Callable): a calibration function for calibrating the model.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.
Returns:
A quantized model.
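quantize wraps the prepare, calibrate, and convert steps into one call; continuing the same sketch (the empty tune_cfg is a placeholder, and run_fn is any callable that feeds calibration data):

    def run_fn(model):
        # Calibration callback: run representative inputs through the model.
        with torch.no_grad():
            model(*example_inputs)

    q_model = quantizer.quantize(
        fp32_model,
        tune_cfg=OrderedDict(),   # placeholder; real configs come from the framework layer
        run_fn=run_fn,
        example_inputs=example_inputs,
    )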
@@ -255,6 +259,22 @@ def quantize(self, model, tune_cfg, run_fn, example_inputs, inplace=True, *args, **kwargs):
def qdq_quantize(
model, tune_cfg, run_fn, example_inputs, inplace, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, sq
):
"""Executes the smooth quantize process.
Args:
model (torch.nn.Module): raw fp32 model or prepared model.
tune_cfg (OrderedDict): quantization config for ops.
run_fn (Callable): a calibration function for calibrating the model.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool): whether to carry out model transformations in-place.
cfgs (dict): configs loaded from ipex config path.
op_infos_from_cfgs (dict): dict containing configs that have been parsed for each op.
output_tensor_id_op_name (dict): dict containing op names corresponding to 'op_infos_from_cfgs'.
sq (TorchSmoothQuant): TorchSmoothQuant class containing sq infos.
Returns:
A quantized model.
"""
smoothquant_scale_info = sq.sq_scale_info
sq_minmax_init = True if tune_cfg.get("act_algo", "kl") == "minmax" else False

@@ -325,6 +345,14 @@ def qdq_quantize(


def _apply_pre_optimization(model, tune_cfg, sq, recover=False):
"""Retrieves sq info to absorb the scale to the layer at output channel.
Args:
model (QuantizationInterceptionModule): a prepared model.
tune_cfg (OrderedDict): quantization config for ops.
sq (TorchSmoothQuant): TorchSmoothQuant class containing sq infos.
recover (bool, optional): whether to recover the scale. Defaults to False.
"""
sq_max_info = {}
if sq.record_max_info:
sq_max_info = sq.max_value_info
@@ -354,13 +382,13 @@ def _apply_pre_optimization(model, tune_cfg, sq, recover=False):


def _ipex_post_quant_process(model, example_inputs, use_bf16, inplace=False):
"""Convert to a jit model.
"""Converts to a jit model.
Args:
model: a prepared model.
example_inputs: used to trace torch model.
use_bf16: whether to use bf16 for mixed precision.
inplace: whether to carry out model transformations in-place.
model (QuantizationInterceptionModule): a prepared model.
example_inputs (tensor/tuple/dict): used to trace torch model.
use_bf16 (bool): whether to use bf16 for mixed precision.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to False.
Returns:
A converted jit model.
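The conversion follows the standard torch.jit trace-and-freeze recipe; a generic sketch of that pattern (not the helper's exact body):

    import torch

    def to_jit(model, example_inputs):
        # Trace with example inputs, then freeze the result for inference.
        with torch.no_grad():
            traced = torch.jit.trace(model, example_inputs)
            frozen = torch.jit.freeze(traced.eval())
        return frozen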
