Enhance 3.x torch algorithm entry #1779

Merged: 8 commits, May 9, 2024
Changes from 3 commits
11 changes: 5 additions & 6 deletions neural_compressor/torch/algorithms/weight_only/autoround.py
@@ -26,7 +26,7 @@
class AutoRoundQuantizer(Quantizer):
def __init__(
self,
weight_config: dict = {},
quant_config: dict = {},
enable_full_range: bool = False,
batch_size: int = 8,
amp: bool = True,
@@ -51,8 +51,8 @@ def __init__(
"""Init a AutQRoundQuantizer object.

Args:
weight_config (dict): Configuration for weight quantization (default is an empty dictionary).
weight_config={
quant_config (dict): Configuration for weight quantization (default is an empty dictionary).
quant_config={
'layer1':##layer_name
{
'data_type': 'int',
@@ -89,9 +89,8 @@ def __init__(
scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
have different choices.
"""
super().__init__(weight_config)
super().__init__(quant_config)
self.tokenizer = None
self.weight_config = weight_config
self.enable_full_range = enable_full_range
self.batch_size = batch_size
self.amp = amp
@@ -125,7 +124,7 @@ def prepare(self, model: torch.nn.Module, *args, **kwargs):
self.rounder = AutoRoundProcessor(
model=model,
tokenizer=None,
weight_config=self.weight_config,
weight_config=self.quant_config,
enable_full_range=self.enable_full_range,
batch_size=self.batch_size,
amp=self.amp,
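With the rename, callers now construct the quantizer via `quant_config` instead of `weight_config`. A minimal sketch of the updated call, mirroring the test change at the bottom of this diff (the layer name and settings are illustrative, not taken from a real model):

```python
from neural_compressor.torch.algorithms.weight_only.autoround import AutoRoundQuantizer

# Illustrative per-layer settings following the docstring above; the layer
# name is a placeholder.
quant_config = {
    "model.decoder.layers.0.self_attn.k_proj": {
        "data_type": "int",
        "bits": 4,
        "group_size": 32,
        "sym": False,
    }
}

# After this change the keyword is `quant_config` (formerly `weight_config`).
quantizer = AutoRoundQuantizer(quant_config=quant_config)
```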
124 changes: 49 additions & 75 deletions neural_compressor/torch/quantization/algorithm_entry.py
@@ -30,7 +30,7 @@
StaticQuantConfig,
TEQConfig,
)
from neural_compressor.torch.utils import Mode, logger, register_algo
from neural_compressor.torch.utils import Mode, logger, postprocess_model, preprocess_quantizer, register_algo


###################### RTN Algo Entry ##################################
@@ -68,17 +68,9 @@ def rtn_entry(
"double_quant_group_size": quant_config.double_quant_group_size,
}

if getattr(model, "quantizer", False):
quantizer = model.quantizer
else:
quantizer = RTNQuantizer(quant_config=weight_config)

quantizer = preprocess_quantizer(model, quantizer_cls=RTNQuantizer, quant_config=weight_config)
model = quantizer.execute(model, mode=mode)

if getattr(model, "quantizer", False):
del model.quantizer
else:
model.quantizer = quantizer
postprocess_model(model, mode, quantizer)
return model


@@ -125,15 +117,11 @@ def gptq_entry(
)
kwargs.pop("example_inputs")
logger.warning("lm_head in transformer model is skipped by GPTQ")
if getattr(model, "quantizer", False):
quantizer = model.quantizer
else:
quantizer = GPTQuantizer(quant_config=weight_config)

quantizer = preprocess_quantizer(model, quantizer_cls=GPTQuantizer, quant_config=weight_config)
model = quantizer.execute(model, mode=mode, *args, **kwargs)
if getattr(model, "quantizer", False):
del model.quantizer
else:
model.quantizer = quantizer
postprocess_model(model, mode, quantizer)

return model


@@ -177,17 +165,10 @@ def static_quant_entry(
inplace = kwargs.get("inplace", True)
assert example_inputs is not None, "Please provide example_inputs for static quantization."

if getattr(model, "quantizer", False):
quantizer = model.quantizer
else:
quantizer = StaticQuantQuantizer(quant_config=quant_config_mapping)

quantizer = preprocess_quantizer(model, quantizer_cls=StaticQuantQuantizer, quant_config=quant_config_mapping)
model = quantizer.execute(model, mode=mode, run_fn=run_fn, example_inputs=example_inputs, inplace=inplace)
postprocess_model(model, mode, quantizer)

if getattr(model, "quantizer", False):
del model.quantizer
else:
model.quantizer = quantizer
return model


@@ -301,11 +282,7 @@ def awq_quantize_entry(
example_inputs = kwargs.get("example_inputs", None)
assert example_inputs is not None, "Please provide example_inputs for AWQ quantization."

if getattr(model, "quantizer", False):
quantizer = model.quantizer
else:
quantizer = AWQQuantizer(quant_config=weight_config)

quantizer = preprocess_quantizer(model, quantizer_cls=AWQQuantizer, quant_config=weight_config)
model = quantizer.execute(
model,
mode=mode,
@@ -318,11 +295,8 @@
return_int=return_int,
use_full_range=use_full_range,
)
postprocess_model(model, mode, quantizer)

if getattr(model, "quantizer", False):
del model.quantizer
else:
model.quantizer = quantizer
return model


@@ -364,10 +338,18 @@ def teq_quantize_entry(
absorb_to_layer = quant_config.absorb_to_layer
folding = quant_config.folding
assert isinstance(model, torch.nn.Module), "only support torch module"
quantizer = TEQuantizer(
quant_config=weight_config, folding=folding, absorb_to_layer=absorb_to_layer, example_inputs=example_inputs

quantizer = preprocess_quantizer(
model,
quantizer_cls=TEQuantizer,
quant_config=weight_config,
folding=folding,
absorb_to_layer=absorb_to_layer,
example_inputs=example_inputs,
)
model = quantizer.execute(model, mode=mode, run_fn=run_fn, example_inputs=example_inputs, inplace=inplace)
postprocess_model(model, mode, quantizer)

return model


@@ -414,35 +396,33 @@ def autoround_quantize_entry(
scale_dtype = quant_config.scale_dtype

kwargs.pop("example_inputs")
if getattr(model, "quantizer", False):
quantizer = model.quantizer
else:
quantizer = AutoRoundQuantizer(
weight_config=weight_config,
enable_full_range=enable_full_range,
batch_size=batch_size,
lr_scheduler=lr_scheduler,
use_quant_input=use_quant_input,
enable_minmax_tuning=enable_minmax_tuning,
lr=lr,
minmax_lr=minmax_lr,
low_gpu_mem_usage=low_gpu_mem_usage,
iters=iters,
seqlen=seqlen,
n_samples=n_samples,
sampler=sampler,
seed=seed,
n_blocks=n_blocks,
gradient_accumulate_steps=gradient_accumulate_steps,
not_use_best_mse=not_use_best_mse,
dynamic_max_gap=dynamic_max_gap,
scale_dtype=scale_dtype,
)

quantizer = preprocess_quantizer(
model,
quantizer_cls=AutoRoundQuantizer,
quant_config=weight_config,
enable_full_range=enable_full_range,
batch_size=batch_size,
lr_scheduler=lr_scheduler,
use_quant_input=use_quant_input,
enable_minmax_tuning=enable_minmax_tuning,
lr=lr,
minmax_lr=minmax_lr,
low_gpu_mem_usage=low_gpu_mem_usage,
iters=iters,
seqlen=seqlen,
n_samples=n_samples,
sampler=sampler,
seed=seed,
n_blocks=n_blocks,
gradient_accumulate_steps=gradient_accumulate_steps,
not_use_best_mse=not_use_best_mse,
dynamic_max_gap=dynamic_max_gap,
scale_dtype=scale_dtype,
)
model = quantizer.execute(model=model, mode=mode, *args, **kwargs)
if getattr(model, "quantizer", False):
del model.quantizer
else:
model.quantizer = quantizer
postprocess_model(model, mode, quantizer)

logger.info("AutoRound quantization done.")
return model

@@ -460,17 +440,11 @@ def hqq_entry(
from neural_compressor.torch.algorithms.weight_only.hqq import HQQuantizer

logger.info("Quantize model with the HQQ algorithm.")
if getattr(model, "quantizer", False):
quantizer = model.quantizer
else:
quantizer = HQQuantizer(quant_config=configs_mapping)

quantizer = preprocess_quantizer(model, quantizer_cls=HQQuantizer, quant_config=configs_mapping)
model = quantizer.execute(model, mode=mode)
postprocess_model(model, mode, quantizer)

if getattr(model, "quantizer", False):
del model.quantizer
else:
model.quantizer = quantizer
return model


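Every entry above is reduced to the same three-step shape: fetch or build the quantizer, execute it, then let `postprocess_model` handle the bookkeeping. A sketch of that pattern (`foo_entry` and `FooQuantizer` are hypothetical placeholders standing in for any algorithm entry and its Quantizer subclass, e.g. RTNQuantizer):

```python
from neural_compressor.torch.utils import Mode, postprocess_model, preprocess_quantizer


def foo_entry(model, configs_mapping, mode=Mode.QUANTIZE, *args, **kwargs):
    # Reuse the quantizer stashed on the model by an earlier `prepare` call,
    # or build a fresh one from the config mapping.
    quantizer = preprocess_quantizer(model, quantizer_cls=FooQuantizer, quant_config=configs_mapping)
    model = quantizer.execute(model, mode=mode)
    # Attach the quantizer on `prepare`; drop it on `convert`/`quantize`.
    postprocess_model(model, mode, quantizer)
    return model
```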
36 changes: 36 additions & 0 deletions neural_compressor/torch/utils/utility.py
@@ -131,3 +131,39 @@ class Mode(Enum):
PREPARE = "prepare"
CONVERT = "convert"
QUANTIZE = "quantize"


def preprocess_quantizer(model, quantizer_cls, quant_config=None, *args, **kwargs):
"""Process quantizer.

Initialize a quantizer or get `quantizer` attribute from model.

Args:
model (torch.nn.Module): pytorch model.
quantizer_cls (Quantizer): quantizer class of a specific algorithm.
quant_config (dict, optional): Specifies how to apply the algorithm on the given model.
Defaults to None.

Returns:
quantizer object.
"""
if not hasattr(model, "quantizer"):
quantizer = quantizer_cls(quant_config=quant_config, *args, **kwargs)
return quantizer
else:
return model.quantizer


def postprocess_model(model, mode, quantizer):
"""Process `quantizer` attribute of model according to current mode.

Args:
model (torch.nn.Module): pytorch model.
mode (Mode): The mode of current phase, including 'prepare', 'convert' and 'quantize'.
quantizer (Quantizer): quantizer object.
"""
if mode == Mode.PREPARE:
model.quantizer = quantizer
elif mode == Mode.CONVERT or mode == Mode.QUANTIZE:
if getattr(model, "quantizer", False):
del model.quantizer
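Taken together, the two helpers implement the prepare/convert handshake: `preprocess_quantizer` returns the quantizer already stashed on the model if there is one, and `postprocess_model` stashes it after `prepare` and removes it after `convert` or `quantize`. A minimal runnable sketch (`MyQuantizer` is a hypothetical stand-in for an algorithm-specific Quantizer subclass, not part of this PR):

```python
import torch

from neural_compressor.torch.utils import Mode, postprocess_model, preprocess_quantizer


class MyQuantizer:
    """Hypothetical stand-in for an algorithm-specific Quantizer subclass."""

    def __init__(self, quant_config=None):
        self.quant_config = quant_config


model = torch.nn.Linear(8, 8)

# prepare: nothing is stashed on the model yet, so a new quantizer is built ...
quantizer = preprocess_quantizer(model, quantizer_cls=MyQuantizer, quant_config={})
postprocess_model(model, Mode.PREPARE, quantizer)  # ... and pinned on the model
assert model.quantizer is quantizer

# convert: the pinned instance is reused instead of creating a fresh one ...
quantizer = preprocess_quantizer(model, quantizer_cls=MyQuantizer, quant_config={})
postprocess_model(model, Mode.CONVERT, quantizer)  # ... and removed afterwards
assert not hasattr(model, "quantizer")
```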
2 changes: 1 addition & 1 deletion test/3x/torch/quantization/weight_only/test_autoround.py
@@ -102,7 +102,7 @@ def test_quantizer(self):
"sym": False,
}
}
quantizer = AutoRoundQuantizer(weight_config=weight_config)
quantizer = AutoRoundQuantizer(quant_config=weight_config)
fp32_model = gpt_j_model

# quantizer execute