intel · chensuyue · Jun 12, 2024 · May 31, 2024 · May 31, 2024 · Jun 7, 2024
diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py
@@ -328,7 +328,6 @@ def awq_quantize_entry(
                 "use_full_range": op_config.use_full_range,
                 "use_mse_search": op_config.use_mse_search,
                 "use_layer_wise": op_config.use_layer_wise,
-                "export_compressed_model": op_config.export_compressed_model,
                 "use_double_quant": op_config.use_double_quant,
                 "double_quant_dtype": op_config.double_quant_dtype,
                 "double_quant_bits": op_config.double_quant_bits,
@@ -338,7 +337,6 @@ def awq_quantize_entry(
             use_auto_scale = op_config.use_auto_scale
             use_mse_search = op_config.use_auto_clip  # for awq clip
             folding = op_config.folding
-            return_int = op_config.export_compressed_model
             use_full_range = op_config.use_full_range
 
     run_fn = kwargs.get("run_fn", None)
@@ -357,7 +355,6 @@ def awq_quantize_entry(
         use_auto_scale=use_auto_scale,
         use_mse_search=use_mse_search,
         folding=folding,
-        return_int=return_int,
         use_full_range=use_full_range,
     )
 

diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py
@@ -387,7 +387,6 @@ class AWQConfig(BaseConfig):
         "use_full_range",
         "use_mse_search",
         "use_layer_wise",
-        "export_compressed_model",
         "use_double_quant",
         "double_quant_dtype",
         "double_quant_bits",
@@ -410,7 +409,6 @@ def __init__(
         use_full_range: bool = False,
         use_mse_search: bool = False,
         use_layer_wise: bool = False,
-        export_compressed_model: bool = False,
         # double quant
         use_double_quant: bool = False,
         double_quant_dtype: str = "int",
@@ -434,7 +432,6 @@ def __init__(
             use_full_range (bool): Enables full range for activations, default is False.
             use_mse_search (bool): Enables mean squared error (MSE) search, default is False.
             use_layer_wise (bool): Enables quantize model per layer. Defaults to False.
-            export_compressed_model (bool): Enables return model in int format or not. Defaults to False.
             use_double_quant (bool): Enables double quantization, default is False.
             double_quant_dtype (str): Data type for double_quant scale, default is "int".
             double_quant_bits (int): Number of bits used to represent double_quant scale, default is 4.
@@ -454,7 +451,6 @@ def __init__(
         self.use_full_range = use_full_range
         self.use_mse_search = use_mse_search
         self.use_layer_wise = use_layer_wise
-        self.export_compressed_model = export_compressed_model
         # double quant
         self.use_double_quant = use_double_quant
         self.double_quant_bits = double_quant_bits