From eda805252a3f9353e20fd2f38fa462095638573e Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Fri, 31 May 2024 15:52:41 +0800
Subject: [PATCH 1/2] Remove export_compressed_model in AWQConfig

---
 neural_compressor/torch/quantization/config.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py
index 430e3a07983..99f717e5ac2 100644
--- a/neural_compressor/torch/quantization/config.py
+++ b/neural_compressor/torch/quantization/config.py
@@ -387,7 +387,6 @@ class AWQConfig(BaseConfig):
         "use_full_range",
         "use_mse_search",
         "use_layer_wise",
-        "export_compressed_model",
         "use_double_quant",
         "double_quant_dtype",
         "double_quant_bits",
@@ -410,7 +409,6 @@ def __init__(
         use_full_range: bool = False,
         use_mse_search: bool = False,
         use_layer_wise: bool = False,
-        export_compressed_model: bool = False,
         # double quant
         use_double_quant: bool = False,
         double_quant_dtype: str = "int",
@@ -434,7 +432,6 @@ def __init__(
             use_full_range (bool): Enables full range for activations, default is False.
             use_mse_search (bool): Enables mean squared error (MSE) search, default is False.
             use_layer_wise (bool): Enables quantize model per layer. Defaults to False.
-            export_compressed_model (bool): Enables return model in int format or not. Defaults to False.
             use_double_quant (bool): Enables double quantization, default is False.
             double_quant_dtype (str): Data type for double_quant scale, default is "int".
             double_quant_bits (int): Number of bits used to represent double_quant scale, default is 4.
@@ -454,7 +451,6 @@ def __init__(
         self.use_full_range = use_full_range
         self.use_mse_search = use_mse_search
         self.use_layer_wise = use_layer_wise
-        self.export_compressed_model = export_compressed_model
         # double quant
         self.use_double_quant = use_double_quant
         self.double_quant_bits = double_quant_bits

From d1130b7056893dd1eaef9f5db837dca19cb86740 Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Fri, 31 May 2024 16:23:53 +0800
Subject: [PATCH 2/2] Update algorithm_entry.py

---
 neural_compressor/torch/quantization/algorithm_entry.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py
index 59e42729555..1dd48b7f5f5 100644
--- a/neural_compressor/torch/quantization/algorithm_entry.py
+++ b/neural_compressor/torch/quantization/algorithm_entry.py
@@ -328,7 +328,6 @@ def awq_quantize_entry(
             "use_full_range": op_config.use_full_range,
             "use_mse_search": op_config.use_mse_search,
             "use_layer_wise": op_config.use_layer_wise,
-            "export_compressed_model": op_config.export_compressed_model,
             "use_double_quant": op_config.use_double_quant,
             "double_quant_dtype": op_config.double_quant_dtype,
             "double_quant_bits": op_config.double_quant_bits,
@@ -338,7 +337,6 @@ def awq_quantize_entry(
     use_auto_scale = op_config.use_auto_scale
     use_mse_search = op_config.use_auto_clip  # for awq clip
     folding = op_config.folding
-    return_int = op_config.export_compressed_model
     use_full_range = op_config.use_full_range
 
     run_fn = kwargs.get("run_fn", None)
@@ -357,7 +355,6 @@ def awq_quantize_entry(
         use_auto_scale=use_auto_scale,
         use_mse_search=use_mse_search,
         folding=folding,
-        return_int=return_int,
         use_full_range=use_full_range,
     )
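
Usage note: a minimal sketch of how a caller drives AWQ quantization after this series, with export_compressed_model simply dropped from AWQConfig. It assumes the prepare/convert flow of the neural_compressor.torch API; fp32_model, example_inputs, and calib_fn are hypothetical placeholders, and the bits/group_size values are illustrative, not anything these diffs prescribe.

    # Sketch only, assuming the neural_compressor.torch prepare/convert flow;
    # fp32_model, example_inputs, and calib_fn are placeholders.
    from neural_compressor.torch.quantization import AWQConfig, convert, prepare

    # After PATCH 1/2, export_compressed_model is no longer a valid keyword:
    # passing it raises a TypeError, so callers just omit the argument.
    quant_config = AWQConfig(bits=4, group_size=32)

    model = prepare(fp32_model, quant_config, example_inputs=example_inputs)
    calib_fn(model)         # user-supplied calibration pass over sample data
    model = convert(model)  # returns the quantized model directly

Since PATCH 2/2 also removes the return_int plumbing in awq_quantize_entry, there is no remaining switch between "fake" and int output at this call site; the compressed representation is what convert produces.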