From e538a63110b71c303bcceef265b7acaeed9df5f2 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 22 Jul 2024 16:18:20 +0800
Subject: [PATCH] update int4 usage

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/quantization/algorithm_entry.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py
index 1493d176484..23ce9f31dac 100644
--- a/neural_compressor/torch/quantization/algorithm_entry.py
+++ b/neural_compressor/torch/quantization/algorithm_entry.py
@@ -567,9 +567,14 @@ def autoround_quantize_entry(
         if quant_config.name != AUTOROUND or quant_config.dtype == "fp32":
             continue
         else:
+            dtype = quant_config.dtype
+            bits = quant_config.bits
+            if dtype != "int" and "int" in dtype:
+                bits = int(dtype.lstrip("int"))
+                dtype = "int"
             weight_config[op_name] = {
-                "data_type": quant_config.dtype,
-                "bits": quant_config.bits,
+                "data_type": dtype,
+                "bits": bits,
                 "sym": quant_config.use_sym,
                 "group_size": quant_config.group_size,
                 "act_bits": quant_config.act_bits,
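
Note (not part of the patch): the added lines normalize compound dtype strings such as "int4" into a separate data_type and bit width before building weight_config. Below is a minimal standalone sketch of that normalization; the helper name normalize_dtype is hypothetical and only mirrors the logic introduced above.

# Sketch of the dtype normalization performed by this patch (assumed helper name).
def normalize_dtype(dtype: str, bits: int) -> tuple:
    """Map dtype strings like "int4" to ("int", 4); leave other dtypes unchanged."""
    if dtype != "int" and "int" in dtype:
        bits = int(dtype.lstrip("int"))  # "int4" -> 4, "int8" -> 8
        dtype = "int"
    return dtype, bits

# Example behavior under these assumptions:
assert normalize_dtype("int4", 8) == ("int", 4)   # bit width taken from the dtype string
assert normalize_dtype("int", 4) == ("int", 4)    # plain "int" keeps the configured bits
assert normalize_dtype("nf4", 4) == ("nf4", 4)    # non-int dtypes pass through unchanged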