Update AutoRound commit version #1941

Merged
merged 8 commits on Jul 23, 2024
2 changes: 1 addition & 1 deletion .azure-pipelines/scripts/ut/env_setup.sh
@@ -92,7 +92,7 @@ elif [[ $(echo "${test_case}" | grep -c "tf pruning") != 0 ]]; then
fi

if [[ $(echo "${test_case}" | grep -c "api") != 0 ]] || [[ $(echo "${test_case}" | grep -c "adaptor") != 0 ]]; then
-pip install git+https://github.com/intel/auto-round.git@24b2e74070f2b4e6f26ff069ec75af74cf5b177c
+pip install git+https://github.com/intel/auto-round.git@e24b9074af6cdb099e31c92eb81b7f5e9a4a244e
fi

# test deps
4 changes: 2 additions & 2 deletions neural_compressor/adaptor/pytorch.py
@@ -4926,7 +4926,7 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
act_group_size = self.recipes["autoround_args"].get("act_group_size", None)
act_sym = self.recipes["autoround_args"].get("act_sym", None)
act_dynamic = self.recipes["autoround_args"].get("act_dynamic", True)
-multimodal = self.recipes["autoround_args"].get("multimodal", False)
+quant_block_list = self.recipes["autoround_args"].get("quant_block_list", None)
use_layer_wise = self.recipes["autoround_args"].get("use_layer_wise", False)

if dataloader is not None:
@@ -4959,7 +4959,7 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
dynamic_max_gap=dynamic_max_gap,
data_type=data_type,
scale_dtype=scale_dtype,
-multimodal=multimodal,
+quant_block_list=quant_block_list,
act_bits=act_bits,
act_group_size=act_group_size,
act_sym=act_sym,
6 changes: 3 additions & 3 deletions neural_compressor/adaptor/torch_utils/weight_only.py
@@ -706,7 +706,7 @@ def autoround_quantize(
dynamic_max_gap: int = -1,
data_type: str = "int", ##only support int for now
scale_dtype: str = "fp16",
-multimodal: bool = False,
+quant_block_list: list = None,
act_bits: int = 32,
act_group_size: int = None,
act_sym: bool = None,
@@ -761,7 +761,7 @@ def autoround_quantize(
data_type (str): The data type to be used (default is "int").
scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
have different choices.
-multimodal(bool): Enable multimodal model quantization, (default is "False").
+quant_block_list (list): A list whose elements are list of block's layer names to be quantized.
act_bits (int): Number of bits for activation quantization. Default is 32.
act_group_size (int): Group size for activation quantization. Default is None.
act_sym (bool): Whether to use symmetric activation quantization. Default is None.
@@ -800,7 +800,7 @@ def autoround_quantize(
dynamic_max_gap=dynamic_max_gap,
data_type=data_type, ## only support data_type
scale_dtype=scale_dtype,
-multimodal=multimodal,
+quant_block_list=quant_block_list,
act_bits=act_bits,
act_group_size=act_group_size,
act_sym=act_sym,
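For context on the new parameter: per the updated docstring, `quant_block_list` is a list whose elements are lists of block layer names, replacing the boolean `multimodal` switch. Below is a minimal sketch of what a value might look like and how it would reach the 2.x adaptor, which reads it from the "autoround_args" recipe as shown in pytorch.py above; the block names are hypothetical and depend on the target model.

```python
# Sketch only: hypothetical GPT-J-style block names; adjust for the target model.
quant_block_list = [
    [
        "transformer.h.0",
        "transformer.h.1",
        "transformer.h.2",
    ]
]

# The 2.x adaptor picks this up from the "autoround_args" recipe
# (see self.recipes["autoround_args"].get("quant_block_list", None) above).
recipes = {"autoround_args": {"quant_block_list": quant_block_list}}
```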
10 changes: 5 additions & 5 deletions neural_compressor/torch/algorithms/weight_only/autoround.py
@@ -55,7 +55,7 @@ def __init__(
dynamic_max_gap: int = -1,
data_type: str = "int",
scale_dtype: str = "fp16",
-multimodal: bool = False,
+quant_block_list: list = None,
act_bits: int = 32,
act_group_size: int = None,
act_sym: bool = None,
@@ -113,8 +113,8 @@ def __init__(
dynamic_max_gap (int): The dynamic maximum gap (default is -1).
data_type (str): The data type to be used (default is "int").
scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
-have different choices.
-multimodal(bool): Enable multimodal model quantization, (default is "False").
+have different choices.
+quant_block_list (list): A list whose elements are list of block's layer names to be quantized.
act_bits (int): Number of bits for activation quantization. Default is 32.
act_group_size (int): Group size for activation quantization. Default is None.
act_sym (bool): Whether to use symmetric activation quantization. Default is None.
@@ -146,7 +146,7 @@ def __init__(
self.dynamic_max_gap = dynamic_max_gap
self.data_type = data_type
self.scale_dtype = scale_dtype
-self.multimodal = multimodal
+self.quant_block_list = quant_block_list
self.act_bits = act_bits
self.act_group_size = act_group_size
self.act_sym = act_sym
@@ -202,7 +202,7 @@ def convert(self, model: torch.nn.Module, *args, **kwargs):
dynamic_max_gap=self.dynamic_max_gap,
data_type=self.data_type,
scale_dtype=self.scale_dtype,
-multimodal=self.multimodal,
+quant_block_list=self.quant_block_list,
act_bits=self.act_bits,
act_group_size=self.act_group_size,
act_sym=self.act_sym,
13 changes: 9 additions & 4 deletions neural_compressor/torch/quantization/algorithm_entry.py
@@ -567,9 +567,14 @@ def autoround_quantize_entry(
if quant_config.name != AUTOROUND or quant_config.dtype == "fp32":
continue
else:
+dtype = quant_config.dtype
+bits = quant_config.bits
+if dtype != "int" and "int" in dtype:
+    bits = int(dtype.lstrip("int"))
+    dtype = "int"
weight_config[op_name] = {
-"data_type": quant_config.dtype,
-"bits": quant_config.bits,
+"data_type": dtype,
+"bits": bits,
"sym": quant_config.use_sym,
"group_size": quant_config.group_size,
"act_bits": quant_config.act_bits,
@@ -595,7 +600,7 @@
not_use_best_mse = quant_config.not_use_best_mse
dynamic_max_gap = quant_config.dynamic_max_gap
scale_dtype = quant_config.scale_dtype
-multimodal = quant_config.multimodal
+quant_block_list = quant_config.quant_block_list
low_cpu_mem_usage = quant_config.use_layer_wise

kwargs.pop("example_inputs")
Expand All @@ -622,7 +627,7 @@ def autoround_quantize_entry(
not_use_best_mse=not_use_best_mse,
dynamic_max_gap=dynamic_max_gap,
scale_dtype=scale_dtype,
-multimodal=multimodal,
+quant_block_list=quant_block_list,
low_cpu_mem_usage=low_cpu_mem_usage,
)
model = quantizer.execute(model=model, mode=mode, *args, **kwargs)
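The new branch in `autoround_quantize_entry` accepts dtype strings such as "int4": the bit width is taken from the suffix and `data_type` is normalized back to "int". Here is a standalone sketch of that logic; the helper name is hypothetical, since the PR inlines the code.

```python
def normalize_int_dtype(dtype: str, bits: int):
    """Mirror of the added branch: an "intN" dtype overrides `bits` and becomes "int"."""
    if dtype != "int" and "int" in dtype:
        bits = int(dtype.lstrip("int"))  # "int4" -> "4" -> 4 (lstrip drops leading i/n/t)
        dtype = "int"
    return dtype, bits


assert normalize_int_dtype("int4", 8) == ("int", 4)  # suffix overrides the bits field
assert normalize_int_dtype("int", 4) == ("int", 4)   # plain "int" is left unchanged
```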
6 changes: 3 additions & 3 deletions neural_compressor/torch/quantization/config.py
@@ -917,7 +917,7 @@ def __init__(
dynamic_max_gap: int = -1,
scale_dtype: str = "fp16",
use_layer_wise: bool = False,
-multimodal: bool = False,
+quant_block_list: list = None,
white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
):
"""Init AUTOROUND weight-only quantization config.
@@ -951,7 +951,7 @@ def __init__(
scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
have different choices.
use_layer_wise (bool): Enables quantize model per layer. Defaults to False.
-multimodal(bool): Enable multimodal model quantization, (default is "False").
+quant_block_list (list): A list whose elements are list of block's layer names to be quantized.
white_list (Optional[List[OP_NAME_OR_MODULE_TYPE]]): White list of operator names or module types.
Default is DEFAULT_WHITE_LIST.
"""
@@ -983,7 +983,7 @@ def __init__(
self.dynamic_max_gap = dynamic_max_gap
self.scale_dtype = scale_dtype
self.use_layer_wise = use_layer_wise
-self.multimodal = multimodal
+self.quant_block_list = quant_block_list
self._post_init()

@classmethod
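With the config change, `quant_block_list` can be passed straight through `AutoRoundConfig` in the 3.x API. A minimal sketch combining it with the "int4" dtype exercised by the new test below, assuming the imports used by the test file; the block names, model, dataloader, and calibration function are placeholders supplied by the caller.

```python
from neural_compressor.torch.quantization import AutoRoundConfig, convert, prepare


def quantize_with_autoround(fp32_model, run_fn, dataloader):
    """Hypothetical wrapper around the prepare/convert flow used in the test."""
    quant_config = AutoRoundConfig(
        dtype="int4",          # parsed into data_type="int", bits=4 by autoround_quantize_entry
        nsamples=32,
        seqlen=10,
        iters=10,
        scale_dtype="fp32",
        quant_block_list=[["transformer.h.0", "transformer.h.1"]],  # hypothetical block names
    )
    model = prepare(model=fp32_model, quant_config=quant_config)
    run_fn(model, dataloader)  # caller-supplied calibration loop
    return convert(model)
```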
17 changes: 17 additions & 0 deletions test/3x/torch/quantization/weight_only/test_autoround.py
@@ -80,6 +80,23 @@ def test_autoround(self, quant_lm_head):
if quant_lm_head is True:
assert isinstance(q_model.lm_head, WeightOnlyLinear), "quantization for lm_head failed."

+    def test_int4_dtype(self):
+        fp32_model = copy.deepcopy(self.gptj)
+        quant_config = AutoRoundConfig(dtype="int4", nsamples=32, seqlen=10, iters=10, scale_dtype="fp32")
+        logger.info(f"Test AutoRound with config {quant_config}")
+
+        # prepare + convert API
+        model = prepare(model=fp32_model, quant_config=quant_config)
+
+        run_fn(model, self.dataloader)
+        q_model = convert(model)
+        out = q_model(self.inp)[0]
+        assert torch.allclose(out, self.label, atol=1e-1)
+        assert "transformer.h.0.attn.k_proj" in q_model.autoround_config.keys()
+        assert "scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()
+        assert torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]
+        assert isinstance(q_model.transformer.h[0].attn.k_proj, WeightOnlyLinear), "packing model failed."

def test_autoround_with_quantize_API(self):
gpt_j_model = copy.deepcopy(self.gptj)

2 changes: 1 addition & 1 deletion test/3x/torch/requirements.txt
@@ -1,4 +1,4 @@
-auto_round @ git+https://github.com/intel/auto-round.git@24b2e74070f2b4e6f26ff069ec75af74cf5b177c
+auto_round @ git+https://github.com/intel/auto-round.git@e24b9074af6cdb099e31c92eb81b7f5e9a4a244e
expecttest
intel_extension_for_pytorch
numpy
2 changes: 1 addition & 1 deletion test/requirements.txt
@@ -1,6 +1,6 @@
--find-links https://download.pytorch.org/whl/torch_stable.html
accelerate==0.21.0
-auto-round @ git+https://github.com/intel/auto-round.git@24b2e74070f2b4e6f26ff069ec75af74cf5b177c
+auto-round @ git+https://github.com/intel/auto-round.git@e24b9074af6cdb099e31c92eb81b7f5e9a4a244e
dynast==1.6.0rc1
horovod
intel-extension-for-pytorch