diff --git a/nncf/quantization/algorithms/weight_compression/weight_lowering.py b/nncf/quantization/algorithms/weight_compression/weight_lowering.py
index ba11ebe2576..d22272ad93a 100644
--- a/nncf/quantization/algorithms/weight_compression/weight_lowering.py
+++ b/nncf/quantization/algorithms/weight_compression/weight_lowering.py
@@ -253,23 +253,19 @@ def calculate_integer_quantization_params(
     if weight.dtype != TensorDataType.float32:
         weight = weight.astype(TensorDataType.float32)
 
-    if mode in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT4_ASYM]:
-        level_low = 0
-        level_high = 2**num_bits - 1
+    asym_quant = mode in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT4_ASYM]
+    level_low = 0 if asym_quant else -(2 ** (num_bits - 1))
+    level_high = 2**num_bits - 1 if asym_quant else 2 ** (num_bits - 1) - 1
+
+    if asym_quant:
         min_values = fns.min(weight, axis=reduction_axes, keepdims=True)  # [a1, r, a2] -> [a1, 1, a2]
         max_values = fns.max(weight, axis=reduction_axes, keepdims=True)  # [a1, r, a2] -> [a1, 1, a2]
-        scale, zero_point = calculate_scale_zero_point(
-            min_values, max_values, level_low, level_high, narrow_range=False
-        )
-        return scale, zero_point
+    else:
+        max_values = fns.max(fns.abs(weight), axis=reduction_axes, keepdims=True)  # [a1, r, a2] -> [a1, 1, a2]
+        min_values = -max_values
 
-    level_high = 2 ** (num_bits - 1) - 1
-    scale = fns.max(fns.abs(weight), axis=reduction_axes, keepdims=True)  # [a1, r//gs, 1, a2]
-    scale /= level_high
-    eps = fns.finfo(scale).eps
-    # NOTE: adding machine epsilon to avoid division by zero
-    scale = fns.where(fns.abs(scale) < eps, eps, scale)
-    return scale, None
+    scale, zero_point = calculate_scale_zero_point(min_values, max_values, level_low, level_high, narrow_range=False)
+    return scale, zero_point if asym_quant else None
 
 
 def calculate_quantized_weight(
diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py
index 0b01674e43c..8b42121d867 100644
--- a/tests/openvino/native/quantization/test_weights_compression.py
+++ b/tests/openvino/native/quantization/test_weights_compression.py
@@ -28,6 +28,7 @@
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
 from nncf.quantization.algorithms.weight_compression.mixed_precision import MIXED_PRECISION_CRITERIA
+from nncf.quantization.algorithms.weight_compression.weight_lowering import do_integer_quantization
 from nncf.quantization.algorithms.weight_compression.weight_lowering import get_integer_quantization_error
 from nncf.quantization.algorithms.weight_compression.weight_lowering import reshape_weight_for_grouped_quantization
 from nncf.scopes import IgnoredScope
@@ -912,3 +913,18 @@ def test_mixed_precision_e2m1(mode, all_layers, ratio, ref_ids):
     }
     ref_e8m0_nodes = {f"weights_{i}/scale" for i in ref_ids}
     assert ref_e8m0_nodes == names_e8m0
+
+
+def test_compressed_weights_range():
+    bits = 4
+    sz = 2 ** (bits - 1)
+    quantized_w = np.arange(-sz, sz).reshape(2, sz).astype(np.float32)
+    w = Tensor(quantized_w / 10.0)
+
+    config = WeightCompressionConfig(mode=CompressWeightsMode.INT4_SYM)
+    compressed_weights, scale, zp = do_integer_quantization(w, -1, config)
+
+    assert zp is None
+    ref_scale = 2 * np.max(np.abs(quantized_w) / 10.0, axis=1) / (2**bits - 1)
+    assert np.allclose(scale.data, ref_scale.reshape(scale.shape))
+    assert np.allclose(compressed_weights.data, quantized_w)
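
A minimal NumPy sketch of the scale math the refactored symmetric path now produces, assuming `calculate_scale_zero_point` computes `scale = (max - min) / (level_high - level_low)` (inferred from the levels set in the new code and from `ref_scale` in the new test; not part of the diff itself):

```python
import numpy as np

# INT4 symmetric levels, as set by the refactored code:
# level_low = -(2 ** (num_bits - 1)), level_high = 2 ** (num_bits - 1) - 1
num_bits = 4
level_low = -(2 ** (num_bits - 1))    # -8
level_high = 2 ** (num_bits - 1) - 1  # 7

# Same weight layout as the test: two rows, reduced over the last axis.
w = np.arange(-8, 8).reshape(2, 8).astype(np.float32) / 10.0

# Symmetric branch: the (min, max) pair is mirrored around zero.
max_values = np.max(np.abs(w), axis=-1, keepdims=True)
min_values = -max_values

# Assumed range-based formula: the scale spans the full level range,
# giving 2 * max_abs / (2**num_bits - 1) instead of the old max_abs / level_high.
scale = (max_values - min_values) / (level_high - level_low)

assert np.allclose(scale, 2 * max_values / (2**num_bits - 1))
```

Under this reading, the symmetric scale shrinks slightly (divisor 15 on `2 * max_abs` rather than 7 on `max_abs`), which is exactly the reference value `test_compressed_weights_range` asserts.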