Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix scale calculation #2788

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -253,23 +253,19 @@ def calculate_integer_quantization_params(
if weight.dtype != TensorDataType.float32:
weight = weight.astype(TensorDataType.float32)

if mode in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT4_ASYM]:
level_low = 0
level_high = 2**num_bits - 1
asym_quant = mode in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT4_ASYM]
level_low = 0 if asym_quant else -(2 ** (num_bits - 1))
level_high = 2**num_bits - 1 if asym_quant else 2 ** (num_bits - 1) - 1

if asym_quant:
min_values = fns.min(weight, axis=reduction_axes, keepdims=True) # [a1, r, a2] -> [a1, 1, a2]
max_values = fns.max(weight, axis=reduction_axes, keepdims=True) # [a1, r, a2] -> [a1, 1, a2]
scale, zero_point = calculate_scale_zero_point(
min_values, max_values, level_low, level_high, narrow_range=False
)
return scale, zero_point
else:
max_values = fns.max(fns.abs(weight), axis=reduction_axes, keepdims=True) # [a1, r, a2] -> [a1, 1, a2]
min_values = -max_values

level_high = 2 ** (num_bits - 1) - 1
scale = fns.max(fns.abs(weight), axis=reduction_axes, keepdims=True) # [a1, r//gs, 1, a2]
scale /= level_high
eps = fns.finfo(scale).eps
# NOTE: adding machine epsilon to avoid division by zero
scale = fns.where(fns.abs(scale) < eps, eps, scale)
return scale, None
scale, zero_point = calculate_scale_zero_point(min_values, max_values, level_low, level_high, narrow_range=False)
return scale, zero_point if asym_quant else None


def calculate_quantized_weight(
Expand Down
16 changes: 16 additions & 0 deletions tests/openvino/native/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
from nncf.quantization.algorithms.weight_compression.mixed_precision import MIXED_PRECISION_CRITERIA
from nncf.quantization.algorithms.weight_compression.weight_lowering import do_integer_quantization
from nncf.quantization.algorithms.weight_compression.weight_lowering import get_integer_quantization_error
from nncf.quantization.algorithms.weight_compression.weight_lowering import reshape_weight_for_grouped_quantization
from nncf.scopes import IgnoredScope
Expand Down Expand Up @@ -912,3 +913,18 @@ def test_mixed_precision_e2m1(mode, all_layers, ratio, ref_ids):
}
ref_e8m0_nodes = {f"weights_{i}/scale" for i in ref_ids}
assert ref_e8m0_nodes == names_e8m0


def test_compressed_weighs_range():
    """Check INT4_SYM compression over the full signed 4-bit level range.

    Builds a weight tensor whose entries are the integer levels -8..7
    scaled by 0.1, compresses it symmetrically, and verifies that:
    * the symmetric scheme yields no zero point,
    * the per-row scale equals 2*max|w| / (2**bits - 1),
    * every level round-trips to its original integer value.
    """
    bits = 4
    half_levels = 2 ** (bits - 1)
    # Integer levels -8..7 laid out as a (2, 8) float32 matrix.
    expected_levels = np.arange(-half_levels, half_levels).reshape(2, half_levels).astype(np.float32)
    weight = Tensor(expected_levels / 10.0)

    config = WeightCompressionConfig(mode=CompressWeightsMode.INT4_SYM)
    compressed_weights, scale, zero_point = do_integer_quantization(weight, -1, config)

    # Symmetric quantization carries no zero point.
    assert zero_point is None
    # Scale spans the full [-max, max] interval across all 2**bits levels.
    expected_scale = 2 * np.max(np.abs(expected_levels) / 10.0, axis=1) / (2**bits - 1)
    assert np.allclose(scale.data, expected_scale.reshape(scale.shape))
    assert np.allclose(compressed_weights.data, expected_levels)
Loading