# Make SE algorithm also return zero points (#3054)
### Changes

1. Made Scale Estimation return zero points in addition to scales (see the sketch after this list).
2. Minor fixes to the `do_int_quantization` function, making its signature stricter.
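
A minimal sketch of the resulting calling pattern (illustrative only: `all_weight_params`, `weights`, and `wp.compression_config` are assumed names, while the `apply` and `do_int_quantization` signatures follow the diffs below):

```python
# Scale Estimation now returns zero points alongside scales, so both can be
# fed back into do_int_quantization instead of recomputing the zero points.
scales, zero_points = scale_estimation.apply(model, graph, statistic_points, dataset)

for wp in all_weight_params:  # hypothetical iteration over weight parameters
    name = wp.weight_name
    # do_int_quantization returns (compressed_weight, scale, zero_point); for
    # asymmetric modes both precomputed parameters must be provided together.
    compressed_weight, scale, zero_point = do_int_quantization(
        weights[name],          # hypothetical lookup of the weight tensor
        -1,                     # reduction axis, as in the added test below
        wp.compression_config,  # assumed attribute holding the config
        precomputed_scale=scales[name],
        precomputed_zero_point=zero_points[name],
    )
```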

### Reason for changes

1. Currently, zero points need to be re-computed for the weights for which SE has already computed scales.
2. Avoid unnecessary computations inside `do_int_quantization` and prevent incorrect usage of the function.

### Tests

Added a test verifying that feeding precomputed quantization parameters to `do_int_quantization` works correctly.

### Tickets
Prerequisite to 139047.
nikita-savelyevv authored Nov 6, 2024
1 parent f5ef50a commit 5d2be87
Showing 4 changed files with 63 additions and 7 deletions.
```diff
@@ -617,7 +617,7 @@ def apply(
         else:
             if self._scale_estimation:
                 scale_estimation_params = self._advanced_parameters.scale_estimation_params
-                scales = ScaleEstimation(
+                scales, zero_points = ScaleEstimation(
                     model,
                     self._backend_entity.name_to_node_mapping,
                     all_weight_params,
```
```diff
@@ -112,7 +112,7 @@ def apply(
         graph: NNCFGraph,
         statistic_points: Optional[StatisticPointsContainer] = None,
         dataset: Optional[Dataset] = None,
-    ) -> Dict[str, Tensor]:
+    ) -> Tuple[Dict[str, Tensor], Dict[str, Tensor]]:
         """
         Estimates better scale for the int4 nodes in the model.
         Minimizes per-group difference between floating point MatMul and
@@ -124,10 +124,10 @@ def apply(
         :param graph: Model graph.
         :param statistic_points: Statistic points with collected statistics values.
         :param dataset: A representative dataset for the calibration process.
-        :return: Dict with pairs (weight name, estimated scale).
+        :return: Two dictionaries for estimated scales and zero points for each weight name.
         """

-        scales = dict()
+        scales, zero_points = dict(), dict()

         for wp in track(self._all_weight_params, description="Applying Scale Estimation"):
             weight_name = wp.weight_name
@@ -147,7 +147,7 @@ def apply(

             weight = self._backend_entity.get_weight(wp.node_with_weight, weight_port_id, model, graph)

-            scales[weight_name], _ = self.calculate_quantization_params(
+            scales[weight_name], zero_points[weight_name] = self.calculate_quantization_params(
                 self._backend_entity,
                 stats,
                 weight,
@@ -159,7 +159,7 @@ def apply(
                 self._weight_penalty,
             )

-        return scales
+        return scales, zero_points

     @staticmethod
     def calculate_quantization_params(
@@ -369,6 +369,8 @@ def calculate_quantization_params(

         if config.group_size == -1:
             result_scale = fns.squeeze(result_scale, axis=1)
+        if zp is not None and config.group_size == -1:
+            zp = fns.squeeze(zp, axis=1)

         return result_scale, zp
```
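
The added guard mirrors the existing squeeze of `result_scale`: for symmetric modes `zp` is `None` and must be left alone. A small illustration using NumPy in place of NNCF's `fns` wrapper (the shapes are hypothetical):

```python
import numpy as np

# Group-wise parameters carry a group axis at position 1; with group_size == -1
# there is exactly one group, so that axis is squeezed away.
result_scale = np.ones((128, 1, 1), dtype=np.float32)
zp = np.zeros((128, 1, 1), dtype=np.int32)  # would be None for symmetric modes

result_scale = np.squeeze(result_scale, axis=1)  # shape (128, 1)
if zp is not None:
    zp = np.squeeze(zp, axis=1)                  # shape (128, 1)
```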
```diff
@@ -358,6 +358,12 @@ def do_int_quantization(
     """
     assert config.is_integer(), "The function supports integer quantization only"
     group_size = config.group_size
+    is_asym = config.mode in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT4_ASYM]
+    if is_asym and (precomputed_scale is None) != (precomputed_zero_point is None):
+        raise ValueError(
+            "If precomputed quantization parameters are provided, both scale and zero point are required "
+            "for asymmetric quantization."
+        )

     if weight.dtype != TensorDataType.float32:
         weight = weight.astype(TensorDataType.float32)
@@ -366,7 +372,8 @@ def do_int_quantization(
         # weights are reshaped from [a1, r, a2] to [a1, r//gs, gs, a2]
         weight, reduction_axes = reshape_weight_for_grouped_quantization(weight, reduction_axes, group_size)

-    if precomputed_zero_point is None or precomputed_zero_point is None:
+    scale, zero_point = None, None
+    if precomputed_scale is None or (is_asym and precomputed_zero_point is None):
         scale, zero_point = calculate_integer_quantization_params(weight, reduction_axes, config)
     if precomputed_scale is not None:
         scale = precomputed_scale
```
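
For context, in asymmetric quantization the scale and zero point are derived jointly from the same min/max statistics, which is why the new check rejects providing only one of them. A textbook reference computation (a sketch under standard min-max assumptions, not necessarily what NNCF's `calculate_integer_quantization_params` does):

```python
import numpy as np

def reference_asym_params(weight: np.ndarray, axis: int, num_bits: int = 4):
    """Textbook asymmetric INT quantization parameters; scale and zero point
    are coupled through the shared per-group min/max."""
    level_low, level_high = 0, 2**num_bits - 1
    w_min = np.minimum(weight.min(axis=axis, keepdims=True), 0.0)
    w_max = np.maximum(weight.max(axis=axis, keepdims=True), 0.0)
    scale = (w_max - w_min) / (level_high - level_low)
    scale = np.where(scale == 0.0, 1.0, scale)  # guard against constant groups
    zero_point = np.clip(np.round(-w_min / scale), level_low, level_high)
    return scale, zero_point.astype(np.int32)
```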
`tests/openvino/native/quantization/test_weights_compression.py` (47 additions, 0 deletions)
```diff
@@ -1074,6 +1074,53 @@ def test_compressed_weighs_range(mode, data):
     assert np.allclose(np.abs(compressed_weighs.data), np.abs(w.data))


+@pytest.mark.parametrize(
+    ("config", "precompute_scale", "precompute_zero_point", "raises"),
+    [
+        (WeightCompressionConfig(CompressWeightsMode.INT8_ASYM), False, False, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT8_ASYM), True, True, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT8_ASYM), True, False, True),
+        (WeightCompressionConfig(CompressWeightsMode.INT8_ASYM), False, True, True),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_ASYM), False, False, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_ASYM), True, True, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_ASYM), True, False, True),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_ASYM), False, True, True),
+        (WeightCompressionConfig(CompressWeightsMode.INT8_SYM), True, False, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT8_SYM), False, False, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_SYM), True, False, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_SYM), False, False, False),
+    ],
+)
+def test_int_quantization_with_precomputed_parameters(config, precompute_scale, precompute_zero_point, raises):
+    is_asym = config.mode in [CompressWeightsMode.INT4_ASYM, CompressWeightsMode.INT8_ASYM]
+
+    precomputed_scale, precomputed_zero_point = None, None
+    weight = Tensor(((np.arange(11) - 5) / 10).astype(np.float32)[:, None])
+    if precompute_scale:
+        precomputed_scale = Tensor(-((np.arange(11) - 5) / 100).astype(np.float32)[:, None])
+    if precompute_zero_point:
+        precomputed_zero_point = Tensor(np.arange(11).astype(np.int32)[:, None])
+
+    if raises:
+        with pytest.raises(ValueError) as exc_info:
+            _, scale, zero_point = do_int_quantization(weight, -1, config, precomputed_scale, precomputed_zero_point)
+        assert str(exc_info.value) == (
+            "If precomputed quantization parameters are provided, both scale and zero point "
+            "are required for asymmetric quantization."
+        )
+        return
+    else:
+        _, scale, zero_point = do_int_quantization(weight, -1, config, precomputed_scale, precomputed_zero_point)
+
+    if precompute_scale:
+        assert np.allclose(scale.data, precomputed_scale.data)
+    if is_asym:
+        if precompute_zero_point:
+            assert np.allclose(zero_point.data, precomputed_zero_point.data)
+    else:
+        assert zero_point is None
+
+
 @pytest.mark.parametrize("mode", INT4_NF4_MODES)
 def test_call_max_var_criterion_with_dataset_gptq_neg_group_size(mode):
     model = AWQMatmulModel().ov_model
```
