From bb51646b66bbd2e1dce39e5c1b08cf39a85555b2 Mon Sep 17 00:00:00 2001
From: Nikolay Lyalyushkin
Date: Tue, 5 Nov 2024 14:12:01 +0100
Subject: [PATCH 1/4] Collect statistics from subset in weight compression

---
 .../weight_compression/algorithm.py           |  8 +++---
 .../quantization/test_weights_compression.py  | 27 ++++++++++++-------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py
index 81bb4406f0a..99728f6ef8e 100644
--- a/nncf/quantization/algorithms/weight_compression/algorithm.py
+++ b/nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -498,7 +498,9 @@ def apply(
                 matmul_nodes_to_compress, graph
             )
             if statistic_points is None:
-                statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys())
+                statistic_points = self.get_statistic_points(
+                    model, graph, matmul_input_to_output_nodes_map.keys(), self._subset_size
+                )
                 statistic_points = self._collect_statistics(dataset, graph, model, statistic_points)
         statistics = self._get_statistics_for_weights_compression(
             matmul_input_to_output_nodes_map, statistic_points
@@ -759,7 +761,6 @@ def get_statistic_points(
         model: TModel,
         graph: NNCFGraph,
         nodes_and_port_ids: Iterable[Tuple[NNCFNode, int]],
-        subset_size: Optional[int] = None,
     ) -> StatisticPointsContainer:
         """
         Returns statistic points, for which StatisticsCollector should collect statistics.
@@ -767,7 +768,6 @@ def get_statistic_points(
         :param model: Model for statistics collection.
         :param graph: Model graph.
         :param nodes_and_port_ids: Nodes and port ids for which statistics should be collected.
-        :param subset_size: Number of samples to collect.
         :return: Statistic points, for which StatisticsCollector should collect statistics.
         """
         statistic_container = StatisticPointsContainer()
@@ -781,7 +781,7 @@ def get_statistic_points(
             # size dimension.
             n_dims = len(graph.get_output_edges_by_port_id(node, output_port_id)[0].tensor_shape)
             stat_collector = self._backend_entity.mean_statistic_collector(
-                reduction_axes=tuple(range(n_dims - 1)), subset_size=subset_size
+                reduction_axes=tuple(range(n_dims - 1)), subset_size=self._subset_size
             )
             statistic_container.add_statistic_point(
                 StatisticPoint(
diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py
index db72b267698..ff52f115124 100644
--- a/tests/openvino/native/quantization/test_weights_compression.py
+++ b/tests/openvino/native/quantization/test_weights_compression.py
@@ -886,25 +886,34 @@ def test_compression_for_different_dtypes(activation_dtype, weight_dtype):
     check_compressed_matmul_subgraph(scale_multiply_node, activation_dtype, weight_dtype)
 
 
-DATASET_SIZE = 129
+DATASET_SIZE = 5
 
 
 @pytest.mark.parametrize(
-    ("subset_size", "ref_size"),
+    ("dataset_size", "subset_size", "ref_size"),
     (
-        (1, 1),
-        (5, 5),
-        (130, DATASET_SIZE),
+        (DATASET_SIZE, 1, 1),
+        (DATASET_SIZE, DATASET_SIZE, DATASET_SIZE),
+        (DATASET_SIZE, DATASET_SIZE + 1, DATASET_SIZE),
     ),
 )
-def test_valid_subset_size(mocker, subset_size, ref_size):
+@pytest.mark.parametrize(
+    ("compression_args", "multiplier_of_calls"),
+    (
+        (dict(mode=CompressWeightsMode.INT4_ASYM, ratio=1), 0),  # data-free, no reducers
+        (dict(mode=CompressWeightsMode.INT4_ASYM, ratio=0.5), 1),  # 1 reducer for mixed precision
+        (dict(mode=CompressWeightsMode.INT4_ASYM, ratio=1, awq=True), 2),  # mean & shape reducers for AWQ
+        (dict(mode=CompressWeightsMode.INT4_ASYM, ratio=0.5, awq=True), 3),  # 2 for AWQ + 1 for mixed precision
+    ),
+)
+def test_data_aware_all_layers(mocker, dataset_size, subset_size, ref_size, compression_args, multiplier_of_calls):
     model = IdentityMatmul().ov_model
-    dataset = Dataset([ACTIVATION] * DATASET_SIZE)
+    dataset = Dataset([ACTIVATION] * dataset_size)
     stats_spy = mocker.spy(AggregatorBase, "register_reduced_input")
 
-    compress_weights(model, mode=CompressWeightsMode.INT4_ASYM, ratio=0.5, dataset=dataset, subset_size=subset_size)
+    compress_weights(model, dataset=dataset, subset_size=subset_size, **compression_args)
 
-    assert stats_spy.call_count == ref_size
+    assert stats_spy.call_count == ref_size * multiplier_of_calls
 
 
 def test_default_subset_value():

From a9801233a762a155a756764ff5560f507ea27863 Mon Sep 17 00:00:00 2001
From: Nikolay
Date: Wed, 6 Nov 2024 14:46:51 +0100
Subject: [PATCH 2/4] Fixed tests

---
 nncf/quantization/algorithms/weight_compression/algorithm.py  | 4 +---
 .../openvino/native/quantization/test_weights_compression.py | 4 +++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py
index 99728f6ef8e..42d1fa0060e 100644
--- a/nncf/quantization/algorithms/weight_compression/algorithm.py
+++ b/nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -498,9 +498,7 @@ def apply(
                 matmul_nodes_to_compress, graph
             )
             if statistic_points is None:
-                statistic_points = self.get_statistic_points(
-                    model, graph, matmul_input_to_output_nodes_map.keys(), self._subset_size
-                )
+                statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys())
                 statistic_points = self._collect_statistics(dataset, graph, model, statistic_points)
         statistics = self._get_statistics_for_weights_compression(
             matmul_input_to_output_nodes_map, statistic_points
diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py
index ff52f115124..99a0435ee7b 100644
--- a/tests/openvino/native/quantization/test_weights_compression.py
+++ b/tests/openvino/native/quantization/test_weights_compression.py
@@ -906,7 +906,9 @@ def test_compression_for_different_dtypes(activation_dtype, weight_dtype):
         (dict(mode=CompressWeightsMode.INT4_ASYM, ratio=0.5, awq=True), 3),  # 2 for AWQ + 1 for mixed precision
     ),
 )
-def test_data_aware_all_layers(mocker, dataset_size, subset_size, ref_size, compression_args, multiplier_of_calls):
+def test_number_of_reduced_statistics_for_subset_size(
+    mocker, dataset_size, subset_size, ref_size, compression_args, multiplier_of_calls
+):
     model = IdentityMatmul().ov_model
     dataset = Dataset([ACTIVATION] * dataset_size)
     stats_spy = mocker.spy(AggregatorBase, "register_reduced_input")

From 278620bfa3c46b59a8b91f3fa61de0fbec65255f Mon Sep 17 00:00:00 2001
From: Nikolay
Date: Wed, 6 Nov 2024 15:41:49 +0100
Subject: [PATCH 3/4] fixed stats caching

---
 nncf/openvino/quantization/quantize_model.py |  1 -
 nncf/quantization/statistics_caching.py      | 11 ++---------
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py
index cbaf9ffb62d..46db1c50cca 100644
--- a/nncf/openvino/quantization/quantize_model.py
+++ b/nncf/openvino/quantization/quantize_model.py
@@ -412,7 +412,6 @@ def compress_weights_impl(
         statistics_aggregator,
         model,
         graph,
-        subset_size,
         compression_algorithm,
         matmul_input_to_output_nodes_map,
     )
diff --git a/nncf/quantization/statistics_caching.py b/nncf/quantization/statistics_caching.py
index e806e3cc65d..20da64aebaa 100644
--- a/nncf/quantization/statistics_caching.py
+++ b/nncf/quantization/statistics_caching.py
@@ -26,7 +26,6 @@ def register_statistics_for_algorithm(
     aggregator: StatisticsAggregator,
     model: TModel,
     graph: NNCFGraph,
-    subset_size: int,
     compression_algo: WeightCompression,
     matmul_input_to_output_nodes_map: Dict[Tuple[NNCFNode, int], List[NNCFNode]],
 ) -> None:
@@ -36,14 +35,11 @@ def register_statistics_for_algorithm(
     :param aggregator: Aggregator to register statistics.
     :param model: Model being analyzed.
     :param graph: Model's computational graph.
-    :param subset_size: Size of dataset subset for statistics.
     :param compression_algo: WeightCompression algorithm instance.
     :param matmul_input_to_output_nodes_map: A dictionary mapping from a tuple of (activation node, port ID)
         to a list of MatMul nodes that accept the activation as input.
     """
-    statistic_points = compression_algo.get_statistic_points(
-        model, graph, matmul_input_to_output_nodes_map.keys(), subset_size
-    )
+    statistic_points = compression_algo.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys())
     aggregator.register_statistic_points(statistic_points)
 
 
@@ -94,15 +90,12 @@ def register_all_statistics(
     :param aggregator: Aggregator to register statistics.
    :param model: Model being analyzed.
     :param graph: Model's computational graph.
-    :param subset_size: Size of dataset subset for statistics.
     :param compression_algo: WeightCompression algorithm instance.
     :param enable_mixed_precision: Whether to enable mixed precision statistics.
""" _, matmul_input_to_output_nodes_map = compression_algo.get_compression_nodes_info(graph) - register_statistics_for_algorithm( - aggregator, model, graph, subset_size, compression_algo, matmul_input_to_output_nodes_map - ) + register_statistics_for_algorithm(aggregator, model, graph, compression_algo, matmul_input_to_output_nodes_map) if enable_mixed_precision: _register_mixed_precision(aggregator, model, graph, matmul_input_to_output_nodes_map, subset_size) From 918e1b316662350074c89ef7aba014a40f7ea59e Mon Sep 17 00:00:00 2001 From: Nikolay Date: Thu, 7 Nov 2024 14:42:55 +0100 Subject: [PATCH 4/4] moved subset_size to ctor of mixed_precision_cls --- .../weight_compression/algorithm.py | 4 +- .../weight_compression/mixed_precision.py | 40 +++++++++---------- nncf/quantization/statistics_caching.py | 6 +-- 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py index 42d1fa0060e..b47340b674d 100644 --- a/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -256,7 +256,7 @@ def __init__( primary_config = WeightCompressionConfig(mode=self._mode, group_size=self._group_size) criterion_cls = MIXED_PRECISION_CRITERIA.get(self._sensitivity_metric) - self._mixed_precision_algo = criterion_cls(primary_config, self._ratio) + self._mixed_precision_algo = criterion_cls(primary_config, self._ratio, self._subset_size) self._statistics_path = self._advanced_parameters.statistics_path if self._gptq: gptq_params = self._advanced_parameters.gptq_params @@ -789,7 +789,7 @@ def get_statistic_points( # Statistics for mixed precision algorithm if self._data_aware_mixed_precision: mixed_precision_statistics = self._mixed_precision_algo.get_statistic_points( - model, graph, nodes_and_port_ids, self._subset_size + model, graph, nodes_and_port_ids ) for points in mixed_precision_statistics.values(): for point in points: diff --git a/nncf/quantization/algorithms/weight_compression/mixed_precision.py b/nncf/quantization/algorithms/weight_compression/mixed_precision.py index 93c9c8d8b6c..a96c09fcb19 100644 --- a/nncf/quantization/algorithms/weight_compression/mixed_precision.py +++ b/nncf/quantization/algorithms/weight_compression/mixed_precision.py @@ -46,18 +46,16 @@ class MixedPrecisionCriterion(Algorithm): for weights based on some criteria. """ - def __init__( - self, - primary_config: WeightCompressionConfig, - ratio: float, - ): + def __init__(self, primary_config: WeightCompressionConfig, ratio: float, subset_size: Optional[int] = None): """ :param primary_config: Configuration on how to compress (quantize) weights to primary precision. :param ratio: The ratio between primary and backup precisions (e.g. 0.9 means 90% of layers quantized to NF4 and the rest to INT8_ASYM). + :param subset_size: Size of dataset subset for statistics. """ self._primary_config = primary_config self._ratio = ratio + self._subset_size = subset_size self._algorithm_key = f"MPC_{hash(self)}" self._backend_entity = None @@ -117,7 +115,6 @@ def get_statistic_points( model: TModel, graph: NNCFGraph, nodes_and_port_ids: Iterable[Tuple[NNCFNode, int]], - subset_size: Optional[int] = None, ) -> StatisticPointsContainer: """ Returns statistic points, for which StatisticsCollector should collect statistics. @@ -125,7 +122,6 @@ def get_statistic_points( :param model: Model for statistics collection. :param graph: Model graph. 
         :param nodes_and_port_ids: Nodes and port ids for which statistics should be collected.
-        :param subset_size: Number of samples to collect.
         :return: Statistic points, for which StatisticsCollector should collect statistics.
         """
 
@@ -201,7 +197,6 @@ def get_statistic_points(
         model: TModel,
         graph: NNCFGraph,
         nodes_and_port_ids: Iterable[Tuple[NNCFNode, int]],
-        subset_size: Optional[int] = None,
     ) -> StatisticPointsContainer:
         raise RuntimeError("No statistics collection intended for data-free mixed precision criterion")
 
@@ -262,7 +257,6 @@ def get_statistic_points(
         model: TModel,
         graph: NNCFGraph,
         nodes_and_port_ids: Iterable[Tuple[NNCFNode, int]],
-        subset_size: Optional[int] = None,
     ) -> StatisticPointsContainer:
         self._set_backend_entity(model)
 
@@ -277,7 +271,7 @@ def get_statistic_points(
             statistic_point = self._backend_entity.target_point(
                 TargetType.POST_LAYER_OPERATION, act_node.node_name, port_id=output_port_id
             )
-            stat_collector = self._get_statistic_collector(subset_size=subset_size)
+            stat_collector = self._get_statistic_collector()
             statistic_container.add_statistic_point(
                 StatisticPoint(
                     target_point=statistic_point, tensor_collector=stat_collector, algorithm=self._algorithm_key
@@ -287,11 +281,9 @@ def get_statistic_points(
         return statistic_container
 
     @abstractmethod
-    def _get_statistic_collector(self, subset_size=None):
+    def _get_statistic_collector(self):
         """
         Get statistic collector
-
-        :param subset_size: Number of samples to collect
         """
 
     def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> Tuple[NNCFNode, int]:
@@ -367,8 +359,8 @@ def _calc_weight_sensitivity(
         decompressed_weight = decompressed_weight.reshape(orig_shape)
         return fns.linalg.norm(decompressed_weight - weight, ord="fro").item()
 
-    def _get_statistic_collector(self, subset_size=None):
-        return self._backend_entity.hawq_statistic_collector(subset_size)
+    def _get_statistic_collector(self):
+        return self._backend_entity.hawq_statistic_collector()
 
 
 @MIXED_PRECISION_CRITERIA.register(SensitivityMetric.MEAN_ACTIVATION_VARIANCE)
@@ -379,9 +371,11 @@ class MeanVarianceCriterion(DataBasedCriterion):
 
     STAT_KEY = SensitivityMetric.MEAN_ACTIVATION_VARIANCE.value
 
-    def _get_statistic_collector(self, subset_size=None):
+    def _get_statistic_collector(self):
         # Reducing across the second-last dimension, assuming it is the sequence length dimension
-        return self._backend_entity.mean_variance_statistic_collector(reduction_axes=(-2,), subset_size=subset_size)
+        return self._backend_entity.mean_variance_statistic_collector(
+            reduction_axes=(-2,), subset_size=self._subset_size
+        )
 
 
 @MIXED_PRECISION_CRITERIA.register(SensitivityMetric.MAX_ACTIVATION_VARIANCE)
@@ -392,9 +386,11 @@ class MaxVarianceCriterion(DataBasedCriterion):
 
     STAT_KEY = SensitivityMetric.MAX_ACTIVATION_VARIANCE.value
 
-    def _get_statistic_collector(self, subset_size=None):
+    def _get_statistic_collector(self):
         # Reducing across the second-last dimension, assuming it is the sequence length dimension
-        return self._backend_entity.max_variance_statistic_collector(reduction_axes=(-2,), subset_size=subset_size)
+        return self._backend_entity.max_variance_statistic_collector(
+            reduction_axes=(-2,), subset_size=self._subset_size
+        )
 
 
 @MIXED_PRECISION_CRITERIA.register(SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE)
@@ -405,6 +401,8 @@ class MeanMaxCriterion(DataBasedCriterion):
 
     STAT_KEY = SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE.value
 
-    def _get_statistic_collector(self, subset_size=None):
+    def _get_statistic_collector(self):
         # Reducing across the second-last dimension, assuming it is the sequence length dimension
-        return self._backend_entity.mean_abs_max_statistic_collector(reduction_axes=(-2,), subset_size=subset_size)
+        return self._backend_entity.mean_abs_max_statistic_collector(
+            reduction_axes=(-2,), subset_size=self._subset_size
+        )
diff --git a/nncf/quantization/statistics_caching.py b/nncf/quantization/statistics_caching.py
index 20da64aebaa..d6253f2fdda 100644
--- a/nncf/quantization/statistics_caching.py
+++ b/nncf/quantization/statistics_caching.py
@@ -69,10 +69,8 @@ def _register_mixed_precision(
 
     for sensitivity in sensitivities:
         criterion_cls = MIXED_PRECISION_CRITERIA.get(sensitivity)
-        mixed_prec_algo = criterion_cls(None, None)
-        statistic_points = mixed_prec_algo.get_statistic_points(
-            model, graph, matmul_input_to_output_nodes_map.keys(), subset_size
-        )
+        mixed_prec_algo = criterion_cls(None, None, subset_size)
+        statistic_points = mixed_prec_algo.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys())
         aggregator.register_statistic_points(statistic_points)
 
 
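Usage sketch (illustrative, not part of the patch series): after patch 4, subset_size is passed once to compress_weights(), stored on WeightCompression and, through the constructor added above, on the mixed-precision criterion, so every statistic collector caps its sample count without subset_size being threaded through get_statistic_points(). The model variable and the sample shape below are assumptions made for this sketch; the patched tests use IdentityMatmul().ov_model in the same role.

    import numpy as np
    import nncf

    # Assumption: `model` is an OpenVINO model containing MatMul nodes whose
    # activation shape matches the calibration samples (the shape is made up here).
    calibration = nncf.Dataset([np.ones((1, 4, 4), dtype=np.float32) for _ in range(5)])
    compressed = nncf.compress_weights(
        model,
        mode=nncf.CompressWeightsMode.INT4_ASYM,
        ratio=0.5,        # data-aware mixed precision, so activation statistics are collected
        dataset=calibration,
        subset_size=3,    # caps the samples fed to every statistic collector
    )

With 5 calibration samples and subset_size=3, each active reducer registers exactly 3 inputs, which is what the reworked test asserts: stats_spy.call_count == ref_size * multiplier_of_calls.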