From bb51646b66bbd2e1dce39e5c1b08cf39a85555b2 Mon Sep 17 00:00:00 2001
From: Nikolay Lyalyushkin
Date: Tue, 5 Nov 2024 14:12:01 +0100
Subject: [PATCH 1/4] Collect statistics from subset in weight compression

---
 .../weight_compression/algorithm.py           |  8 +++---
 .../quantization/test_weights_compression.py  | 27 ++++++++++++-------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py
index 81bb4406f0a..99728f6ef8e 100644
--- a/nncf/quantization/algorithms/weight_compression/algorithm.py
+++ b/nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -498,7 +498,9 @@ def apply(
                 matmul_nodes_to_compress, graph
             )
             if statistic_points is None:
-                statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys())
+                statistic_points = self.get_statistic_points(
+                    model, graph, matmul_input_to_output_nodes_map.keys(), self._subset_size
+                )
                 statistic_points = self._collect_statistics(dataset, graph, model, statistic_points)
         statistics = self._get_statistics_for_weights_compression(
             matmul_input_to_output_nodes_map, statistic_points
@@ -759,7 +761,6 @@ def get_statistic_points(
         model: TModel,
         graph: NNCFGraph,
         nodes_and_port_ids: Iterable[Tuple[NNCFNode, int]],
-        subset_size: Optional[int] = None,
     ) -> StatisticPointsContainer:
         """
         Returns statistic points, for which StatisticsCollector should collect statistics.
@@ -767,7 +768,6 @@ def get_statistic_points(
         :param model: Model for statistics collection.
         :param graph: Model graph.
         :param nodes_and_port_ids: Nodes and port ids for which statistics should be collected.
-        :param subset_size: Number of samples to collect.
         :return: Statistic points, for which StatisticsCollector should collect statistics.
         """
         statistic_container = StatisticPointsContainer()
@@ -781,7 +781,7 @@ def get_statistic_points(
             # size dimension.
             n_dims = len(graph.get_output_edges_by_port_id(node, output_port_id)[0].tensor_shape)
             stat_collector = self._backend_entity.mean_statistic_collector(
-                reduction_axes=tuple(range(n_dims - 1)), subset_size=subset_size
+                reduction_axes=tuple(range(n_dims - 1)), subset_size=self._subset_size
             )
             statistic_container.add_statistic_point(
                 StatisticPoint(
diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py
index db72b267698..ff52f115124 100644
--- a/tests/openvino/native/quantization/test_weights_compression.py
+++ b/tests/openvino/native/quantization/test_weights_compression.py
@@ -886,25 +886,34 @@ def test_compression_for_different_dtypes(activation_dtype, weight_dtype):
     check_compressed_matmul_subgraph(scale_multiply_node, activation_dtype, weight_dtype)
 
 
-DATASET_SIZE = 129
+DATASET_SIZE = 5
 
 
 @pytest.mark.parametrize(
-    ("subset_size", "ref_size"),
+    ("dataset_size", "subset_size", "ref_size"),
     (
-        (1, 1),
-        (5, 5),
-        (130, DATASET_SIZE),
+        (DATASET_SIZE, 1, 1),
+        (DATASET_SIZE, DATASET_SIZE, DATASET_SIZE),
+        (DATASET_SIZE, DATASET_SIZE + 1, DATASET_SIZE),
     ),
 )
-def test_valid_subset_size(mocker, subset_size, ref_size):
+@pytest.mark.parametrize(
+    ("compression_args", "multiplier_of_calls"),
+    (
+        (dict(mode=CompressWeightsMode.INT4_ASYM, ratio=1), 0),  # data-free, no reducers
+        (dict(mode=CompressWeightsMode.INT4_ASYM, ratio=0.5), 1),  # 1 reducer for mixed precision
+        (dict(mode=CompressWeightsMode.INT4_ASYM, ratio=1, awq=True), 2),  # mean & shape reducers for AWQ
+        (dict(mode=CompressWeightsMode.INT4_ASYM, ratio=0.5, awq=True), 3),  # 2 for AWQ + 1 for mixed precision
+    ),
+)
+def test_data_aware_all_layers(mocker, dataset_size, subset_size, ref_size, compression_args, multiplier_of_calls):
     model = IdentityMatmul().ov_model
-    dataset = Dataset([ACTIVATION] * DATASET_SIZE)
+    dataset = Dataset([ACTIVATION] * dataset_size)
     stats_spy = mocker.spy(AggregatorBase, "register_reduced_input")
 
-    compress_weights(model, mode=CompressWeightsMode.INT4_ASYM, ratio=0.5, dataset=dataset, subset_size=subset_size)
+    compress_weights(model, dataset=dataset, subset_size=subset_size, **compression_args)
 
-    assert stats_spy.call_count == ref_size
+    assert stats_spy.call_count == ref_size * multiplier_of_calls
 
 
 def test_default_subset_value():

From a9801233a762a155a756764ff5560f507ea27863 Mon Sep 17 00:00:00 2001
From: Nikolay
Date: Wed, 6 Nov 2024 14:46:51 +0100
Subject: [PATCH 2/4] Fixed tests

---
 nncf/quantization/algorithms/weight_compression/algorithm.py  | 4 +---
 .../openvino/native/quantization/test_weights_compression.py | 4 +++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py
index 99728f6ef8e..42d1fa0060e 100644
--- a/nncf/quantization/algorithms/weight_compression/algorithm.py
+++ b/nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -498,9 +498,7 @@ def apply(
                 matmul_nodes_to_compress, graph
             )
             if statistic_points is None:
-                statistic_points = self.get_statistic_points(
-                    model, graph, matmul_input_to_output_nodes_map.keys(), self._subset_size
-                )
+                statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys())
                 statistic_points = self._collect_statistics(dataset, graph, model, statistic_points)
         statistics = self._get_statistics_for_weights_compression(
             matmul_input_to_output_nodes_map, statistic_points
diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py
index ff52f115124..99a0435ee7b 100644
--- a/tests/openvino/native/quantization/test_weights_compression.py
+++ b/tests/openvino/native/quantization/test_weights_compression.py
@@ -906,7 +906,9 @@ def test_compression_for_different_dtypes(activation_dtype, weight_dtype):
         (dict(mode=CompressWeightsMode.INT4_ASYM, ratio=0.5, awq=True), 3),  # 2 for AWQ + 1 for mixed precision
     ),
 )
-def test_data_aware_all_layers(mocker, dataset_size, subset_size, ref_size, compression_args, multiplier_of_calls):
+def test_number_of_reduced_statistics_for_subset_size(
+    mocker, dataset_size, subset_size, ref_size, compression_args, multiplier_of_calls
+):
     model = IdentityMatmul().ov_model
     dataset = Dataset([ACTIVATION] * dataset_size)
     stats_spy = mocker.spy(AggregatorBase, "register_reduced_input")

From 278620bfa3c46b59a8b91f3fa61de0fbec65255f Mon Sep 17 00:00:00 2001
From: Nikolay
Date: Wed, 6 Nov 2024 15:41:49 +0100
Subject: [PATCH 3/4] fixed stats caching

---
 nncf/openvino/quantization/quantize_model.py |  1 -
 nncf/quantization/statistics_caching.py      | 11 ++---------
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py
index cbaf9ffb62d..46db1c50cca 100644
--- a/nncf/openvino/quantization/quantize_model.py
+++ b/nncf/openvino/quantization/quantize_model.py
@@ -412,7 +412,6 @@ def compress_weights_impl(
         statistics_aggregator,
         model,
         graph,
-        subset_size,
         compression_algorithm,
         matmul_input_to_output_nodes_map,
     )
diff --git a/nncf/quantization/statistics_caching.py b/nncf/quantization/statistics_caching.py
index e806e3cc65d..20da64aebaa 100644
--- a/nncf/quantization/statistics_caching.py
+++ b/nncf/quantization/statistics_caching.py
@@ -26,7 +26,6 @@ def register_statistics_for_algorithm(
     aggregator: StatisticsAggregator,
     model: TModel,
     graph: NNCFGraph,
-    subset_size: int,
     compression_algo: WeightCompression,
     matmul_input_to_output_nodes_map: Dict[Tuple[NNCFNode, int], List[NNCFNode]],
 ) -> None:
@@ -36,14 +35,11 @@ def register_statistics_for_algorithm(
     :param aggregator: Aggregator to register statistics.
     :param model: Model being analyzed.
     :param graph: Model's computational graph.
-    :param subset_size: Size of dataset subset for statistics.
     :param compression_algo: WeightCompression algorithm instance.
     :param matmul_input_to_output_nodes_map: A dictionary mapping from a tuple of (activation node, port ID)
         to a list of MatMul nodes that accept the activation as input.
     """
-    statistic_points = compression_algo.get_statistic_points(
-        model, graph, matmul_input_to_output_nodes_map.keys(), subset_size
-    )
+    statistic_points = compression_algo.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys())
     aggregator.register_statistic_points(statistic_points)
 
 
@@ -94,15 +90,12 @@ def register_all_statistics(
     :param aggregator: Aggregator to register statistics.
    :param model: Model being analyzed.
     :param graph: Model's computational graph.
-    :param subset_size: Size of dataset subset for statistics.
     :param compression_algo: WeightCompression algorithm instance.
     :param enable_mixed_precision: Whether to enable mixed precision statistics.
""" _, matmul_input_to_output_nodes_map = compression_algo.get_compression_nodes_info(graph) - register_statistics_for_algorithm( - aggregator, model, graph, subset_size, compression_algo, matmul_input_to_output_nodes_map - ) + register_statistics_for_algorithm(aggregator, model, graph, compression_algo, matmul_input_to_output_nodes_map) if enable_mixed_precision: _register_mixed_precision(aggregator, model, graph, matmul_input_to_output_nodes_map, subset_size) From 918e1b316662350074c89ef7aba014a40f7ea59e Mon Sep 17 00:00:00 2001 From: Nikolay Date: Thu, 7 Nov 2024 14:42:55 +0100 Subject: [PATCH 4/4] moved subset_size to ctor of mixed_precision_cls --- .../weight_compression/algorithm.py | 4 +- .../weight_compression/mixed_precision.py | 40 +++++++++---------- nncf/quantization/statistics_caching.py | 6 +-- 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py index 42d1fa0060e..b47340b674d 100644 --- a/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -256,7 +256,7 @@ def __init__( primary_config = WeightCompressionConfig(mode=self._mode, group_size=self._group_size) criterion_cls = MIXED_PRECISION_CRITERIA.get(self._sensitivity_metric) - self._mixed_precision_algo = criterion_cls(primary_config, self._ratio) + self._mixed_precision_algo = criterion_cls(primary_config, self._ratio, self._subset_size) self._statistics_path = self._advanced_parameters.statistics_path if self._gptq: gptq_params = self._advanced_parameters.gptq_params @@ -789,7 +789,7 @@ def get_statistic_points( # Statistics for mixed precision algorithm if self._data_aware_mixed_precision: mixed_precision_statistics = self._mixed_precision_algo.get_statistic_points( - model, graph, nodes_and_port_ids, self._subset_size + model, graph, nodes_and_port_ids ) for points in mixed_precision_statistics.values(): for point in points: diff --git a/nncf/quantization/algorithms/weight_compression/mixed_precision.py b/nncf/quantization/algorithms/weight_compression/mixed_precision.py index 93c9c8d8b6c..a96c09fcb19 100644 --- a/nncf/quantization/algorithms/weight_compression/mixed_precision.py +++ b/nncf/quantization/algorithms/weight_compression/mixed_precision.py @@ -46,18 +46,16 @@ class MixedPrecisionCriterion(Algorithm): for weights based on some criteria. """ - def __init__( - self, - primary_config: WeightCompressionConfig, - ratio: float, - ): + def __init__(self, primary_config: WeightCompressionConfig, ratio: float, subset_size: Optional[int] = None): """ :param primary_config: Configuration on how to compress (quantize) weights to primary precision. :param ratio: The ratio between primary and backup precisions (e.g. 0.9 means 90% of layers quantized to NF4 and the rest to INT8_ASYM). + :param subset_size: Size of dataset subset for statistics. """ self._primary_config = primary_config self._ratio = ratio + self._subset_size = subset_size self._algorithm_key = f"MPC_{hash(self)}" self._backend_entity = None @@ -117,7 +115,6 @@ def get_statistic_points( model: TModel, graph: NNCFGraph, nodes_and_port_ids: Iterable[Tuple[NNCFNode, int]], - subset_size: Optional[int] = None, ) -> StatisticPointsContainer: """ Returns statistic points, for which StatisticsCollector should collect statistics. @@ -125,7 +122,6 @@ def get_statistic_points( :param model: Model for statistics collection. :param graph: Model graph. 
         :param nodes_and_port_ids: Nodes and port ids for which statistics should be collected.
-        :param subset_size: Number of samples to collect.
         :return: Statistic points, for which StatisticsCollector should collect statistics.
         """
 
@@ -201,7 +197,6 @@ def get_statistic_points(
         model: TModel,
         graph: NNCFGraph,
         nodes_and_port_ids: Iterable[Tuple[NNCFNode, int]],
-        subset_size: Optional[int] = None,
     ) -> StatisticPointsContainer:
         raise RuntimeError("No statistics collection intended for data-free mixed precision criterion")
 
@@ -262,7 +257,6 @@ def get_statistic_points(
         model: TModel,
         graph: NNCFGraph,
         nodes_and_port_ids: Iterable[Tuple[NNCFNode, int]],
-        subset_size: Optional[int] = None,
     ) -> StatisticPointsContainer:
         self._set_backend_entity(model)
 
@@ -277,7 +271,7 @@ def get_statistic_points(
             statistic_point = self._backend_entity.target_point(
                 TargetType.POST_LAYER_OPERATION, act_node.node_name, port_id=output_port_id
             )
-            stat_collector = self._get_statistic_collector(subset_size=subset_size)
+            stat_collector = self._get_statistic_collector()
             statistic_container.add_statistic_point(
                 StatisticPoint(
                     target_point=statistic_point, tensor_collector=stat_collector, algorithm=self._algorithm_key
@@ -287,11 +281,9 @@ def get_statistic_points(
         return statistic_container
 
     @abstractmethod
-    def _get_statistic_collector(self, subset_size=None):
+    def _get_statistic_collector(self):
         """
         Get statistic collector
-
-        :param subset_size: Number of samples to collect
         """
 
     def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> Tuple[NNCFNode, int]:
@@ -367,8 +359,8 @@ def _calc_weight_sensitivity(
         decompressed_weight = decompressed_weight.reshape(orig_shape)
         return fns.linalg.norm(decompressed_weight - weight, ord="fro").item()
 
-    def _get_statistic_collector(self, subset_size=None):
-        return self._backend_entity.hawq_statistic_collector(subset_size)
+    def _get_statistic_collector(self):
+        return self._backend_entity.hawq_statistic_collector()
 
 
 @MIXED_PRECISION_CRITERIA.register(SensitivityMetric.MEAN_ACTIVATION_VARIANCE)
@@ -379,9 +371,11 @@ class MeanVarianceCriterion(DataBasedCriterion):
 
     STAT_KEY = SensitivityMetric.MEAN_ACTIVATION_VARIANCE.value
 
-    def _get_statistic_collector(self, subset_size=None):
+    def _get_statistic_collector(self):
         # Reducing across the second-last dimension, assuming it is the sequence length dimension
-        return self._backend_entity.mean_variance_statistic_collector(reduction_axes=(-2,), subset_size=subset_size)
+        return self._backend_entity.mean_variance_statistic_collector(
+            reduction_axes=(-2,), subset_size=self._subset_size
+        )
 
 
 @MIXED_PRECISION_CRITERIA.register(SensitivityMetric.MAX_ACTIVATION_VARIANCE)
@@ -392,9 +386,11 @@ class MaxVarianceCriterion(DataBasedCriterion):
 
     STAT_KEY = SensitivityMetric.MAX_ACTIVATION_VARIANCE.value
 
-    def _get_statistic_collector(self, subset_size=None):
+    def _get_statistic_collector(self):
         # Reducing across the second-last dimension, assuming it is the sequence length dimension
-        return self._backend_entity.max_variance_statistic_collector(reduction_axes=(-2,), subset_size=subset_size)
+        return self._backend_entity.max_variance_statistic_collector(
+            reduction_axes=(-2,), subset_size=self._subset_size
+        )
 
 
 @MIXED_PRECISION_CRITERIA.register(SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE)
@@ -405,6 +401,8 @@ class MeanMaxCriterion(DataBasedCriterion):
 
     STAT_KEY = SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE.value
 
-    def _get_statistic_collector(self, subset_size=None):
+    def _get_statistic_collector(self):
         # Reducing across the second-last dimension, assuming it is the sequence length dimension
-        return self._backend_entity.mean_abs_max_statistic_collector(reduction_axes=(-2,), subset_size=subset_size)
+        return self._backend_entity.mean_abs_max_statistic_collector(
+            reduction_axes=(-2,), subset_size=self._subset_size
+        )
diff --git a/nncf/quantization/statistics_caching.py b/nncf/quantization/statistics_caching.py
index 20da64aebaa..d6253f2fdda 100644
--- a/nncf/quantization/statistics_caching.py
+++ b/nncf/quantization/statistics_caching.py
@@ -69,10 +69,8 @@ def _register_mixed_precision(
 
     for sensitivity in sensitivities:
         criterion_cls = MIXED_PRECISION_CRITERIA.get(sensitivity)
-        mixed_prec_algo = criterion_cls(None, None)
-        statistic_points = mixed_prec_algo.get_statistic_points(
-            model, graph, matmul_input_to_output_nodes_map.keys(), subset_size
-        )
+        mixed_prec_algo = criterion_cls(None, None, subset_size)
+        statistic_points = mixed_prec_algo.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys())
         aggregator.register_statistic_points(statistic_points)
 
 
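Usage sketch (illustrative, not part of the patch series): after patch 4, subset_size is passed once to compress_weights(), stored on WeightCompression and, through the constructor added above, on the mixed-precision criterion, so every statistic collector caps its sample count without subset_size being threaded through get_statistic_points(). The model variable and the sample shape below are assumptions made for this sketch; the patched tests use IdentityMatmul().ov_model in the same role.

    import numpy as np
    import nncf

    # Assumption: `model` is an OpenVINO model containing MatMul nodes whose
    # activation shape matches the calibration samples (the shape is made up here).
    calibration = nncf.Dataset([np.ones((1, 4, 4), dtype=np.float32) for _ in range(5)])
    compressed = nncf.compress_weights(
        model,
        mode=nncf.CompressWeightsMode.INT4_ASYM,
        ratio=0.5,        # data-aware mixed precision, so activation statistics are collected
        dataset=calibration,
        subset_size=3,    # caps the samples fed to every statistic collector
    )

With 5 calibration samples and subset_size=3, each active reducer registers exactly 3 inputs, which is what the reworked test asserts: stats_spy.call_count == ref_size * multiplier_of_calls.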