From 1ce23c54108db4a6fa1a1dc0d32c4bdd54e03638 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 13 Oct 2023 12:22:15 +0200 Subject: [PATCH 001/108] draft --- nncf/common/tensor_statistics/aggregator.py | 28 +++++++++++++++------ nncf/data/dataset.py | 6 +++++ nncf/onnx/statistics/aggregator.py | 4 +++ nncf/openvino/statistics/aggregator.py | 4 +++ nncf/torch/statistics/aggregator.py | 6 ++++- 5 files changed, 39 insertions(+), 9 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 6c4925ad458..68a01575b03 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -54,19 +54,26 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: engine = factory.EngineFactory.create(model_with_outputs) dataset_length = self.dataset.get_length() + batch_size = self.dataset.get_batch_size() + collected_statistics_num = 0 + if batch_size is not None and dataset_length is not None: + dataset_length *= batch_size total = ( min(dataset_length or self.stat_subset_size, self.stat_subset_size) if self.stat_subset_size is not None else None ) - for input_data in track( - islice(self.dataset.get_inference_data(), self.stat_subset_size), - total=total, - description="Statistics collection", - ): - outputs = engine.infer(input_data) - processed_outputs = self._process_outputs(outputs) - self._register_statistics(processed_outputs, merged_statistics) + with track(total=total, description="Statistics collection") as pbar: + for input_data in islice(self.dataset.get_inference_data(), self.stat_subset_size): + batch_size_to_collect = min(total - collected_statistics_num, batch_size) + sliced_iput = self._get_sliced_data(input_data, batch_size_to_collect) + outputs = engine.infer(sliced_iput) + processed_outputs = self._process_outputs(outputs) + self._register_statistics(processed_outputs, merged_statistics) + collected_statistics_num += 
batch_size_to_collect + pbar.progress.update(pbar.task, advance=batch_size_to_collect) + if collected_statistics_num == total: + break def register_statistic_points(self, statistic_points: StatisticPointsContainer) -> None: """ @@ -134,3 +141,8 @@ def _process_outputs(outputs: Any) -> Dict[str, NNCFTensor]: :param outputs: raw model outputs :return: processed model outputs in Dict[str, NNCFTensor] format """ + + @staticmethod + @abstractmethod + def _get_sliced_data(inputs: Any, end: int) -> Any: + """ """ diff --git a/nncf/data/dataset.py b/nncf/data/dataset.py index d89fb6fc84c..d4eec574dc1 100644 --- a/nncf/data/dataset.py +++ b/nncf/data/dataset.py @@ -81,6 +81,12 @@ def get_length(self) -> Optional[int]: return self._data_source.__len__() return None + def get_batch_size(self) -> Optional[int]: + """ """ + if hasattr(self._data_source, "batch_size"): + return self._data_source.batch_size + return None + class DataProvider(Generic[DataItem, ModelInput]): def __init__( diff --git a/nncf/onnx/statistics/aggregator.py b/nncf/onnx/statistics/aggregator.py index a768a855258..0c9f2dc76a5 100644 --- a/nncf/onnx/statistics/aggregator.py +++ b/nncf/onnx/statistics/aggregator.py @@ -80,3 +80,7 @@ def _get_merged_statistic_points( @staticmethod def _process_outputs(outputs: Dict[str, np.ndarray]) -> Dict[str, ONNXNNCFTensor]: return {n: ONNXNNCFTensor(v) for n, v in outputs.items()} + + @staticmethod + def _get_sliced_data(inputs: Dict[str, np.ndarray], end: int) -> Dict[str, ONNXNNCFTensor]: + return inputs diff --git a/nncf/openvino/statistics/aggregator.py b/nncf/openvino/statistics/aggregator.py index 7fd5e26c72d..7a2f2ddb2a2 100644 --- a/nncf/openvino/statistics/aggregator.py +++ b/nncf/openvino/statistics/aggregator.py @@ -111,3 +111,7 @@ def _get_merged_statistic_points( @staticmethod def _process_outputs(outputs: Dict[str, np.ndarray]) -> Dict[str, OVNNCFTensor]: return {n: OVNNCFTensor(v) for n, v in outputs.items()} + + @staticmethod + def 
_get_sliced_data(inputs: Dict[str, np.ndarray], end: int) -> Dict[str, OVNNCFTensor]: + return inputs diff --git a/nncf/torch/statistics/aggregator.py b/nncf/torch/statistics/aggregator.py index 41fdc20c4fa..79fa76e0430 100644 --- a/nncf/torch/statistics/aggregator.py +++ b/nncf/torch/statistics/aggregator.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict +from typing import Any, Dict import numpy as np import torch @@ -70,3 +70,7 @@ def _get_merged_statistic_points( @staticmethod def _process_outputs(outputs: Dict[str, np.ndarray]) -> Dict[str, PTNNCFTensor]: return outputs + + @staticmethod + def _get_sliced_data(inputs: Any, end: int) -> Any: + return inputs[:end] From 8184df67515443d59e88fc0bea85d7c1a5cb94f0 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 13 Oct 2023 13:14:18 +0200 Subject: [PATCH 002/108] check on Nones --- nncf/common/tensor_statistics/aggregator.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 68a01575b03..a40dcde9994 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -45,7 +45,7 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ if not self.statistic_points: return - + collected_statistics_num = 0 model_transformer = factory.ModelTransformerFactory.create(model) merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph) @@ -53,11 +53,10 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: model_with_outputs = model_transformer.transform(transformation_layout) engine = factory.EngineFactory.create(model_with_outputs) - dataset_length = self.dataset.get_length() batch_size = self.dataset.get_batch_size() - collected_statistics_num = 0 - if batch_size is not None and 
dataset_length is not None: - dataset_length *= batch_size + batch_size = 1 if batch_size is None else batch_size + dataset_length = self.dataset.get_length() + dataset_length = dataset_length * batch_size if dataset_length is not None else dataset_length total = ( min(dataset_length or self.stat_subset_size, self.stat_subset_size) if self.stat_subset_size is not None @@ -65,14 +64,16 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: ) with track(total=total, description="Statistics collection") as pbar: for input_data in islice(self.dataset.get_inference_data(), self.stat_subset_size): - batch_size_to_collect = min(total - collected_statistics_num, batch_size) + batch_size_to_collect = ( + min(total - collected_statistics_num, batch_size) if total is not None else batch_size + ) sliced_iput = self._get_sliced_data(input_data, batch_size_to_collect) outputs = engine.infer(sliced_iput) processed_outputs = self._process_outputs(outputs) self._register_statistics(processed_outputs, merged_statistics) collected_statistics_num += batch_size_to_collect pbar.progress.update(pbar.task, advance=batch_size_to_collect) - if collected_statistics_num == total: + if total and collected_statistics_num == total: break def register_statistic_points(self, statistic_points: StatisticPointsContainer) -> None: From 2e3f5073e2b4d03867deb57a043f3af52453f5d1 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 18 Oct 2023 16:21:06 +0200 Subject: [PATCH 003/108] update aggregator with keep_dims=True --- .../common/tensor_statistics/collectors.py | 61 ++++----------- nncf/openvino/statistics/collectors.py | 7 +- .../bias_correction/openvino_backend.py | 3 +- .../fast_bias_correction/openvino_backend.py | 3 +- .../fast_bias_correction/torch_backend.py | 3 +- .../algorithms/min_max/openvino_backend.py | 78 ++++++++++--------- nncf/torch/tensor_statistics/collectors.py | 11 +-- 7 files changed, 62 insertions(+), 104 deletions(-) diff --git 
a/nncf/experimental/common/tensor_statistics/collectors.py b/nncf/experimental/common/tensor_statistics/collectors.py index 68cd4442c0a..5d64c84eb47 100644 --- a/nncf/experimental/common/tensor_statistics/collectors.py +++ b/nncf/experimental/common/tensor_statistics/collectors.py @@ -12,7 +12,6 @@ from abc import ABC from abc import abstractmethod from collections import defaultdict -from collections import deque from typing import Any, Dict, List, Optional, Set, Tuple, Type, TypeVar, Union from nncf.common.tensor import TensorType @@ -129,16 +128,14 @@ def __init__( """ :param tensor_processor: Backend-specific tensor processor. :param aggregation_axes: Axes along which to operate. - Registered statistics are stacked along zero axis, - axes >=1 correspond to recieved statistic axes shifted left by 1. :param num_samples: Maximum number of samples to collect. Aggregator skips tensor registration if tensor registration was called num_samples times before. Aggregator never skips registration if num_samples is None. 
""" self._tensor_processor = tensor_processor - self._aggregation_axes = (0,) if aggregation_axes is None else aggregation_axes - self._keepdims = False + self._aggregation_axes = (0,) if aggregation_axes is None else (0, *map(lambda x: x + 1, aggregation_axes)) + self._keepdims = True self._num_samples = num_samples self._collected_samples = 0 self._container = [] @@ -594,20 +591,7 @@ def _aggregate_impl(self): return self._container.shape -class TensorAggregatorBase(AggregatorBase, ABC): - def __init__( - self, - tensor_processor: NNCFCollectorTensorProcessor, - aggregation_axes: Optional[AggregationAxes] = None, - num_samples: Optional[int] = None, - window_size=None, - ): - super().__init__(tensor_processor, aggregation_axes=aggregation_axes, num_samples=num_samples) - self._window_size = window_size - self._container = deque(maxlen=window_size) - - -class OnlineAggregatorBase(TensorAggregatorBase, ABC): +class OnlineAggregatorBase(AggregatorBase, ABC): """ Base class for aggregators which are using aggregation function fn with following property: fn([x1, x2, x3]) == fn([fn([x1, x2]), x3]) where x1, x2, x3 are samples to aggregate. 
@@ -616,26 +600,14 @@ class OnlineAggregatorBase(TensorAggregatorBase, ABC): """ def _register_reduced_input_impl(self, x: NNCFTensor) -> None: - online_aggregation_axes = tuple(dim - 1 for dim in self._aggregation_axes if dim != 0) - if online_aggregation_axes: - reduced = self._aggregation_fn(x, axis=online_aggregation_axes, keepdims=self._keepdims) - else: - reduced = x - if 0 in self._aggregation_axes: - if self._container: - reduced = self._aggregation_fn( - self._tensor_processor.stack([reduced, self._container]), axis=0, keepdims=False - ) - self._container = reduced - else: - self._container.append(reduced) + stacked_tensors = self._tensor_processor.stack([x, *self._container]) + aggregated = self._aggregation_fn(stacked_tensors, axis=self._aggregation_axes, keepdims=self._keepdims) + squeezed = self._tensor_processor.squeeze(aggregated, 0) + self._container = [squeezed] def _aggregate_impl(self) -> NNCFTensor: - if 0 in self._aggregation_axes: - if self._keepdims: - return self._tensor_processor.stack([self._container]).tensor - return self._container.tensor - return self._tensor_processor.stack(self._container).tensor + assert len(self._container) == 1 + return self._container[0].tensor @abstractmethod def _aggregation_fn(self, stacked_value: NNCFTensor, axis: AggregationAxes, keepdims: bool) -> NNCFTensor: @@ -652,7 +624,7 @@ def _aggregation_fn(self, stacked_value: NNCFTensor, axis: AggregationAxes, keep return self._tensor_processor.reduce_max(stacked_value, axis=axis, keepdims=keepdims) -class OfflineAggregatorBase(TensorAggregatorBase, ABC): +class OfflineAggregatorBase(AggregatorBase, ABC): """ Base class for aggregators which are using aggregation function fn which does not fulfill property fn([x1, x2, x3]) == fn([fn([x1, x2]), x3]) @@ -665,7 +637,8 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: def _aggregate_impl(self) -> NNCFTensor: stacked_val = self._tensor_processor.stack(self._container) - return 
self._aggregation_fn(stacked_val, axis=self._aggregation_axes, keepdims=self._keepdims).tensor + aggregated = self._aggregation_fn(stacked_val, axis=self._aggregation_axes, keepdims=self._keepdims) + return self._tensor_processor.squeeze(aggregated, 0).tensor @abstractmethod def _aggregation_fn(self, stacked_value: NNCFTensor, axis: AggregationAxes, keepdims: bool) -> NNCFTensor: @@ -688,12 +661,9 @@ def __init__( tensor_processor: NNCFCollectorTensorProcessor, aggregation_axes: Optional[AggregationAxes] = None, num_samples: Optional[int] = None, - window_size=None, quantile: float = 0.01, ): super().__init__(tensor_processor, aggregation_axes=aggregation_axes, num_samples=num_samples) - self._window_size = window_size - self._container = deque(maxlen=window_size) self._quantile = quantile def _aggregate_impl(self) -> NNCFTensor: @@ -734,7 +704,7 @@ def _aggregation_fn( return self._tensor_processor.masked_median(stacked_samples, axis=axis, mask=mask, keepdims=keepdims) -class MedianAbsoluteDeviationAggregator(TensorAggregatorBase): +class MedianAbsoluteDeviationAggregator(AggregatorBase): def _register_reduced_input_impl(self, x: TensorType) -> None: return self._container.append(x) @@ -759,19 +729,16 @@ def _aggregate_impl(self) -> Dict[str, NNCFTensor]: } -class PercentileAggregator(TensorAggregatorBase): +class PercentileAggregator(AggregatorBase): def __init__( self, tensor_processor: NNCFCollectorTensorProcessor, percentiles_to_collect: List[float], aggregation_axes: Optional[AggregationAxes] = None, num_samples: Optional[int] = None, - window_size=None, ): super().__init__(tensor_processor, aggregation_axes=aggregation_axes, num_samples=num_samples) self._percentiles_to_collect = percentiles_to_collect - self._window_size = window_size - self._container = deque(maxlen=window_size) def _register_reduced_input_impl(self, x: TensorType) -> None: return self._container.append(x) diff --git a/nncf/openvino/statistics/collectors.py 
b/nncf/openvino/statistics/collectors.py index 4672541d86b..d75ce4376d9 100644 --- a/nncf/openvino/statistics/collectors.py +++ b/nncf/openvino/statistics/collectors.py @@ -271,16 +271,12 @@ def get_output_names(self, target_node_name: str, port_id: int) -> List[str]: return get_reducer_output_node_names(self.name, target_node_name, port_id, self.output_port_id, self.inplace) -def get_mean_statistic_collector( - num_samples: int, channel_axis: int, window_size: Optional[int] = None, inplace: bool = True -) -> TensorCollector: +def get_mean_statistic_collector(num_samples: int, channel_axis: int, inplace: bool = True) -> TensorCollector: """ Mean statistic collector builder. :param num_samples: Maximum number of samples to collect. :param channel_axis: Channel axis to use during reduction phase. - :param window_size: Number of samples from the end of the list of collected samples to aggregate. - Aggregates all available collected statistics in case parameter is None. :param inplace: Whether the mean reducer should be calculated inplace or out of place. :return: Mean statistic collector. 
""" @@ -296,7 +292,6 @@ def get_mean_statistic_collector( kwargs = { "tensor_processor": OVNNCFCollectorTensorProcessor, "num_samples": num_samples, - "window_size": window_size, } aggregate_mean = MeanAggregator(**kwargs) aggregate_shape = ShapeAggregator() diff --git a/nncf/quantization/algorithms/bias_correction/openvino_backend.py b/nncf/quantization/algorithms/bias_correction/openvino_backend.py index 7af72dec173..26ebe67df5d 100644 --- a/nncf/quantization/algorithms/bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/bias_correction/openvino_backend.py @@ -67,9 +67,8 @@ def mean_statistic_collector( channel_axis: int, inplace: bool, num_samples: Optional[int] = None, - window_size: Optional[int] = None, ) -> TensorCollector: - return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) + return get_mean_statistic_collector(num_samples, channel_axis, inplace) @staticmethod def raw_statistic_collector(inplace: bool, num_samples: int = None) -> TensorCollector: diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index d2744da5864..9aff7277d30 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -53,9 +53,8 @@ def mean_statistic_collector( channel_axis: int, inplace: bool, num_samples: Optional[int] = None, - window_size: Optional[int] = None, ) -> TensorCollector: - return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) + return get_mean_statistic_collector(num_samples, channel_axis, inplace) @staticmethod def get_sub_input_output_names(subgraph: ov.Model) -> Tuple[str, str]: diff --git a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py index 193be8994d9..d89659a6549 100644 --- 
a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py @@ -65,9 +65,8 @@ def mean_statistic_collector( channel_axis: int, inplace: bool, num_samples: Optional[int] = None, - window_size: Optional[int] = None, ) -> TensorCollector: - return get_mean_statistic_collector(num_samples, channel_axis, window_size) + return get_mean_statistic_collector(num_samples, channel_axis) @staticmethod def get_sub_input_output_names(subgraph: NNCFNetwork) -> Tuple[str, str]: diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 5412d42853d..0b75a82fac9 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -122,36 +122,45 @@ def unify_statistics(statistics: List[OVMinMaxTensorStatistic]) -> OVMinMaxTenso return OVMinMaxTensorStatistic(min_values=min_values, max_values=max_values) @staticmethod - def _get_reduction_axes_and_use_abs_max( + def _get_activation_shape(target_point, nncf_graph, node): + if target_point.type == TargetType.PRE_LAYER_OPERATION: + return nncf_graph.get_input_edges(node)[target_point.port_id].tensor_shape + elif target_point.type == TargetType.POST_LAYER_OPERATION: + return nncf_graph.get_output_edges(node)[target_point.port_id].tensor_shape + else: + raise NotImplementedError(f"Unsupported target point type {target_point.type}.") + + @staticmethod + def _get_batch_axis(): + return 0 # TODO (?) 
+ + @staticmethod + def _get_reduction_axes( nncf_graph: NNCFGraph, target_point: OVTargetPoint, quantizer_config: QuantizerConfig ) -> Tuple[ReductionAxes, bool]: - use_abs_max = quantizer_config.mode == QuantizationMode.SYMMETRIC - if not quantizer_config.per_channel: - return None, use_abs_max - node = nncf_graph.get_node_by_name(target_point.target_node_name) - if not target_point.is_weight_target_point(): - if target_point.type == TargetType.PRE_LAYER_OPERATION: - shape = nncf_graph.get_input_edges(node)[target_point.port_id].tensor_shape - elif target_point.type == TargetType.POST_LAYER_OPERATION: - shape = nncf_graph.get_output_edges(node)[target_point.port_id].tensor_shape + if target_point.is_weight_target_point(): + assert isinstance(node.layer_attributes, OVLayerAttributes) + shape = node.layer_attributes.constant_attributes[target_point.port_id]["shape"] + if quantizer_config.per_channel: + channel_axes = get_weight_channel_axes(node, target_point.port_id) + reduction_axes = get_channel_agnostic_reduction_axes(channel_axes, shape) else: - raise NotImplementedError(f"Unsupported target point type {target_point.type}.") - - # TODO (l-bat): Disable quantizer propagation through layout changing operations - channel_axis = 1 # OpenVINO activations have channel first layout: [N, C, Z, Y, X] - axes = get_channel_agnostic_reduction_axes([channel_axis], shape) - return axes, use_abs_max - - assert isinstance(node.layer_attributes, OVLayerAttributes) - const_shape = node.layer_attributes.constant_attributes[target_point.port_id]["shape"] + reduction_axes = tuple(range(len(shape))) + return reduction_axes + shape = OVMinMaxAlgoBackend._get_activation_shape(target_point, nncf_graph, node) if quantizer_config.per_channel: - channel_axes = get_weight_channel_axes(node, target_point.port_id) - axes = get_channel_agnostic_reduction_axes(channel_axes, const_shape) + # TODO (l-bat): Disable quantizer propagation through layout changing operations + axis = 1 # OpenVINO 
activations have channel first layout: [N, C, Z, Y, X] else: - axes = tuple(range(len(const_shape))) - return axes, use_abs_max + axis = OVMinMaxAlgoBackend._get_batch_axis() + reduction_axes = get_channel_agnostic_reduction_axes([axis], shape) + return reduction_axes + + @staticmethod + def _get_aggregation_axes(target_point: OVTargetPoint): + return None if target_point.is_weight_target_point() else (0,) @staticmethod def get_statistic_collector( @@ -162,25 +171,15 @@ def get_statistic_collector( inplace: bool, num_samples: int = None, ) -> TensorCollector: - reduction_axes, use_abs_max = OVMinMaxAlgoBackend._get_reduction_axes_and_use_abs_max( - nncf_graph, target_point, quantizer_config - ) + use_abs_max = quantizer_config.mode == QuantizationMode.SYMMETRIC + reduction_axes = OVMinMaxAlgoBackend._get_reduction_axes(nncf_graph, target_point, quantizer_config) + aggregation_axes = OVMinMaxAlgoBackend._get_aggregation_axes(target_point) collector = TensorCollector(OVMinMaxTensorStatistic) for params, container_key in zip( [range_estimator_params.min, range_estimator_params.max], [OVMinMaxTensorStatistic.MIN_STAT, OVMinMaxTensorStatistic.MAX_STAT], ): - if not params.statistics_type in OV_REDUCERS_MAP: - raise RuntimeError( - f"Statistic type: {params.statistics_type} is not supported for OpenVino PTQ backend yet." - ) - - if not params.aggregator_type in AGGREGATORS_MAP: - raise RuntimeError( - f"Aggregator type: {params.aggregator_type} is not supported for OpenVino PTQ backend yet." 
- ) - kwargs = {"reduction_axes": reduction_axes, "inplace": inplace} if params.statistics_type in [StatisticsType.QUANTILE, StatisticsType.ABS_QUANTILE]: if container_key == OVMinMaxTensorStatistic.MIN_STAT: @@ -194,8 +193,11 @@ def get_statistic_collector( statistic_type = StatisticsType.ABS_MAX reducer = OV_REDUCERS_MAP[statistic_type](**kwargs) - kwargs = {"num_samples": num_samples, "tensor_processor": OVNNCFCollectorTensorProcessor} - aggregator = AGGREGATORS_MAP[params.aggregator_type](**kwargs) + aggregator = AGGREGATORS_MAP[params.aggregator_type]( + num_samples=num_samples, + aggregation_axes=aggregation_axes, + tensor_processor=OVNNCFCollectorTensorProcessor, + ) collector.register_statistic_branch(container_key, reducer, aggregator) return collector diff --git a/nncf/torch/tensor_statistics/collectors.py b/nncf/torch/tensor_statistics/collectors.py index 4089fb77f2e..2bab689dcc3 100644 --- a/nncf/torch/tensor_statistics/collectors.py +++ b/nncf/torch/tensor_statistics/collectors.py @@ -20,6 +20,7 @@ from nncf.common.tensor_statistics.collectors import NNCFTensor from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer +from nncf.experimental.common.tensor_statistics.collectors import AggregatorBase from nncf.experimental.common.tensor_statistics.collectors import BatchMeanReducer from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator from nncf.experimental.common.tensor_statistics.collectors import MaxReducer @@ -33,7 +34,6 @@ from nncf.experimental.common.tensor_statistics.collectors import PercentileAggregator from nncf.experimental.common.tensor_statistics.collectors import QuantileReducer from nncf.experimental.common.tensor_statistics.collectors import ShapeAggregator -from nncf.experimental.common.tensor_statistics.collectors import TensorAggregatorBase from nncf.experimental.common.tensor_statistics.collectors 
import TensorCollector from nncf.quantization.advanced_parameters import StatisticsType from nncf.torch.tensor import PTNNCFTensor @@ -425,8 +425,8 @@ def get_percentile_tensor_collector( def _get_collection_without_reduction( - aggregator_cls: TensorAggregatorBase, - statistic_cls: TensorAggregatorBase, + aggregator_cls: AggregatorBase, + statistic_cls: AggregatorBase, reduction_axes: Tuple[int, ...], aggregation_axes: Tuple[int, ...], num_samples: int, @@ -496,9 +496,7 @@ def get_mean_percentile_statistic_collector( return tensor_collector -def get_mean_statistic_collector( - num_samples: int, channel_axis: int, window_size: Optional[int] = None -) -> TensorCollector: +def get_mean_statistic_collector(num_samples: int, channel_axis: int) -> TensorCollector: """ Mean statistic collector builder. @@ -517,7 +515,6 @@ def get_mean_statistic_collector( kwargs = { "tensor_processor": PTNNCFCollectorTensorProcessor, "num_samples": num_samples, - "window_size": window_size, } aggregate_mean = MeanAggregator(**kwargs) aggregate_shape = ShapeAggregator() From b5d15cde1c1232c5fff94b408738f035575401ee Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 18 Oct 2023 16:25:09 +0200 Subject: [PATCH 004/108] typhints --- .../algorithms/min_max/openvino_backend.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 0b75a82fac9..b91c8928a2d 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -122,7 +122,7 @@ def unify_statistics(statistics: List[OVMinMaxTensorStatistic]) -> OVMinMaxTenso return OVMinMaxTensorStatistic(min_values=min_values, max_values=max_values) @staticmethod - def _get_activation_shape(target_point, nncf_graph, node): + def _get_activation_shape(target_point: OVTargetPoint, nncf_graph: NNCFGraph, node: NNCFNode) -> List[int]: 
if target_point.type == TargetType.PRE_LAYER_OPERATION: return nncf_graph.get_input_edges(node)[target_point.port_id].tensor_shape elif target_point.type == TargetType.POST_LAYER_OPERATION: @@ -131,13 +131,17 @@ def _get_activation_shape(target_point, nncf_graph, node): raise NotImplementedError(f"Unsupported target point type {target_point.type}.") @staticmethod - def _get_batch_axis(): + def _get_batch_axis() -> int: return 0 # TODO (?) + @staticmethod + def _get_aggregation_axes(target_point: OVTargetPoint): + return None if target_point.is_weight_target_point() else (0,) + @staticmethod def _get_reduction_axes( nncf_graph: NNCFGraph, target_point: OVTargetPoint, quantizer_config: QuantizerConfig - ) -> Tuple[ReductionAxes, bool]: + ) -> ReductionAxes: node = nncf_graph.get_node_by_name(target_point.target_node_name) if target_point.is_weight_target_point(): assert isinstance(node.layer_attributes, OVLayerAttributes) @@ -158,10 +162,6 @@ def _get_reduction_axes( reduction_axes = get_channel_agnostic_reduction_axes([axis], shape) return reduction_axes - @staticmethod - def _get_aggregation_axes(target_point: OVTargetPoint): - return None if target_point.is_weight_target_point() else (0,) - @staticmethod def get_statistic_collector( range_estimator_params: RangeEstimatorParameters, From 1034acdbac34ea8488b666e5b9b4edecc18ab2a5 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 19 Oct 2023 15:36:06 +0200 Subject: [PATCH 005/108] fix OV tests; update collectors --- .../common/tensor_statistics/collectors.py | 76 ++++++++++----- .../test_reducers_and_aggregators.py | 92 +++++-------------- .../quantization/test_quantizer_config.py | 14 ++- 3 files changed, 89 insertions(+), 93 deletions(-) diff --git a/nncf/experimental/common/tensor_statistics/collectors.py b/nncf/experimental/common/tensor_statistics/collectors.py index 5d64c84eb47..87de1179cc0 100644 --- a/nncf/experimental/common/tensor_statistics/collectors.py +++ 
b/nncf/experimental/common/tensor_statistics/collectors.py @@ -134,7 +134,8 @@ def __init__( """ self._tensor_processor = tensor_processor - self._aggregation_axes = (0,) if aggregation_axes is None else (0, *map(lambda x: x + 1, aggregation_axes)) + self._tensor_aggregation_axes = tuple(aggregation_axes) if aggregation_axes is not None else aggregation_axes + self._stacked_tensor_aggregation_axis = 0 self._keepdims = True self._num_samples = num_samples self._collected_samples = 0 @@ -600,9 +601,15 @@ class OnlineAggregatorBase(AggregatorBase, ABC): """ def _register_reduced_input_impl(self, x: NNCFTensor) -> None: - stacked_tensors = self._tensor_processor.stack([x, *self._container]) - aggregated = self._aggregation_fn(stacked_tensors, axis=self._aggregation_axes, keepdims=self._keepdims) - squeezed = self._tensor_processor.squeeze(aggregated, 0) + if self._tensor_aggregation_axes is not None: + x = self._aggregation_fn(x, axis=self._tensor_aggregation_axes, keepdims=self._keepdims) + stacked_tensors = self._tensor_processor.stack( + [x, *self._container], axis=self._stacked_tensor_aggregation_axis + ) + aggregated_tensors = self._aggregation_fn( + stacked_tensors, axis=self._stacked_tensor_aggregation_axis, keepdims=self._keepdims + ) + squeezed = self._tensor_processor.squeeze(aggregated_tensors, self._stacked_tensor_aggregation_axis) self._container = [squeezed] def _aggregate_impl(self) -> NNCFTensor: @@ -636,9 +643,12 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: self._container.append(x) def _aggregate_impl(self) -> NNCFTensor: - stacked_val = self._tensor_processor.stack(self._container) - aggregated = self._aggregation_fn(stacked_val, axis=self._aggregation_axes, keepdims=self._keepdims) - return self._tensor_processor.squeeze(aggregated, 0).tensor + stacked_val = self._tensor_processor.stack(self._container, axis=self._stacked_tensor_aggregation_axis) + aggregation_axes = get_stacked_tensor_all_aggregation_axes( + 
self._tensor_aggregation_axes, self._stacked_tensor_aggregation_axis + ) + aggregated = self._aggregation_fn(stacked_val, axis=aggregation_axes, keepdims=self._keepdims) + return self._tensor_processor.squeeze(aggregated, self._stacked_tensor_aggregation_axis).tensor @abstractmethod def _aggregation_fn(self, stacked_value: NNCFTensor, axis: AggregationAxes, keepdims: bool) -> NNCFTensor: @@ -667,15 +677,22 @@ def __init__( self._quantile = quantile def _aggregate_impl(self) -> NNCFTensor: - stacked_samples = self._tensor_processor.stack(self._container) + stacked_samples = self._tensor_processor.stack(self._container, axis=self._stacked_tensor_aggregation_axis) + aggregation_axes = get_stacked_tensor_all_aggregation_axes( + self._tensor_aggregation_axes, self._stacked_tensor_aggregation_axis + ) low_values, high_values = self._tensor_processor.quantile( - stacked_samples, quantile=(self._quantile, 1 - self._quantile), axis=self._aggregation_axes + stacked_samples, quantile=(self._quantile, 1 - self._quantile), axis=aggregation_axes ) tp = self._tensor_processor outliers_mask = tp.logical_or(tp.less(stacked_samples, low_values), tp.less(high_values, stacked_samples)) - return self._aggregation_fn( - stacked_samples=stacked_samples, mask=outliers_mask, axis=self._aggregation_axes, keepdims=self._keepdims - ).tensor + aggregated = self._aggregation_fn( + stacked_samples=stacked_samples, + mask=outliers_mask, + axis=aggregation_axes, + keepdims=self._keepdims, + ) + return self._tensor_processor.squeeze(aggregated, self._stacked_tensor_aggregation_axis).tensor @abstractmethod def _aggregation_fn( @@ -710,22 +727,24 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: def _aggregate_impl(self) -> Dict[str, NNCFTensor]: stacked_val = self._tensor_processor.stack(self._container) - + aggregation_axes = get_stacked_tensor_all_aggregation_axes( + self._tensor_aggregation_axes, self._stacked_tensor_aggregation_axis + ) mask = 
self._tensor_processor.zero_elements(stacked_val) median_per_ch = self._tensor_processor.masked_median( - stacked_val, mask=mask, axis=self._aggregation_axes, keepdims=True + stacked_val, mask=mask, axis=aggregation_axes, keepdims=True ) mad_values = self._tensor_processor.median( self._tensor_processor.abs(self._tensor_processor.sub(stacked_val, median_per_ch)), - axis=self._aggregation_axes, + axis=aggregation_axes, keepdims=self._keepdims, ) - if not self._keepdims: - median_per_ch = self._tensor_processor.squeeze(median_per_ch, self._aggregation_axes) + squeezed_mad_values = self._tensor_processor.squeeze(mad_values, self._stacked_tensor_aggregation_axis) + squeezed_median_per_ch = self._tensor_processor.squeeze(median_per_ch, self._stacked_tensor_aggregation_axis) return { - MedianMADTensorStatistic.MEDIAN_VALUES_STAT: median_per_ch.tensor, - MedianMADTensorStatistic.MAD_VALUES_STAT: mad_values.tensor, + MedianMADTensorStatistic.MEDIAN_VALUES_STAT: squeezed_median_per_ch.tensor, + MedianMADTensorStatistic.MAD_VALUES_STAT: squeezed_mad_values.tensor, } @@ -745,13 +764,17 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: def _aggregate_impl(self) -> Dict[float, NNCFTensor]: stacked_val = self._tensor_processor.stack(self._container) - + aggregation_axes = get_stacked_tensor_all_aggregation_axes( + self._tensor_aggregation_axes, self._stacked_tensor_aggregation_axis + ) percentiles = self._tensor_processor.percentile( - stacked_val, self._percentiles_to_collect, axis=self._aggregation_axes, keepdims=self._keepdims + stacked_val, self._percentiles_to_collect, axis=aggregation_axes, keepdims=self._keepdims ) retval = {} for idx, percentile in enumerate(self._percentiles_to_collect): - retval[percentile] = percentiles[idx].tensor + retval[percentile] = self._tensor_processor.squeeze( + percentiles[idx], self._stacked_tensor_aggregation_axis + ).tensor return retval @@ -763,3 +786,12 @@ def _aggregate_impl(self) -> Dict[float, NNCFTensor]: 
AggregatorType.MEDIAN: MedianAggregator, AggregatorType.MEDIAN_NO_OUTLIERS: MedianNoOutliersAggregator, } + + +def get_stacked_tensor_all_aggregation_axes(_tensor_aggregation_axes, _stacked_tensor_aggregation_axis): + f = lambda x: x + 1 + if _tensor_aggregation_axes is not None: + aggregation_axes = (_stacked_tensor_aggregation_axis, *map(f, _tensor_aggregation_axes)) + else: + aggregation_axes = _stacked_tensor_aggregation_axis + return aggregation_axes diff --git a/tests/common/experimental/test_reducers_and_aggregators.py b/tests/common/experimental/test_reducers_and_aggregators.py index e307fb73f84..dfabfdea101 100644 --- a/tests/common/experimental/test_reducers_and_aggregators.py +++ b/tests/common/experimental/test_reducers_and_aggregators.py @@ -65,45 +65,9 @@ class OfflineAggregatorTestCase: max_ref=np.array([[[50000, 4, 8], [12, 16, 20], [24, 28, 32]]]), ), OfflineAggregatorTestCase( - aggregation_axes=(0,), - min_ref=np.array([[[-50000, -4, -8], [-12, -16, -20], [-24, -28, -32]]]), - max_ref=np.array([[[50000, 4, 8], [12, 16, 20], [24, 28, 32]]]), - ), - OfflineAggregatorTestCase( - aggregation_axes=(0, 2), - min_ref=np.array([[-50000, -28, -32]]), - max_ref=np.array([[50000, 28, 32]]), - ), - OfflineAggregatorTestCase( - aggregation_axes=(2,), - min_ref=np.array( - [ - [[-50000, 5, 10]], - [[-40000, 4, 8]], - [[-30000, 3, 6]], - [[-20000, 2, 4]], - [[-10000, 1, 2]], - [[0, 0, 0]], - [[-6, -7, -8]], - [[-12, -14, -16]], - [[-18, -21, -24]], - [[-24, -28, -32]], - ] - ), - max_ref=np.array( - [ - [[50000, -5, -10]], - [[40000, -4, -8]], - [[30000, -3, -6]], - [[20000, -2, -4]], - [[10000, -1, -2]], - [[0, 0, 0]], - [[6, 7, 8]], - [[12, 14, 16]], - [[18, 21, 24]], - [[24, 28, 32]], - ] - ), + aggregation_axes=(1,), + min_ref=np.array([[[-50000, -28, -32]]]), + max_ref=np.array([[[50000, 28, 32]]]), ), ] @@ -256,8 +220,8 @@ def test_min_max_aggregators( assert self.all_close(max_aggregator.aggregate(), max_ref) NO_OUTLIERS_TEST_PARAMS = [ - 
(MeanAggregator, True, 1, 1404.5138888888905), - (MedianAggregator, True, 1, 24.0), + (MeanAggregator, True, 1, [1404.5138888888905]), + (MedianAggregator, True, 1, [24.0]), ( MeanAggregator, False, @@ -265,16 +229,16 @@ def test_min_max_aggregators( [2503.125, -2493.75, 5009.375, -4987.5, 7515.625, -7481.25, 10021.875, -9975.0, 12528.125], ), (MedianAggregator, False, 1, [4.5, 5.0, 13.5, 10.0, 22.5, 15.0, 31.5, 20.0, 40.5]), - (MeanAggregator, True, 2, [2512.5, -1651.04166667, 3352.08333333]), - (MedianAggregator, True, 2, [13.0, 12.5, 21.0]), + (MeanAggregator, True, 2, [[2512.5, -1651.04166667, 3352.08333333]]), + (MedianAggregator, True, 2, [[13.0, 12.5, 21.0]]), (MeanAggregator, False, 2, DEFALUT_3D_MEAN_VALUE), (MedianAggregator, False, 2, DEFALUT_3D_MEDIAN_VALUE), - (MeanAggregator, True, 3, DEFALUT_3D_MEAN_VALUE), - (MedianAggregator, True, 3, DEFALUT_3D_MEDIAN_VALUE), + (MeanAggregator, True, 3, [DEFALUT_3D_MEAN_VALUE]), + (MedianAggregator, True, 3, [DEFALUT_3D_MEDIAN_VALUE]), (MeanAggregator, False, 3, [DEFALUT_3D_MEAN_VALUE]), (MedianAggregator, False, 3, [DEFALUT_3D_MEDIAN_VALUE]), - (default_test_mean_no_outlier, True, 1, 20.0893), - (default_test_median_no_outlier, True, 1, 30.0), + (default_test_mean_no_outlier, True, 1, [20.0893]), + (default_test_median_no_outlier, True, 1, [30.0]), ( default_test_mean_no_outlier, False, @@ -282,12 +246,12 @@ def test_min_max_aggregators( [4.16666667, 8.33333333, 12.5, 16.66666667, 20.83333333, 25.0, 29.16666667, 33.33333333, 37.5], ), (default_test_median_no_outlier, False, 1, [5.0, 4.0, 15.0, 8.0, 25.0, 12.0, 35.0, 16.0, 45.0]), - (default_test_mean_no_outlier, True, 2, [16.66666667, 20.83333333, 25.0]), - (default_test_median_no_outlier, True, 2, [14.0, 10.0, 24.0]), + (default_test_mean_no_outlier, True, 2, [[16.66666667, 20.83333333, 25.0]]), + (default_test_median_no_outlier, True, 2, [[14.0, 10.0, 24.0]]), (default_test_mean_no_outlier, False, 2, NO_OUTLIERS_DEFAULT_3D_MEAN_VALUE), 
(default_test_median_no_outlier, False, 2, NO_OUTLIERS_DEFAULT_3D_MEDIAN_VALUE), - (default_test_mean_no_outlier, True, 3, NO_OUTLIERS_DEFAULT_3D_MEAN_VALUE), - (default_test_median_no_outlier, True, 3, NO_OUTLIERS_DEFAULT_3D_MEDIAN_VALUE), + (default_test_mean_no_outlier, True, 3, [NO_OUTLIERS_DEFAULT_3D_MEAN_VALUE]), + (default_test_median_no_outlier, True, 3, [NO_OUTLIERS_DEFAULT_3D_MEDIAN_VALUE]), (default_test_mean_no_outlier, False, 3, [NO_OUTLIERS_DEFAULT_3D_MEAN_VALUE]), (default_test_median_no_outlier, False, 3, [NO_OUTLIERS_DEFAULT_3D_MEDIAN_VALUE]), ] @@ -305,7 +269,7 @@ def test_mean_median_agggregators(self, aggregator_cls, refs, tensor_processor, input_ = input_.reshape((1, 3, 3)) input_with_outliers = input_with_outliers.reshape((1, 3, 3)) - aggregation_axes = (0, 1) if use_per_sample_stats else (0,) + aggregation_axes = (0,) if use_per_sample_stats else None aggregator = aggregator_cls(tensor_processor=tensor_processor, aggregation_axes=aggregation_axes) for i in range(1, 6): aggregator.register_reduced_input(self.get_nncf_tensor(input_ * i, Dtype.FLOAT)) @@ -333,12 +297,8 @@ def test_mean_median_agggregators(self, aggregator_cls, refs, tensor_processor, "mad_values": np.array([2.5, 5.0, 7.5, 10.0, 12.5, 15.0, 17.5, 20.0, 22.5]), }, (0,): { - "median_values": np.array([4.5, 9.0, 13.5, 18.0, 22.5, 27.0, 31.5, 36.0, 40.5]), - "mad_values": np.array([2.5, 5.0, 7.5, 10.0, 12.5, 15.0, 17.5, 20.0, 22.5]), - }, - (0, 1): { - "median_values": np.array(18.0), - "mad_values": np.array(12.0), + "median_values": np.array([18.0]), + "mad_values": np.array([12.0]), }, }, PercentileAggregator: { @@ -349,16 +309,10 @@ def test_mean_median_agggregators(self, aggregator_cls, refs, tensor_processor, 95: np.array([7.6, 15.2, 22.8, 30.4, 38.0, 45.6, 53.2, 60.8, 68.4]), }, (0,): { - 5: np.array([0.4, 0.8, 1.2, 1.6, 2.0, 2.4, 2.8, 3.2, 3.6]), - 10: np.array([0.8, 1.6, 2.4, 3.2, 4.0, 4.8, 5.6, 6.4, 7.2]), - 90: np.array([7.2, 14.4, 21.6, 28.8, 36.0, 43.2, 50.4, 57.6, 
64.8]), - 95: np.array([7.6, 15.2, 22.8, 30.4, 38.0, 45.6, 53.2, 60.8, 68.4]), - }, - (0, 1): { - 5: np.array(0.0), - 10: np.array(0.0), - 90: np.array(48.0), - 95: np.array(56.0), + 5: np.array([0.0]), + 10: np.array([0.0]), + 90: np.array([48.0]), + 95: np.array([56.0]), }, }, } @@ -373,7 +327,7 @@ def test_mean_median_agggregators(self, aggregator_cls, refs, tensor_processor, ), ], ) - @pytest.mark.parametrize("aggregation_axes", [None, (0,), (0, 1)]) + @pytest.mark.parametrize("aggregation_axes", [None, (0,)]) def test_mad_percentile_aggregators(self, aggregator_cls, tensor_processor, aggregation_axes): aggregator = aggregator_cls(tensor_processor=tensor_processor, aggregation_axes=aggregation_axes) input_ = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.float32) diff --git a/tests/openvino/native/quantization/test_quantizer_config.py b/tests/openvino/native/quantization/test_quantizer_config.py index 773f5996604..fab774506f4 100644 --- a/tests/openvino/native/quantization/test_quantizer_config.py +++ b/tests/openvino/native/quantization/test_quantizer_config.py @@ -59,8 +59,18 @@ def get_reduction_axes(self, reducer: TensorReducerBase) -> ReductionAxes: (TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (0, 2), (0, 1, 2)), marks=pytest.mark.skip("Ticket 102414: remove hardcoded axes for activations"), ), - (TargetType.POST_LAYER_OPERATION, "/Conv_1_0", (0, 2, 3), None), - (TargetType.OPERATION_WITH_WEIGHTS, "/Conv_1_0", (1, 2, 3), None), + ( + TargetType.POST_LAYER_OPERATION, + "/Conv_1_0", + (0, 2, 3), + (1, 2, 3), + ), # per-tensor: all tensor tensor except batch index is reduced + ( + TargetType.OPERATION_WITH_WEIGHTS, + "/Conv_1_0", + (1, 2, 3), + (0, 1, 2, 3), + ), # per-tensor: all weight tensor is reduced ] ) def statistic_collector_parameters(self, request) -> ParamsCls: From 8b526c53fd23bd9dc3f25d988f46a0ebd28b85da Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 20 Oct 2023 12:11:25 +0200 Subject: [PATCH 006/108] fix tests --- 
.../common/tensor_statistics/collectors.py | 54 +++++++++---------- .../reference_scales/DynamicModel_mixed.json | 8 +-- .../DynamicModel_performance.json | 8 +-- .../reference_scales/IntegerModel_mixed.json | 16 +++--- .../IntegerModel_performance.json | 4 +- .../reference_scales/MatMul2DModel_mixed.json | 4 +- .../MatMul2DModel_performance.json | 4 +- .../ScaleShiftReluModel_mixed.json | 12 ++--- .../ScaleShiftReluModel_performance.json | 12 ++--- .../test_templates/test_channel_alignment.py | 4 +- tests/torch/ptq/test_quantizer_config.py | 4 +- 11 files changed, 64 insertions(+), 66 deletions(-) diff --git a/nncf/experimental/common/tensor_statistics/collectors.py b/nncf/experimental/common/tensor_statistics/collectors.py index 87de1179cc0..635a5823458 100644 --- a/nncf/experimental/common/tensor_statistics/collectors.py +++ b/nncf/experimental/common/tensor_statistics/collectors.py @@ -136,6 +136,13 @@ def __init__( self._tensor_processor = tensor_processor self._tensor_aggregation_axes = tuple(aggregation_axes) if aggregation_axes is not None else aggregation_axes self._stacked_tensor_aggregation_axis = 0 + if self._tensor_aggregation_axes is not None: + self._stacked_tensor_all_aggregation_axes = ( + self._stacked_tensor_aggregation_axis, + *map(lambda x: x + 1, self._tensor_aggregation_axes), + ) + else: + self._stacked_tensor_all_aggregation_axes = self._stacked_tensor_aggregation_axis self._keepdims = True self._num_samples = num_samples self._collected_samples = 0 @@ -601,7 +608,13 @@ class OnlineAggregatorBase(AggregatorBase, ABC): """ def _register_reduced_input_impl(self, x: NNCFTensor) -> None: - if self._tensor_aggregation_axes is not None: + """ + The function aggregates firstly the input tensor. 
+ + + :param NNCFTensor x: _description_ + """ + if self._tensor_aggregation_axes is not None: # Should aggregate firstly the tensor x = self._aggregation_fn(x, axis=self._tensor_aggregation_axes, keepdims=self._keepdims) stacked_tensors = self._tensor_processor.stack( [x, *self._container], axis=self._stacked_tensor_aggregation_axis @@ -644,10 +657,9 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: def _aggregate_impl(self) -> NNCFTensor: stacked_val = self._tensor_processor.stack(self._container, axis=self._stacked_tensor_aggregation_axis) - aggregation_axes = get_stacked_tensor_all_aggregation_axes( - self._tensor_aggregation_axes, self._stacked_tensor_aggregation_axis + aggregated = self._aggregation_fn( + stacked_val, axis=self._stacked_tensor_all_aggregation_axes, keepdims=self._keepdims ) - aggregated = self._aggregation_fn(stacked_val, axis=aggregation_axes, keepdims=self._keepdims) return self._tensor_processor.squeeze(aggregated, self._stacked_tensor_aggregation_axis).tensor @abstractmethod @@ -678,18 +690,17 @@ def __init__( def _aggregate_impl(self) -> NNCFTensor: stacked_samples = self._tensor_processor.stack(self._container, axis=self._stacked_tensor_aggregation_axis) - aggregation_axes = get_stacked_tensor_all_aggregation_axes( - self._tensor_aggregation_axes, self._stacked_tensor_aggregation_axis - ) low_values, high_values = self._tensor_processor.quantile( - stacked_samples, quantile=(self._quantile, 1 - self._quantile), axis=aggregation_axes + stacked_samples, + quantile=(self._quantile, 1 - self._quantile), + axis=self._stacked_tensor_all_aggregation_axes, ) tp = self._tensor_processor outliers_mask = tp.logical_or(tp.less(stacked_samples, low_values), tp.less(high_values, stacked_samples)) aggregated = self._aggregation_fn( stacked_samples=stacked_samples, mask=outliers_mask, - axis=aggregation_axes, + axis=self._stacked_tensor_all_aggregation_axes, keepdims=self._keepdims, ) return self._tensor_processor.squeeze(aggregated, 
self._stacked_tensor_aggregation_axis).tensor @@ -727,17 +738,13 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: def _aggregate_impl(self) -> Dict[str, NNCFTensor]: stacked_val = self._tensor_processor.stack(self._container) - aggregation_axes = get_stacked_tensor_all_aggregation_axes( - self._tensor_aggregation_axes, self._stacked_tensor_aggregation_axis - ) mask = self._tensor_processor.zero_elements(stacked_val) median_per_ch = self._tensor_processor.masked_median( - stacked_val, mask=mask, axis=aggregation_axes, keepdims=True + stacked_val, mask=mask, axis=self._stacked_tensor_all_aggregation_axes, keepdims=True ) - mad_values = self._tensor_processor.median( self._tensor_processor.abs(self._tensor_processor.sub(stacked_val, median_per_ch)), - axis=aggregation_axes, + axis=self._stacked_tensor_all_aggregation_axes, keepdims=self._keepdims, ) squeezed_mad_values = self._tensor_processor.squeeze(mad_values, self._stacked_tensor_aggregation_axis) @@ -764,11 +771,11 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: def _aggregate_impl(self) -> Dict[float, NNCFTensor]: stacked_val = self._tensor_processor.stack(self._container) - aggregation_axes = get_stacked_tensor_all_aggregation_axes( - self._tensor_aggregation_axes, self._stacked_tensor_aggregation_axis - ) percentiles = self._tensor_processor.percentile( - stacked_val, self._percentiles_to_collect, axis=aggregation_axes, keepdims=self._keepdims + stacked_val, + self._percentiles_to_collect, + axis=self._stacked_tensor_all_aggregation_axes, + keepdims=self._keepdims, ) retval = {} for idx, percentile in enumerate(self._percentiles_to_collect): @@ -786,12 +793,3 @@ def _aggregate_impl(self) -> Dict[float, NNCFTensor]: AggregatorType.MEDIAN: MedianAggregator, AggregatorType.MEDIAN_NO_OUTLIERS: MedianNoOutliersAggregator, } - - -def get_stacked_tensor_all_aggregation_axes(_tensor_aggregation_axes, _stacked_tensor_aggregation_axis): - f = lambda x: x + 1 - if 
_tensor_aggregation_axes is not None: - aggregation_axes = (_stacked_tensor_aggregation_axis, *map(f, _tensor_aggregation_axes)) - else: - aggregation_axes = _stacked_tensor_aggregation_axis - return aggregation_axes diff --git a/tests/openvino/native/data/2023.1/reference_scales/DynamicModel_mixed.json b/tests/openvino/native/data/2023.1/reference_scales/DynamicModel_mixed.json index 9cf4da1968f..b7df9051e7e 100644 --- a/tests/openvino/native/data/2023.1/reference_scales/DynamicModel_mixed.json +++ b/tests/openvino/native/data/2023.1/reference_scales/DynamicModel_mixed.json @@ -94,9 +94,9 @@ ] }, "Sub/fq_output_0": { - "input_low": -0.6373578310012817, - "input_high": 0.9560367465019226, - "output_low": -0.6373578310012817, - "output_high": 0.9560367465019226 + "input_low": -0.5242199897766113, + "input_high": 0.8538841009140015, + "output_low": -0.5242199897766113, + "output_high": 0.8538841009140015 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.1/reference_scales/DynamicModel_performance.json b/tests/openvino/native/data/2023.1/reference_scales/DynamicModel_performance.json index 9727ddf1990..b450a080161 100644 --- a/tests/openvino/native/data/2023.1/reference_scales/DynamicModel_performance.json +++ b/tests/openvino/native/data/2023.1/reference_scales/DynamicModel_performance.json @@ -94,9 +94,9 @@ ] }, "Sub/fq_output_0": { - "input_low": -0.9635645747184753, - "input_high": 0.9560367465019226, - "output_low": -0.9635645747184753, - "output_high": 0.9560367465019226 + "input_low": -0.8591098189353943, + "input_high": 0.8523980379104614, + "output_low": -0.8591098189353943, + "output_high": 0.8523980379104614 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.1/reference_scales/IntegerModel_mixed.json b/tests/openvino/native/data/2023.1/reference_scales/IntegerModel_mixed.json index 4dc60d69c0a..5c0b63fad2d 100644 --- a/tests/openvino/native/data/2023.1/reference_scales/IntegerModel_mixed.json +++ 
b/tests/openvino/native/data/2023.1/reference_scales/IntegerModel_mixed.json @@ -94,10 +94,10 @@ "output_high": 0.91275554895401 }, "MatMul_1/fq_output_0": { - "input_low": 0.0, - "input_high": 1.7037991285324097, - "output_low": 0.0, - "output_high": 1.7037991285324097 + "input_low": -0.0008062992128543556, + "input_high": 0.0007999999797903001, + "output_low": -0.0008062992128543556, + "output_high": 0.0007999999797903001 }, "MatMul_1/fq_weights_1": { "input_low": [ @@ -182,10 +182,10 @@ ] }, "Gather_2/fq_output_0": { - "input_low": 0.0, - "input_high": 0.91275554895401, - "output_low": 0.0, - "output_high": 0.91275554895401 + "input_low": -0.0008062992128543556, + "input_high": 0.0007999999797903001, + "output_low": -0.0008062992128543556, + "output_high": 0.0007999999797903001 }, "Gather_2/fq_weights_0": { "input_low": [ diff --git a/tests/openvino/native/data/2023.1/reference_scales/IntegerModel_performance.json b/tests/openvino/native/data/2023.1/reference_scales/IntegerModel_performance.json index 645988e4a01..71d3a71b1cf 100644 --- a/tests/openvino/native/data/2023.1/reference_scales/IntegerModel_performance.json +++ b/tests/openvino/native/data/2023.1/reference_scales/IntegerModel_performance.json @@ -95,9 +95,9 @@ }, "MatMul_1/fq_output_0": { "input_low": 0.0, - "input_high": 1.7037991285324097, + "input_high": 7.999999797903001e-05, "output_low": 0.0, - "output_high": 1.7037991285324097 + "output_high": 7.999999797903001e-05 }, "MatMul_1/fq_weights_1": { "input_low": [ diff --git a/tests/openvino/native/data/2023.1/reference_scales/MatMul2DModel_mixed.json b/tests/openvino/native/data/2023.1/reference_scales/MatMul2DModel_mixed.json index fd0ed29955f..7e7fd2cdd5e 100644 --- a/tests/openvino/native/data/2023.1/reference_scales/MatMul2DModel_mixed.json +++ b/tests/openvino/native/data/2023.1/reference_scales/MatMul2DModel_mixed.json @@ -27,8 +27,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.9350724220275879, + "input_high": 
0.8685823082923889, "output_low": 0.0, - "output_high": 0.9350724220275879 + "output_high": 0.8685823082923889 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.1/reference_scales/MatMul2DModel_performance.json b/tests/openvino/native/data/2023.1/reference_scales/MatMul2DModel_performance.json index fd0ed29955f..7e7fd2cdd5e 100644 --- a/tests/openvino/native/data/2023.1/reference_scales/MatMul2DModel_performance.json +++ b/tests/openvino/native/data/2023.1/reference_scales/MatMul2DModel_performance.json @@ -27,8 +27,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.9350724220275879, + "input_high": 0.8685823082923889, "output_low": 0.0, - "output_high": 0.9350724220275879 + "output_high": 0.8685823082923889 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.1/reference_scales/ScaleShiftReluModel_mixed.json b/tests/openvino/native/data/2023.1/reference_scales/ScaleShiftReluModel_mixed.json index 71700abfa50..31dddc43f04 100644 --- a/tests/openvino/native/data/2023.1/reference_scales/ScaleShiftReluModel_mixed.json +++ b/tests/openvino/native/data/2023.1/reference_scales/ScaleShiftReluModel_mixed.json @@ -35,15 +35,15 @@ }, "Relu/fq_output_0": { "input_low": 0.0, - "input_high": 2.5198161602020264, + "input_high": 2.1013052463531494, "output_low": 0.0, - "output_high": 2.5198161602020264 + "output_high": 2.1013052463531494 }, "MatMul/fq_output_0": { "input_low": 0.0, - "input_high": 2.1930606365203857, + "input_high": 1.6296062469482422, "output_low": 0.0, - "output_high": 2.1930606365203857 + "output_high": 1.6296062469482422 }, "MatMul/fq_weights_1": { "input_low": [ @@ -73,8 +73,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.9350724220275879, + "input_high": 0.8685823082923889, "output_low": 0.0, - "output_high": 0.9350724220275879 + "output_high": 0.8685823082923889 } } \ No newline at end of file diff --git 
a/tests/openvino/native/data/2023.1/reference_scales/ScaleShiftReluModel_performance.json b/tests/openvino/native/data/2023.1/reference_scales/ScaleShiftReluModel_performance.json index 71700abfa50..31dddc43f04 100644 --- a/tests/openvino/native/data/2023.1/reference_scales/ScaleShiftReluModel_performance.json +++ b/tests/openvino/native/data/2023.1/reference_scales/ScaleShiftReluModel_performance.json @@ -35,15 +35,15 @@ }, "Relu/fq_output_0": { "input_low": 0.0, - "input_high": 2.5198161602020264, + "input_high": 2.1013052463531494, "output_low": 0.0, - "output_high": 2.5198161602020264 + "output_high": 2.1013052463531494 }, "MatMul/fq_output_0": { "input_low": 0.0, - "input_high": 2.1930606365203857, + "input_high": 1.6296062469482422, "output_low": 0.0, - "output_high": 2.1930606365203857 + "output_high": 1.6296062469482422 }, "MatMul/fq_weights_1": { "input_low": [ @@ -73,8 +73,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.9350724220275879, + "input_high": 0.8685823082923889, "output_low": 0.0, - "output_high": 0.9350724220275879 + "output_high": 0.8685823082923889 } } \ No newline at end of file diff --git a/tests/post_training/test_templates/test_channel_alignment.py b/tests/post_training/test_templates/test_channel_alignment.py index 0ad51ff76c7..42b79eadb92 100644 --- a/tests/post_training/test_templates/test_channel_alignment.py +++ b/tests/post_training/test_templates/test_channel_alignment.py @@ -496,5 +496,5 @@ def test_statistic_collectors(self, inplace_ref, q_ref): for aggr in statistic_collector.aggregators.values(): assert isinstance(aggr, MedianAggregator) assert aggr.num_samples == num_samples_ref - assert not aggr._keepdims - assert aggr._aggregation_axes == (0,) + assert aggr._keepdims + assert aggr._stacked_tensor_all_aggregation_axes == 0 diff --git a/tests/torch/ptq/test_quantizer_config.py b/tests/torch/ptq/test_quantizer_config.py index 951f79149d7..58de7473dc0 100644 --- a/tests/torch/ptq/test_quantizer_config.py +++ 
b/tests/torch/ptq/test_quantizer_config.py @@ -55,8 +55,8 @@ def get_reduction_axes(self, reducer: TensorReducerBase) -> ReductionAxes: @pytest.fixture( params=[ (TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (0, 2), (0, 1, 2)), - (TargetType.POST_LAYER_OPERATION, "/Conv_1_0", (0, 2, 3), (0, 1, 2, 3)), - (TargetType.OPERATION_WITH_WEIGHTS, "/Conv_1_0", (1, 2, 3), (0, 1, 2, 3)), + (TargetType.POST_LAYER_OPERATION, "/Conv_1_0", (0, 2, 3), (1, 2, 3)), + (TargetType.OPERATION_WITH_WEIGHTS, "/Conv_1_0", (1, 2, 3), (1, 2, 3)), ] ) def statistic_collector_parameters(self, request) -> ParamsCls: From 37684bd1e3993fc5889e9d9db6c5acfe36a0bcfe Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 7 Nov 2023 13:20:55 +0100 Subject: [PATCH 007/108] add aggregation axes for OV; comment input check --- nncf/openvino/engine.py | 6 +++--- nncf/quantization/algorithms/min_max/openvino_backend.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nncf/openvino/engine.py b/nncf/openvino/engine.py index decd31a6364..47f09f32952 100644 --- a/nncf/openvino/engine.py +++ b/nncf/openvino/engine.py @@ -44,9 +44,9 @@ def _check_input_data_format( :param input_data: Provided inputs to infer the model. 
""" - actual_num_inputs = 1 if isinstance(input_data, np.ndarray) else len(input_data) - if actual_num_inputs != self.number_of_inputs: - raise RuntimeError(f"Model expects {self.number_of_inputs} inputs, but {actual_num_inputs} are provided.") + # actual_num_inputs = 1 if isinstance(input_data, np.ndarray) else len(input_data) + # if actual_num_inputs != self.number_of_inputs: + # raise RuntimeError(f"Model expects {self.number_of_inputs} inputs, but {actual_num_inputs} are provided.") if isinstance(input_data, dict): for name in input_data: if isinstance(name, str) and name not in self.input_tensor_names: diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 04e7a41614a..8b2dbbebf94 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -136,7 +136,7 @@ def _get_batch_axis() -> int: @staticmethod def _get_aggregation_axes(target_point: OVTargetPoint): - return None if target_point.is_weight_target_point() else (0,) + return None if target_point.is_weight_target_point() else (0, 1) @staticmethod def _get_reduction_axes( From 18d931d14c0fd5caf0051dc97cce01763fc2315a Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 8 Nov 2023 15:52:26 +0100 Subject: [PATCH 008/108] add test for OV and Torch --- .../test_tensor_collector_batch_size.py | 103 ++++++++++++++++++ .../test_tensor_collector_batch_size.py | 72 ++++++++++++ .../ptq/test_tensor_collector_batch_size.py | 73 +++++++++++++ 3 files changed, 248 insertions(+) create mode 100644 tests/common/experimental/test_tensor_collector_batch_size.py create mode 100644 tests/openvino/native/test_tensor_collector_batch_size.py create mode 100644 tests/torch/ptq/test_tensor_collector_batch_size.py diff --git a/tests/common/experimental/test_tensor_collector_batch_size.py b/tests/common/experimental/test_tensor_collector_batch_size.py new file mode 100644 
index 00000000000..6406610d6e2 --- /dev/null +++ b/tests/common/experimental/test_tensor_collector_batch_size.py @@ -0,0 +1,103 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from abc import ABC +from abc import abstractmethod +from typing import List + +import numpy as np +import pytest + +from nncf.experimental.common.tensor_statistics.collectors import TensorCollector +from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes + + +class TemplateTestTensorCollectorBatchSize(ABC): + @staticmethod + @abstractmethod + def get_tensor_statistics_class(): + ... + + @staticmethod + @abstractmethod + def get_tensor_processor(): + ... + + @staticmethod + @abstractmethod + def get_nncf_tensor_class(): + ... + + @pytest.fixture + @abstractmethod + def reducers(self): + ... + + @pytest.fixture + @abstractmethod + def aggregators(self): + ... + + @pytest.fixture + @abstractmethod + def inplace(self): + ... + + @abstractmethod + def create_dataitems_without_batch_dim(self, input_shape: List[int], length: int = 100) -> List[np.ndarray]: + ... + + @abstractmethod + def add_batch_dim_to_dataitems(self, data_items: List[np.ndarray], batch_size: int) -> List[np.ndarray]: + ... 
+ + def _create_tensor_collector(self, shape, inplace, reducer, aggregator) -> TensorCollector: + batch_axis = 0 + statistic_branch_random_name = "1" + collector = TensorCollector(self.get_tensor_statistics_class()) + reduction_axes = get_channel_agnostic_reduction_axes([batch_axis], shape) + aggregation_axes = (0, 1) + kwargs = {"reduction_axes": reduction_axes, "inplace": inplace} + reducer = reducer(**kwargs) + aggregator = aggregator( + aggregation_axes=aggregation_axes, + tensor_processor=self.get_tensor_processor(), + ) + collector.register_statistic_branch(statistic_branch_random_name, reducer, aggregator) + return collector, reducer, aggregator + + def _register_inputs(self, collector, dataitems, reducer, output_info): + for item in dataitems: + input_ = {hash(reducer): [self.get_nncf_tensor_class()(item)]} + collector.register_inputs(input_) + + def test_statistics_batch_size_equal(self, reducers, aggregators, inplace): + target_node_name = "target_node_name" + port_id = 0 + tensor_shape = [3, 20, 20] + dataitems = self.create_dataitems_without_batch_dim(input_shape=tensor_shape) + + shape_batch_1 = [1, *tensor_shape] + collector, reducer, _ = self._create_tensor_collector(shape_batch_1, inplace, reducers, aggregators) + # output_name = reducer.get_output_names(target_node_name, port_id) + dataitems_batch_1 = self.add_batch_dim_to_dataitems(dataitems, batch_size=1) + output_info = collector.get_output_info(target_node_name, port_id) + self._register_inputs(collector, dataitems_batch_1, reducer, output_info) + aggregated_tensor_batch_1 = list(collector._aggregate().values()) + + shape_batch_10 = [10, *tensor_shape] + collector, reducer, _ = self._create_tensor_collector(shape_batch_10, inplace, reducers, aggregators) + # output_name = reducer.get_output_names(target_node_name, port_id) + dataitems_batch_10 = self.add_batch_dim_to_dataitems(dataitems, batch_size=10) + output_info = collector.get_output_info(target_node_name, port_id) + 
self._register_inputs(collector, dataitems_batch_10, reducer, output_info) + aggregated_tensor_batch_10 = list(collector._aggregate().values()) + + assert np.array_equal(aggregated_tensor_batch_1, aggregated_tensor_batch_10) diff --git a/tests/openvino/native/test_tensor_collector_batch_size.py b/tests/openvino/native/test_tensor_collector_batch_size.py new file mode 100644 index 00000000000..04e4fa188ac --- /dev/null +++ b/tests/openvino/native/test_tensor_collector_batch_size.py @@ -0,0 +1,72 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import List + +import numpy as np +import pytest + +from nncf.experimental.common.tensor_statistics.collectors import AGGREGATORS_MAP +from nncf.openvino.statistics.collectors import OV_REDUCERS_MAP +from nncf.openvino.statistics.collectors import OVNNCFCollectorTensorProcessor +from nncf.openvino.statistics.statistics import OVMinMaxTensorStatistic +from nncf.openvino.tensor import OVNNCFTensor +from tests.common.experimental.test_tensor_collector_batch_size import TemplateTestTensorCollectorBatchSize + + +class TestTensorCollectorBatchSize(TemplateTestTensorCollectorBatchSize): + @staticmethod + def get_tensor_statistics_class(): + return OVMinMaxTensorStatistic + + @staticmethod + def get_tensor_processor(): + return OVNNCFCollectorTensorProcessor() + + @staticmethod + def get_nncf_tensor_class(): + return OVNNCFTensor + + @pytest.fixture(params=OV_REDUCERS_MAP.values()) + def reducers(self, request) -> bool: + return request.param + + @pytest.fixture(params=AGGREGATORS_MAP.values()) + def aggregators(self, request) -> bool: + return request.param + + @pytest.fixture(params=[True, False]) + def inplace(self, request): + return request.param + + def create_dataitems_without_batch_dim(self, input_shape: List[int], length: int = 100) -> List[np.ndarray]: + rng = np.random.default_rng(seed=0) + data_items = [] + for _ in range(length): + data_items.append(rng.uniform(0, 1, input_shape)) + return data_items + + def add_batch_dim_to_dataitems(self, data_items: List[np.ndarray], batch_size: int) -> List[np.ndarray]: + assert batch_size >= 1 + dataset = [] + item = [] + cnt = 0 + for data_item in data_items: + if batch_size == 1: + dataset.append(np.expand_dims(data_item, 0)) + else: + item.append(data_item) + if cnt == batch_size - 1: + dataset.append(np.array(item)) + item = [] + cnt = -1 + cnt += 1 + + return dataset diff --git a/tests/torch/ptq/test_tensor_collector_batch_size.py b/tests/torch/ptq/test_tensor_collector_batch_size.py new file mode 100644 
index 00000000000..8f9b5582fc0 --- /dev/null +++ b/tests/torch/ptq/test_tensor_collector_batch_size.py @@ -0,0 +1,73 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List + +import pytest +import torch + +from nncf.experimental.common.tensor_statistics.collectors import AGGREGATORS_MAP +from nncf.torch.tensor import PTNNCFTensor +from nncf.torch.tensor_statistics.collectors import PT_REDUCERS_MAP +from nncf.torch.tensor_statistics.collectors import PTNNCFCollectorTensorProcessor +from nncf.torch.tensor_statistics.statistics import PTMinMaxTensorStatistic +from tests.common.experimental.test_tensor_collector_batch_size import TemplateTestTensorCollectorBatchSize + + +class TestTensorCollectorBatchSize(TemplateTestTensorCollectorBatchSize): + @staticmethod + def get_tensor_statistics_class(): + return PTMinMaxTensorStatistic + + @staticmethod + def get_tensor_processor(): + return PTNNCFCollectorTensorProcessor() + + @staticmethod + def get_nncf_tensor_class(): + return PTNNCFTensor + + @pytest.fixture(params=PT_REDUCERS_MAP.values()) + def reducers(self, request) -> bool: + return request.param + + @pytest.fixture(params=AGGREGATORS_MAP.values()) + def aggregators(self, request) -> bool: + return request.param + + @pytest.fixture(params=[False]) + def inplace(self, request): + return request.param + + def create_dataitems_without_batch_dim(self, input_shape: List[int], length: int = 100) -> List[torch.TensorType]: + 
torch.random.manual_seed(seed=0) + + data_items = [] + for _ in range(length): + data_items.append(torch.rand(input_shape)) + return data_items + + def add_batch_dim_to_dataitems(self, data_items: List[torch.TensorType], batch_size: int) -> List[torch.TensorType]: + assert batch_size >= 1 + dataset = [] + item = [] + cnt = 0 + for data_item in data_items: + if batch_size == 1: + dataset.append(torch.unsqueeze(data_item, 0)) + else: + item.append(data_item) + if cnt == batch_size - 1: + dataset.append(torch.stack(item)) + item = [] + cnt = -1 + cnt += 1 + + return dataset From 605a32551aeb655ba8a06d4426875c8c307cfc6f Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 9 Nov 2023 12:33:52 +0100 Subject: [PATCH 009/108] add batch_size param to conformance test --- tests/post_training/conftest.py | 1 + tests/post_training/pipelines/base.py | 2 ++ .../post_training/pipelines/image_classification_timm.py | 9 +++++++-- tests/post_training/test_quantize_conformance.py | 8 +++++++- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/post_training/conftest.py b/tests/post_training/conftest.py index fe38d0eb510..8af9cd3923f 100644 --- a/tests/post_training/conftest.py +++ b/tests/post_training/conftest.py @@ -25,6 +25,7 @@ def pytest_addoption(parser): parser.addoption("--data", action="store", help="Data directory") parser.addoption("--output", action="store", default="./tmp/", help="Directory to store artifacts") parser.addoption("--no-eval", action="store_true", help="Skip validation step") + parser.addoption("--batch_size", action="store", default=1, type=int, help="Batch size of calibration dataset") def pytest_configure(config): diff --git a/tests/post_training/pipelines/base.py b/tests/post_training/pipelines/base.py index 5e32199e43a..ddd7cdee4b0 100644 --- a/tests/post_training/pipelines/base.py +++ b/tests/post_training/pipelines/base.py @@ -108,6 +108,7 @@ def __init__( reference_data: dict, no_eval: bool, params: dict = None, + batch_size: 
int = 1, ) -> None: self.reported_name = reported_name self.model_id = model_id @@ -117,6 +118,7 @@ def __init__( self.data_dir = Path(data_dir) self.reference_data = reference_data self.params = params or {} + self.batch_size = batch_size self.no_eval = no_eval self.output_model_dir = self.output_dir / self.reported_name / self.backend.value diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index 00171d701b7..3e3bfb50173 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -47,6 +47,7 @@ def prepare_model(self) -> None: timm_model = replace_timm_custom_modules_with_torch_native(timm_model) self.model_cfg = timm_model.default_cfg self.input_size = [1] + list(timm_model.default_cfg["input_size"]) + self.dynamic_input_size = [-1] + list(timm_model.default_cfg["input_size"]) self.dummy_tensor = torch.rand(self.input_size) if self.backend in PT_BACKENDS: @@ -61,12 +62,16 @@ def prepare_model(self) -> None: export_params=True, opset_version=13, do_constant_folding=False, + input_names=["image"], + dynamic_axes={ + "image": {0: "batch"}, + }, ) self.model = onnx.load(onnx_path) self.input_name = self.model.graph.input[0].name if self.backend in OV_BACKENDS: - self.model = convert_model(timm_model, example_input=self.dummy_tensor, input_shape=self.input_size) + self.model = convert_model(timm_model, example_input=self.dummy_tensor, input_shape=self.dynamic_input_size) self.input_name = list(inp.get_any_name() for inp in self.model.inputs)[0] self._dump_model_fp32() @@ -122,7 +127,7 @@ def transform_fn(data_item): def prepare_calibration_dataset(self): dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform) - loader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=2, shuffle=False) + loader = torch.utils.data.DataLoader(dataset, 
batch_size=self.batch_size, num_workers=2, shuffle=False) self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn()) diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index fa62eafd77b..bccf8e7a232 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -42,6 +42,11 @@ def fixture_no_eval(pytestconfig): return pytestconfig.getoption("no_eval") +@pytest.fixture(scope="session", name="batch_size") +def fixture_batch_size(pytestconfig): + return pytestconfig.getoption("batch_size") + + def read_reference_data(): path_reference = Path(__file__).parent / "reference_data.yaml" with path_reference.open() as f: @@ -53,7 +58,7 @@ def read_reference_data(): @pytest.mark.parametrize("test_case_name", TEST_CASES.keys()) -def test_ptq_quantization(test_case_name, data, output, result, no_eval): +def test_ptq_quantization(test_case_name, data, output, result, no_eval, batch_size): pipeline = None err_msg = None test_model_param = None @@ -81,6 +86,7 @@ def test_ptq_quantization(test_case_name, data, output, result, no_eval): "data_dir": data, "reference_data": REFERENCE_DATA[test_case_name], "no_eval": no_eval, + "batch_size": batch_size, } pipeline = pipeline_cls(**pipeline_kwargs) From fb16b99e60058d12a6d3376176cea3390acd5c43 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 9 Nov 2023 16:06:15 +0100 Subject: [PATCH 010/108] hardcode for CI run --- tests/post_training/pipelines/image_classification_timm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index 3e3bfb50173..13b5c6fed97 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -127,7 +127,7 @@ def transform_fn(data_item): def 
prepare_calibration_dataset(self): dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform) - loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, num_workers=2, shuffle=False) + loader = torch.utils.data.DataLoader(dataset, batch_size=10, num_workers=2, shuffle=False) self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn()) From cd60fa3da9f2783e0aa5f91dcf25a72794aa665b Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 10 Nov 2023 17:10:10 +0100 Subject: [PATCH 011/108] hardcode batch size = 10 for calibrate.py --- tests/openvino/tools/calibrate.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/openvino/tools/calibrate.py b/tests/openvino/tools/calibrate.py index ecb589aa1ca..2fe497830be 100644 --- a/tests/openvino/tools/calibrate.py +++ b/tests/openvino/tools/calibrate.py @@ -852,6 +852,7 @@ class DataItem: def __init__(self, model_evaluator): self.model_evaluator = model_evaluator + self.batch_size = self.model_evaluator.dataset.batch def __iter__(self): for sequence in self.model_evaluator.dataset: @@ -1027,6 +1028,13 @@ def filter_configuration(config: Config) -> Config: return config +def update_config_batch_size(accuracy_checker_config, batch_size): + for model in accuracy_checker_config["models"]: + for dataset in model["datasets"]: + print(f"Updated batch size value to {batch_size}") + dataset["batch"] = batch_size + + def main(): args = parse_args() config = Config.read_config(args.config) @@ -1034,6 +1042,7 @@ def main(): xml_path, bin_path = get_model_paths(config.model) accuracy_checker_config = get_accuracy_checker_config(config.engine) + update_config_batch_size(accuracy_checker_config, 10) nncf_algorithms_config = get_nncf_algorithms_config(config.compression, args.output_dir) set_log_file(f"{args.output_dir}/log.txt") From cc621abccd4abfa8cbd15a7cfa9f8f9c53670a08 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 18 Dec 2023 
19:48:32 +0100 Subject: [PATCH 012/108] merge --- tests/post_training/conftest.py | 38 ++++++++------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/tests/post_training/conftest.py b/tests/post_training/conftest.py index 8af9cd3923f..9f9eca92d04 100644 --- a/tests/post_training/conftest.py +++ b/tests/post_training/conftest.py @@ -9,12 +9,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import collections -from pathlib import Path - -import pandas as pd -import pytest - from tests.shared.paths import TEST_ROOT PTQ_TEST_ROOT = TEST_ROOT / "post_training" @@ -26,26 +20,12 @@ def pytest_addoption(parser): parser.addoption("--output", action="store", default="./tmp/", help="Directory to store artifacts") parser.addoption("--no-eval", action="store_true", help="Skip validation step") parser.addoption("--batch_size", action="store", default=1, type=int, help="Batch size of calibration dataset") - - -def pytest_configure(config): - config.test_results = {} - - -PTQ_TEST_ROOT = TEST_ROOT / "post_training_quantization" - - -@pytest.hookimpl(tryfirst=True, hookwrapper=True) -def pytest_runtest_makereport(item, call): - outcome = yield - result = outcome.get_result() - - if result.when == "call": - test_results = collections.OrderedDict(sorted(item.config.test_results.items())) - df = pd.DataFrame() - for _, test_result in test_results.items(): - df = df.append(test_result, ignore_index=True) - - output_folder = Path(item.config.getoption("--output")) - output_folder.mkdir(parents=True, exist_ok=True) - df.to_csv(output_folder / "results.csv", index=False) + parser.addoption("--subset-size", type=int, default=None, help="Set subset size") + parser.addoption("--fp32", action="store_true", help="Test original model") + parser.addoption("--cuda", action="store_true", help="Enable CUDA_TORCH backend") + parser.addoption("--benchmark", action="store_true", help="Run benchmark_app") + 
parser.addoption( + "--extra-columns", + action="store_true", + help="Add additional columns to reports.csv", + ) From d2a9b009696e32b6211b362e31ea1bb6aa3ae090 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 20 Dec 2023 14:08:39 +0100 Subject: [PATCH 013/108] update aggregator --- nncf/common/tensor_statistics/aggregator.py | 33 ++++++++------------- nncf/data/dataset.py | 4 ++- nncf/onnx/statistics/aggregator.py | 4 --- nncf/openvino/statistics/aggregator.py | 4 --- nncf/torch/statistics/aggregator.py | 6 +--- 5 files changed, 17 insertions(+), 34 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index ce889fab6a1..12dc5caec2f 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -33,6 +33,9 @@ class StatisticsAggregator(ABC): def __init__(self, dataset: Dataset): self.dataset = dataset self.stat_subset_size = None + self.batch_size = 1 if self.dataset.get_batch_size() is None else self.dataset.get_batch_size() + self.dataset_size = self.dataset.get_length() + self.dataset_size = self.dataset_size * self.batch_size if self.dataset_size is not None else self.dataset_size self.statistic_points = StatisticPointsContainer() def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: @@ -45,36 +48,31 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ if not self.statistic_points: return - collected_statistics_num = 0 model_transformer = factory.ModelTransformerFactory.create(model) - merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph) transformation_layout = self._get_transformation_layout_extra_outputs(merged_statistics) model_with_outputs = model_transformer.transform(transformation_layout) engine = factory.EngineFactory.create(model_with_outputs) - batch_size = self.dataset.get_batch_size() - batch_size = 1 if batch_size is None else batch_size - 
dataset_length = self.dataset.get_length() - dataset_length = dataset_length * batch_size if dataset_length is not None else dataset_length - total = ( - min(dataset_length or self.stat_subset_size, self.stat_subset_size) + calibration_samples_num = ( + min(self.dataset_size or self.stat_subset_size, self.stat_subset_size) if self.stat_subset_size is not None else None - ) - - with track(total=total, description="Statistics collection") as pbar: + ) # Maybe subsample should be in terms of a tensor with arbitary batch_size + collected_statistics_num = 0 + with track(total=calibration_samples_num, description="Statistics collection") as pbar: for input_data in islice(self.dataset.get_inference_data(), self.stat_subset_size): batch_size_to_collect = ( - min(total - collected_statistics_num, batch_size) if total is not None else batch_size + min(calibration_samples_num - collected_statistics_num, self.batch_size) + if calibration_samples_num is not None + else self.batch_size ) - sliced_iput = self._get_sliced_data(input_data, batch_size_to_collect) - outputs = engine.infer(sliced_iput) + outputs = engine.infer(input_data) processed_outputs = self._process_outputs(outputs) self._register_statistics(processed_outputs, merged_statistics) collected_statistics_num += batch_size_to_collect pbar.progress.update(pbar.task, advance=batch_size_to_collect) - if total and collected_statistics_num == total: + if calibration_samples_num and collected_statistics_num == calibration_samples_num: break if collected_statistics_num == 0: raise RuntimeError( @@ -147,8 +145,3 @@ def _process_outputs(outputs: Any) -> Dict[str, NNCFTensor]: :param outputs: raw model outputs :return: processed model outputs in Dict[str, NNCFTensor] format """ - - @staticmethod - @abstractmethod - def _get_sliced_data(inputs: Any, end: int) -> Any: - """ """ diff --git a/nncf/data/dataset.py b/nncf/data/dataset.py index d4eec574dc1..9c3ceb83e8d 100644 --- a/nncf/data/dataset.py +++ b/nncf/data/dataset.py @@ 
-83,8 +83,10 @@ def get_length(self) -> Optional[int]: def get_batch_size(self) -> Optional[int]: """ """ - if hasattr(self._data_source, "batch_size"): + if hasattr(self._data_source, "batch_size"): # Torch return self._data_source.batch_size + if hasattr(self._data_source, "_batch_size"): # TF + return self._data_source._batch_size return None diff --git a/nncf/onnx/statistics/aggregator.py b/nncf/onnx/statistics/aggregator.py index 0c9f2dc76a5..a768a855258 100644 --- a/nncf/onnx/statistics/aggregator.py +++ b/nncf/onnx/statistics/aggregator.py @@ -80,7 +80,3 @@ def _get_merged_statistic_points( @staticmethod def _process_outputs(outputs: Dict[str, np.ndarray]) -> Dict[str, ONNXNNCFTensor]: return {n: ONNXNNCFTensor(v) for n, v in outputs.items()} - - @staticmethod - def _get_sliced_data(inputs: Dict[str, np.ndarray], end: int) -> Dict[str, ONNXNNCFTensor]: - return inputs diff --git a/nncf/openvino/statistics/aggregator.py b/nncf/openvino/statistics/aggregator.py index 7a2f2ddb2a2..7fd5e26c72d 100644 --- a/nncf/openvino/statistics/aggregator.py +++ b/nncf/openvino/statistics/aggregator.py @@ -111,7 +111,3 @@ def _get_merged_statistic_points( @staticmethod def _process_outputs(outputs: Dict[str, np.ndarray]) -> Dict[str, OVNNCFTensor]: return {n: OVNNCFTensor(v) for n, v in outputs.items()} - - @staticmethod - def _get_sliced_data(inputs: Dict[str, np.ndarray], end: int) -> Dict[str, OVNNCFTensor]: - return inputs diff --git a/nncf/torch/statistics/aggregator.py b/nncf/torch/statistics/aggregator.py index 79fa76e0430..41fdc20c4fa 100644 --- a/nncf/torch/statistics/aggregator.py +++ b/nncf/torch/statistics/aggregator.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Dict +from typing import Dict import numpy as np import torch @@ -70,7 +70,3 @@ def _get_merged_statistic_points( @staticmethod def _process_outputs(outputs: Dict[str, np.ndarray]) -> Dict[str, PTNNCFTensor]: return outputs - - @staticmethod - def _get_sliced_data(inputs: Any, end: int) -> Any: - return inputs[:end] From d95be5deb8a8a4535565b1630f07b3f0ae57fb18 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 20 Dec 2023 16:54:49 +0100 Subject: [PATCH 014/108] revert unneseccary changes --- nncf/openvino/statistics/collectors.py | 7 ++++++- .../algorithms/bias_correction/openvino_backend.py | 3 ++- .../algorithms/fast_bias_correction/openvino_backend.py | 3 ++- .../algorithms/fast_bias_correction/torch_backend.py | 3 ++- nncf/torch/tensor_statistics/collectors.py | 5 ++++- 5 files changed, 16 insertions(+), 5 deletions(-) diff --git a/nncf/openvino/statistics/collectors.py b/nncf/openvino/statistics/collectors.py index f92b7c4830c..0c12eb147b1 100644 --- a/nncf/openvino/statistics/collectors.py +++ b/nncf/openvino/statistics/collectors.py @@ -283,12 +283,16 @@ def get_output_names(self, target_node_name: str, port_id: int) -> List[str]: return get_reducer_output_node_names(self.name, target_node_name, port_id, self.output_port_id, self.inplace) -def get_mean_statistic_collector(num_samples: int, channel_axis: int, inplace: bool = True) -> TensorCollector: +def get_mean_statistic_collector( + num_samples: int, channel_axis: int, window_size: Optional[int] = None, inplace: bool = True +) -> TensorCollector: """ Mean statistic collector builder. :param num_samples: Maximum number of samples to collect. :param channel_axis: Channel axis to use during reduction phase. + :param window_size: Number of samples from the end of the list of collected samples to aggregate. + Aggregates all available collected statistics in case parameter is None. :param inplace: Whether the mean reducer should be calculated inplace or out of place. 
:return: Mean statistic collector. """ @@ -304,6 +308,7 @@ def get_mean_statistic_collector(num_samples: int, channel_axis: int, inplace: b kwargs = { "tensor_processor": OVNNCFCollectorTensorProcessor, "num_samples": num_samples, + "window_size": window_size, } aggregate_mean = MeanAggregator(**kwargs) aggregate_shape = ShapeAggregator() diff --git a/nncf/quantization/algorithms/bias_correction/openvino_backend.py b/nncf/quantization/algorithms/bias_correction/openvino_backend.py index 000c725c873..03f6189dd3c 100644 --- a/nncf/quantization/algorithms/bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/bias_correction/openvino_backend.py @@ -62,8 +62,9 @@ def mean_statistic_collector( channel_axis: int, inplace: bool, num_samples: Optional[int] = None, + window_size: Optional[int] = None, ) -> TensorCollector: - return get_mean_statistic_collector(num_samples, channel_axis, inplace) + return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) @staticmethod def raw_statistic_collector(inplace: bool, num_samples: int = None) -> TensorCollector: diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index f00e6abd331..ba8d18d1733 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -50,8 +50,9 @@ def mean_statistic_collector( channel_axis: int, inplace: bool, num_samples: Optional[int] = None, + window_size: Optional[int] = None, ) -> TensorCollector: - return get_mean_statistic_collector(num_samples, channel_axis, inplace) + return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) @staticmethod def get_sub_input_output_names(subgraph: ov.Model) -> Tuple[str, str]: diff --git a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py 
b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py index 03e0c02dddd..fea39ff068a 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py @@ -62,8 +62,9 @@ def mean_statistic_collector( channel_axis: int, inplace: bool, num_samples: Optional[int] = None, + window_size: Optional[int] = None, ) -> TensorCollector: - return get_mean_statistic_collector(num_samples, channel_axis) + return get_mean_statistic_collector(num_samples, channel_axis, window_size) @staticmethod def get_sub_input_output_names(subgraph: NNCFNetwork) -> Tuple[str, str]: diff --git a/nncf/torch/tensor_statistics/collectors.py b/nncf/torch/tensor_statistics/collectors.py index f9587db57a1..34bc7693218 100644 --- a/nncf/torch/tensor_statistics/collectors.py +++ b/nncf/torch/tensor_statistics/collectors.py @@ -514,7 +514,9 @@ def get_mean_percentile_statistic_collector( return tensor_collector -def get_mean_statistic_collector(num_samples: int, channel_axis: int) -> TensorCollector: +def get_mean_statistic_collector( + num_samples: int, channel_axis: int, window_size: Optional[int] = None +) -> TensorCollector: """ Mean statistic collector builder. 
@@ -533,6 +535,7 @@ def get_mean_statistic_collector(num_samples: int, channel_axis: int) -> TensorC kwargs = { "tensor_processor": PTNNCFCollectorTensorProcessor, "num_samples": num_samples, + "window_size": window_size, } aggregate_mean = MeanAggregator(**kwargs) aggregate_shape = ShapeAggregator() From cd68684acb5156c997b4b1575d83c3339a329607 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 20 Dec 2023 17:57:36 +0100 Subject: [PATCH 015/108] add logging; add torch data for OVEngine --- nncf/common/tensor_statistics/aggregator.py | 2 +- nncf/openvino/engine.py | 7 ++++--- nncf/quantization/algorithms/post_training/algorithm.py | 5 ++++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 12dc5caec2f..52910baee52 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -33,7 +33,7 @@ class StatisticsAggregator(ABC): def __init__(self, dataset: Dataset): self.dataset = dataset self.stat_subset_size = None - self.batch_size = 1 if self.dataset.get_batch_size() is None else self.dataset.get_batch_size() + self.batch_size = self.dataset.get_batch_size() or 1 self.dataset_size = self.dataset.get_length() self.dataset_size = self.dataset_size * self.batch_size if self.dataset_size is not None else self.dataset_size self.statistic_points = StatisticPointsContainer() diff --git a/nncf/openvino/engine.py b/nncf/openvino/engine.py index 47f09f32952..d1ab982f178 100644 --- a/nncf/openvino/engine.py +++ b/nncf/openvino/engine.py @@ -13,6 +13,7 @@ import numpy as np import openvino.runtime as ov +import torch from nncf.common.engine import Engine from nncf.parameters import TargetDevice @@ -44,9 +45,9 @@ def _check_input_data_format( :param input_data: Provided inputs to infer the model. 
""" - # actual_num_inputs = 1 if isinstance(input_data, np.ndarray) else len(input_data) - # if actual_num_inputs != self.number_of_inputs: - # raise RuntimeError(f"Model expects {self.number_of_inputs} inputs, but {actual_num_inputs} are provided.") + actual_num_inputs = 1 if isinstance(input_data, (np.ndarray, torch.tensor)) else len(input_data) + if actual_num_inputs != self.number_of_inputs: + raise RuntimeError(f"Model expects {self.number_of_inputs} inputs, but {actual_num_inputs} are provided.") if isinstance(input_data, dict): for name in input_data: if isinstance(name, str) and name not in self.input_tensor_names: diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index ff27f06eadb..64e782fcd73 100644 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ b/nncf/quantization/algorithms/post_training/algorithm.py @@ -14,6 +14,7 @@ from nncf import Dataset from nncf.common.graph.graph import NNCFGraph +from nncf.common.logging import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType @@ -104,7 +105,9 @@ def apply( "A dataset is required for the post-training quantization " "algorithm to collect statistics for intermediate models." 
) - + batch_size = dataset.get_batch_size() or 1 + if batch_size > 1: + nncf_logger.warn("Statistics for batch_size > 1 does not match to the recomended batch_size=1") step_index_to_statistics = None if statistic_points: step_index_to_statistics = {0: statistic_points} From 4a009f3d658ca66411cf5f16dc7731a406fcc15a Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 21 Dec 2023 12:08:43 +0100 Subject: [PATCH 016/108] refactor method get axes --- nncf/openvino/engine.py | 2 +- .../algorithms/min_max/openvino_backend.py | 67 ++++++++++++------- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/nncf/openvino/engine.py b/nncf/openvino/engine.py index d1ab982f178..20c616e3dd9 100644 --- a/nncf/openvino/engine.py +++ b/nncf/openvino/engine.py @@ -45,7 +45,7 @@ def _check_input_data_format( :param input_data: Provided inputs to infer the model. """ - actual_num_inputs = 1 if isinstance(input_data, (np.ndarray, torch.tensor)) else len(input_data) + actual_num_inputs = 1 if isinstance(input_data, (np.ndarray, torch.Tensor)) else len(input_data) if actual_num_inputs != self.number_of_inputs: raise RuntimeError(f"Model expects {self.number_of_inputs} inputs, but {actual_num_inputs} are provided.") if isinstance(input_data, dict): diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 6ff33e21640..98b33bf37a5 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict, List, Optional, Set +from typing import Dict, List, Optional, Set, Tuple import numpy as np @@ -149,36 +149,48 @@ def _get_activation_shape(target_point: OVTargetPoint, nncf_graph: NNCFGraph, no raise NotImplementedError(f"Unsupported target point type {target_point.type}.") @staticmethod - def _get_batch_axis() -> int: - return 0 # TODO (?) - - @staticmethod - def _get_aggregation_axes(target_point: OVTargetPoint): - return None if target_point.is_weight_target_point() else (0, 1) - - @staticmethod - def _get_reduction_axes( + def _get_reduction_aggregation_axes( nncf_graph: NNCFGraph, target_point: OVTargetPoint, quantizer_config: QuantizerConfig - ) -> ReductionAxes: + ) -> Tuple[ReductionAxes, ReductionAxes]: + """ + Returns reduce and aggregation axes. The following logic is applied: + If target point is applied to weight: + *Aggregator aggregates only inner saved statistics, + *Reducer reduces all axes except channels for per-channel, for per-tensor - all axes. + If target point is applied to activations: + *Aggregator aggregates batch dimension. + *Reducer reduces all axes except channel and batch for per-channel, for per-tensor - all axes except batch. + + :param nncf_graph: NNCFGraph instance. + :param target_point: Point to collect statistics. + :param quantizer_config: Quantization configuration. + :return: Reduction axes for reducer and aggregation axes for aggregator. 
+ """ node = nncf_graph.get_node_by_name(target_point.target_node_name) + if target_point.is_weight_target_point(): + aggregation_axes = None assert isinstance(node.layer_attributes, OVLayerAttributes) shape = node.layer_attributes.constant_attributes[target_point.port_id]["shape"] if quantizer_config.per_channel: - channel_axes = get_weight_channel_axes(node, target_point.port_id) + channel_axes = get_weight_channel_axes(node) reduction_axes = get_channel_agnostic_reduction_axes(channel_axes, shape) else: reduction_axes = tuple(range(len(shape))) - return reduction_axes - - shape = OVMinMaxAlgoBackend._get_activation_shape(target_point, nncf_graph, node) - if quantizer_config.per_channel: - # TODO (l-bat): Disable quantizer propagation through layout changing operations - axis = 1 # OpenVINO activations have channel first layout: [N, C, Z, Y, X] else: - axis = OVMinMaxAlgoBackend._get_batch_axis() - reduction_axes = get_channel_agnostic_reduction_axes([axis], shape) - return reduction_axes + # OpenVINO activations have channel first layout: [N, C, Z, Y, X] + batch_axis, channel_axis = 0, 1 + aggregation_axes = (batch_axis, channel_axis) + shape = OVMinMaxAlgoBackend._get_activation_shape(target_point, nncf_graph, node) + if quantizer_config.per_channel: + # Keep batch to aggregate and channel for per-channel FakeQuantize. 
+ # TODO (l-bat): Disable quantizer propagation through layout changing operations + reduction_axes = get_channel_agnostic_reduction_axes((batch_axis, channel_axis), shape) + else: + # Keep batch to aggregate + reduction_axes = get_channel_agnostic_reduction_axes((batch_axis,), shape) + + return reduction_axes, aggregation_axes @staticmethod def get_statistic_collector( @@ -190,14 +202,23 @@ def get_statistic_collector( num_samples: int = None, ) -> TensorCollector: use_abs_max = quantizer_config.mode == QuantizationMode.SYMMETRIC - reduction_axes = OVMinMaxAlgoBackend._get_reduction_axes(nncf_graph, target_point, quantizer_config) - aggregation_axes = OVMinMaxAlgoBackend._get_aggregation_axes(target_point) + reduction_axes, aggregation_axes = OVMinMaxAlgoBackend._get_reduction_aggregation_axes( + nncf_graph, target_point, quantizer_config + ) collector = TensorCollector(OVMinMaxTensorStatistic) for params, container_key in zip( [range_estimator_params.min, range_estimator_params.max], [OVMinMaxTensorStatistic.MIN_STAT, OVMinMaxTensorStatistic.MAX_STAT], ): + if params.statistics_type not in OV_REDUCERS_MAP: + raise RuntimeError( + f"Statistic type: {params.statistics_type} is not supported for OpenVino PTQ backend yet." + ) + if params.aggregator_type not in AGGREGATORS_MAP: + raise RuntimeError( + f"Aggregator type: {params.aggregator_type} is not supported for OpenVino PTQ backend yet." 
+ ) kwargs = {"reduction_axes": reduction_axes, "inplace": inplace} if params.statistics_type in [StatisticsType.QUANTILE, StatisticsType.ABS_QUANTILE]: if container_key == OVMinMaxTensorStatistic.MIN_STAT: From c2659b39e83a40e9cd69d1b9ddff6de23d645eb3 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 21 Dec 2023 15:03:13 +0100 Subject: [PATCH 017/108] fix OV tests --- .../2023.2/reference_scales/DynamicModel_mixed.json | 8 ++++---- .../reference_scales/DynamicModel_performance.json | 8 ++++---- .../2023.2/reference_scales/MatMul2DModel_mixed.json | 4 ++-- .../reference_scales/MatMul2DModel_performance.json | 4 ++-- .../reference_scales/ScaleShiftReluModel_mixed.json | 12 ++++++------ .../ScaleShiftReluModel_performance.json | 12 ++++++------ .../native/quantization/test_quantizer_config.py | 6 +++--- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_mixed.json b/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_mixed.json index 9cf4da1968f..b7df9051e7e 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_mixed.json +++ b/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_mixed.json @@ -94,9 +94,9 @@ ] }, "Sub/fq_output_0": { - "input_low": -0.6373578310012817, - "input_high": 0.9560367465019226, - "output_low": -0.6373578310012817, - "output_high": 0.9560367465019226 + "input_low": -0.5242199897766113, + "input_high": 0.8538841009140015, + "output_low": -0.5242199897766113, + "output_high": 0.8538841009140015 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_performance.json b/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_performance.json index 9727ddf1990..b450a080161 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_performance.json +++ b/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_performance.json @@ 
-94,9 +94,9 @@ ] }, "Sub/fq_output_0": { - "input_low": -0.9635645747184753, - "input_high": 0.9560367465019226, - "output_low": -0.9635645747184753, - "output_high": 0.9560367465019226 + "input_low": -0.8591098189353943, + "input_high": 0.8523980379104614, + "output_low": -0.8591098189353943, + "output_high": 0.8523980379104614 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_mixed.json b/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_mixed.json index fd0ed29955f..7e7fd2cdd5e 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_mixed.json +++ b/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_mixed.json @@ -27,8 +27,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.9350724220275879, + "input_high": 0.8685823082923889, "output_low": 0.0, - "output_high": 0.9350724220275879 + "output_high": 0.8685823082923889 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_performance.json b/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_performance.json index fd0ed29955f..7e7fd2cdd5e 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_performance.json +++ b/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_performance.json @@ -27,8 +27,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.9350724220275879, + "input_high": 0.8685823082923889, "output_low": 0.0, - "output_high": 0.9350724220275879 + "output_high": 0.8685823082923889 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_mixed.json b/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_mixed.json index 71700abfa50..31dddc43f04 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_mixed.json +++ 
b/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_mixed.json @@ -35,15 +35,15 @@ }, "Relu/fq_output_0": { "input_low": 0.0, - "input_high": 2.5198161602020264, + "input_high": 2.1013052463531494, "output_low": 0.0, - "output_high": 2.5198161602020264 + "output_high": 2.1013052463531494 }, "MatMul/fq_output_0": { "input_low": 0.0, - "input_high": 2.1930606365203857, + "input_high": 1.6296062469482422, "output_low": 0.0, - "output_high": 2.1930606365203857 + "output_high": 1.6296062469482422 }, "MatMul/fq_weights_1": { "input_low": [ @@ -73,8 +73,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.9350724220275879, + "input_high": 0.8685823082923889, "output_low": 0.0, - "output_high": 0.9350724220275879 + "output_high": 0.8685823082923889 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_performance.json b/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_performance.json index 71700abfa50..31dddc43f04 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_performance.json +++ b/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_performance.json @@ -35,15 +35,15 @@ }, "Relu/fq_output_0": { "input_low": 0.0, - "input_high": 2.5198161602020264, + "input_high": 2.1013052463531494, "output_low": 0.0, - "output_high": 2.5198161602020264 + "output_high": 2.1013052463531494 }, "MatMul/fq_output_0": { "input_low": 0.0, - "input_high": 2.1930606365203857, + "input_high": 1.6296062469482422, "output_low": 0.0, - "output_high": 2.1930606365203857 + "output_high": 1.6296062469482422 }, "MatMul/fq_weights_1": { "input_low": [ @@ -73,8 +73,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.9350724220275879, + "input_high": 0.8685823082923889, "output_low": 0.0, - "output_high": 0.9350724220275879 + "output_high": 0.8685823082923889 } } \ No newline at end of file diff --git 
a/tests/openvino/native/quantization/test_quantizer_config.py b/tests/openvino/native/quantization/test_quantizer_config.py index 59ce5ec1126..c1430ec06ec 100644 --- a/tests/openvino/native/quantization/test_quantizer_config.py +++ b/tests/openvino/native/quantization/test_quantizer_config.py @@ -59,15 +59,15 @@ def get_reduction_axes(self, reducer: TensorReducerBase) -> ReductionAxes: ( TargetType.POST_LAYER_OPERATION, "/Conv_1_0", - (0, 2, 3), + (2, 3), (1, 2, 3), - ), # per-tensor: all tensor tensor except batch index is reduced + ), ( TargetType.OPERATION_WITH_WEIGHTS, "/Conv_1_0", (1, 2, 3), (0, 1, 2, 3), - ), # per-tensor: all weight tensor is reduced + ), ] ) def statistic_collector_parameters(self, request) -> ParamsCls: From 3a13f00f19b1b2b3f21d8c5b4422db16092b36eb Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 4 Jan 2024 12:21:40 +0100 Subject: [PATCH 018/108] fix Torch tests --- nncf/quantization/algorithms/min_max/torch_backend.py | 7 +++++-- nncf/quantization/algorithms/post_training/algorithm.py | 5 +++-- nncf/torch/quantization/init_range.py | 2 +- .../openvino/native/quantization/test_quantizer_config.py | 3 +-- tests/torch/ptq/test_graphs.py | 4 +++- tests/torch/ptq/test_quantizer_config.py | 6 +++--- 6 files changed, 16 insertions(+), 11 deletions(-) diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index 96c8ef87af7..0bd2fff56f4 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -166,8 +166,11 @@ def get_statistic_collector( num_samples: int = None, ) -> TensorCollector: collector_params = PTMinMaxAlgoBackend._default_collector_params(nncf_graph, target_point, quantizer_config) - reduction_axes = collector_params.get_reduction_axes(per_sample_stats=False) - aggregation_axes = collector_params.get_aggregation_axes(per_sample_stats=False) + reduction_axes = 
collector_params.get_reduction_axes(per_sample_stats=True) + aggregation_axes = collector_params.get_aggregation_axes(per_sample_stats=True) + if target_point.is_weight_target_point(): + reduction_axes = collector_params.get_reduction_axes(per_sample_stats=False) + aggregation_axes = collector_params.get_aggregation_axes(per_sample_stats=False) collector = TensorCollector(PTMinMaxTensorStatistic) for params, container_key in zip( diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index 64e782fcd73..0fed24f0e67 100644 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ b/nncf/quantization/algorithms/post_training/algorithm.py @@ -105,8 +105,9 @@ def apply( "A dataset is required for the post-training quantization " "algorithm to collect statistics for intermediate models." ) - batch_size = dataset.get_batch_size() or 1 - if batch_size > 1: + if dataset is not None: + batch_size = dataset.get_batch_size() or 1 + if dataset is not None and batch_size > 1: nncf_logger.warn("Statistics for batch_size > 1 does not match to the recomended batch_size=1") step_index_to_statistics = None if statistic_points: diff --git a/nncf/torch/quantization/init_range.py b/nncf/torch/quantization/init_range.py index 88a7c03c95f..5f81372b9c1 100644 --- a/nncf/torch/quantization/init_range.py +++ b/nncf/torch/quantization/init_range.py @@ -130,7 +130,7 @@ def get_aggregation_axes(self, per_sample_stats: bool) -> AggregationAxes: :param per_sample_stats: Boolean flag that indicated whether statistics are collected per-sample or per-batch. :return: Shape to aggregate to. 
""" - return (0, 1) if self.use_per_sample_stats(per_sample_stats) else (0,) + return (0, 1) if self.use_per_sample_stats(per_sample_stats) else None class StatCollectorGenerator: diff --git a/tests/openvino/native/quantization/test_quantizer_config.py b/tests/openvino/native/quantization/test_quantizer_config.py index c1430ec06ec..4b8f1119736 100644 --- a/tests/openvino/native/quantization/test_quantizer_config.py +++ b/tests/openvino/native/quantization/test_quantizer_config.py @@ -53,8 +53,7 @@ def get_reduction_axes(self, reducer: TensorReducerBase) -> ReductionAxes: @pytest.fixture( params=[ pytest.param( - (TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (0, 2), (0, 1, 2)), - marks=pytest.mark.skip("Ticket 102414: remove hardcoded axes for activations"), + (TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (2,), (1, 2)), ), ( TargetType.POST_LAYER_OPERATION, diff --git a/tests/torch/ptq/test_graphs.py b/tests/torch/ptq/test_graphs.py index 4aace885e10..a1d6f524b7f 100644 --- a/tests/torch/ptq/test_graphs.py +++ b/tests/torch/ptq/test_graphs.py @@ -100,6 +100,8 @@ def test_min_max_classification_quantized_graphs(desc: ModelDesc, quantization_p quantization_parameters["advanced_parameters"] = AdvancedQuantizationParameters(disable_bias_correction=True) quantization_algorithm = PostTrainingQuantization(**quantization_parameters) - quantized_model = quantization_algorithm.apply(nncf_network, nncf_network.nncf.get_graph(), dataset=None) + quantized_model = quantization_algorithm.apply( + nncf_network, nncf_network.nncf.get_graph(), dataset=None + ) # TODO: could dataset be None? 
check_graph(quantized_model.nncf.get_graph(), desc.dot_filename(), graph_dir) diff --git a/tests/torch/ptq/test_quantizer_config.py b/tests/torch/ptq/test_quantizer_config.py index 9836bb769a2..cc02fc531c4 100644 --- a/tests/torch/ptq/test_quantizer_config.py +++ b/tests/torch/ptq/test_quantizer_config.py @@ -51,9 +51,9 @@ def get_reduction_axes(self, reducer: TensorReducerBase) -> ReductionAxes: @pytest.fixture( params=[ - (TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (0, 2), (0, 1, 2)), - (TargetType.POST_LAYER_OPERATION, "/Conv_1_0", (0, 2, 3), (1, 2, 3)), - (TargetType.OPERATION_WITH_WEIGHTS, "/Conv_1_0", (1, 2, 3), (1, 2, 3)), + (TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (2,), (1, 2)), + (TargetType.POST_LAYER_OPERATION, "/Conv_1_0", (2, 3), (1, 2, 3)), + (TargetType.OPERATION_WITH_WEIGHTS, "/Conv_1_0", (1, 2, 3), (0, 1, 2, 3)), ] ) def statistic_collector_parameters(self, request) -> ParamsCls: From 880073b754916ab7c70e1da2020506704f3cc896 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 15 Jan 2024 13:54:02 +0100 Subject: [PATCH 019/108] logic of warning message inside StatisticsAggregator --- nncf/common/tensor_statistics/aggregator.py | 21 ++++++++++++++++++- nncf/onnx/graph/metatypes/groups.py | 6 ++++++ nncf/onnx/graph/metatypes/onnx_metatypes.py | 4 ++-- nncf/onnx/statistics/aggregator.py | 8 ++++++- nncf/openvino/graph/metatypes/groups.py | 7 +++++++ .../graph/metatypes/openvino_metatypes.py | 10 +++++++-- nncf/openvino/statistics/aggregator.py | 8 ++++++- .../algorithms/post_training/algorithm.py | 5 ----- nncf/torch/graph/operator_metatypes.py | 7 +++++++ nncf/torch/statistics/aggregator.py | 8 ++++++- 10 files changed, 71 insertions(+), 13 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 52910baee52..e20d39134e3 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -11,11 +11,13 @@ from abc import ABC from abc 
import abstractmethod from itertools import islice -from typing import Any, Dict, TypeVar +from typing import Any, Dict, List, TypeVar from nncf.common import factory from nncf.common.graph.graph import NNCFGraph +from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.layout import TransformationLayout +from nncf.common.logging.logger import nncf_logger from nncf.common.logging.track_progress import track from nncf.common.tensor import NNCFTensor from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer @@ -46,6 +48,8 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: :param model: Backend-specific model instance. :param graph: Model graph. """ + if self.batch_size > 1 and self.is_model_batch_size_limited_support(graph): + nncf_logger.warning("The batch size > 1 for the specific model can lead to accuracy degradation") if not self.statistic_points: return model_transformer = factory.ModelTransformerFactory.create(model) @@ -99,6 +103,21 @@ def register_statistic_points(self, statistic_points: StatisticPointsContainer) elif tensor_collector.num_samples is not None: self.stat_subset_size = max(self.stat_subset_size, tensor_collector.num_samples) + def is_model_batch_size_limited_support(self, graph: NNCFGraph) -> bool: + """ + :param graph: Model graph. + :return: True if the graph contains a node whose metatype is in `metatypes_output_has_no_batch_axis`. + """ + for metatype in self.metatypes_output_has_no_batch_axis: + if metatype in set(node.metatype for node in graph.get_all_nodes()): + return True + return False + + @property + @abstractmethod + def metatypes_output_has_no_batch_axis(self) -> List[OperatorMetatype]: + """Backend-specific metatypes whose output has no batch axis.""" + @abstractmethod def _register_statistics(self, outputs: Dict[str, NNCFTensor], statistic_points: StatisticPointsContainer) -> None: """ diff --git a/nncf/onnx/graph/metatypes/groups.py b/nncf/onnx/graph/metatypes/groups.py index 404b65cf8a5..5e61605f618 100644 --- a/nncf/onnx/graph/metatypes/groups.py
+++ b/nncf/onnx/graph/metatypes/groups.py @@ -121,3 +121,9 @@ onnx_metatypes.ONNXConvolutionMetatype, onnx_metatypes.ONNXDepthwiseConvolutionMetatype, ] + + +OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ + onnx_metatypes.ONNXROIAlignMetatype, + onnx_metatypes.ONNXEmbeddingMetatype, +] diff --git a/nncf/onnx/graph/metatypes/onnx_metatypes.py b/nncf/onnx/graph/metatypes/onnx_metatypes.py index 3ff663a897f..fb80990edac 100644 --- a/nncf/onnx/graph/metatypes/onnx_metatypes.py +++ b/nncf/onnx/graph/metatypes/onnx_metatypes.py @@ -445,8 +445,8 @@ class ONNXScatterNDMetatype(ONNXOpMetatype): @ONNX_OPERATION_METATYPES.register() -class ONNXRoiAlignMetatype(ONNXOpMetatype): - name = "RoiAlignOp" +class ONNXROIAlignMetatype(ONNXOpMetatype): + name = "ROIAlignOp" op_names = ["RoiAlign"] diff --git a/nncf/onnx/statistics/aggregator.py b/nncf/onnx/statistics/aggregator.py index a768a855258..8c318327c66 100644 --- a/nncf/onnx/statistics/aggregator.py +++ b/nncf/onnx/statistics/aggregator.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict +from typing import Dict, List import numpy as np import onnx @@ -20,6 +20,8 @@ from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.tensor_statistics.aggregator import StatisticsAggregator from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer +from nncf.onnx.graph.metatypes.groups import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS +from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXOpMetatype from nncf.onnx.graph.node_utils import get_input_edge from nncf.onnx.graph.node_utils import get_input_edges_mapping from nncf.onnx.graph.onnx_helper import get_name_to_node_map @@ -28,6 +30,10 @@ class ONNXStatisticsAggregator(StatisticsAggregator): + @property + def metatypes_output_has_no_batch_axis(self) -> List[ONNXOpMetatype]: + return OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS + def collect_statistics(self, model: onnx.ModelProto, graph: NNCFGraph) -> None: self.input_edges_mapping = get_input_edges_mapping(graph) self.node_mapping = get_name_to_node_map(model) diff --git a/nncf/openvino/graph/metatypes/groups.py b/nncf/openvino/graph/metatypes/groups.py index eef2fba12df..f24f5420c9b 100644 --- a/nncf/openvino/graph/metatypes/groups.py +++ b/nncf/openvino/graph/metatypes/groups.py @@ -197,3 +197,10 @@ ov_metatypes.OVConvolutionBackpropDataMetatype, ov_metatypes.OVGroupConvolutionBackpropDataMetatype, ] + +OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ + ov_metatypes.OVSpaceToBatchMetatype, + ov_metatypes.OVROIPoolingMetatype, + ov_metatypes.OVROIAlignMetatype, + ov_metatypes.OVEmbeddingMetatype, +] diff --git a/nncf/openvino/graph/metatypes/openvino_metatypes.py b/nncf/openvino/graph/metatypes/openvino_metatypes.py index a51ea1f2187..dffc0f11c25 100644 --- a/nncf/openvino/graph/metatypes/openvino_metatypes.py +++ b/nncf/openvino/graph/metatypes/openvino_metatypes.py @@ -468,11 +468,17 @@ class OVLogMetatype(OVOpMetatype): @OV_OPERATOR_METATYPES.register() -class 
OVRoiAlignMetatype(OVOpMetatype): - name = "RoiAlignOp" +class OVROIAlignMetatype(OVOpMetatype): + name = "ROIAlignOp" op_names = ["ROIAlign"] +@OV_OPERATOR_METATYPES.register() +class OVROIPoolingMetatype(OVOpMetatype): + name = "ROIPoolingOp" + op_names = ["ROIPooling"] + + @OV_OPERATOR_METATYPES.register() class OVGatherMetatype(OVOpMetatype): name = "GatherOp" diff --git a/nncf/openvino/statistics/aggregator.py b/nncf/openvino/statistics/aggregator.py index 7fd5e26c72d..2d5fe49c6ac 100644 --- a/nncf/openvino/statistics/aggregator.py +++ b/nncf/openvino/statistics/aggregator.py @@ -10,7 +10,7 @@ # limitations under the License. from collections import defaultdict -from typing import Dict +from typing import Dict, List import numpy as np import openvino.runtime as ov @@ -23,12 +23,18 @@ from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.experimental.common.tensor_statistics.collectors import MergedTensorCollector from nncf.experimental.common.tensor_statistics.collectors import TensorCollector +from nncf.openvino.graph.metatypes.groups import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS +from nncf.openvino.graph.metatypes.openvino_metatypes import OVOpMetatype from nncf.openvino.graph.transformations.commands import OVInplaceFnInsertionCommand from nncf.openvino.graph.transformations.commands import OVOutputInsertionCommand from nncf.openvino.tensor import OVNNCFTensor class OVStatisticsAggregator(StatisticsAggregator): + @property + def metatypes_output_has_no_batch_axis(self) -> List[OVOpMetatype]: + return OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS + def collect_statistics(self, model: ov.Model, graph: NNCFGraph) -> None: self._name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} super().collect_statistics(model, graph) diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index 0fed24f0e67..3d66ea87a3a 100644 --- 
a/nncf/quantization/algorithms/post_training/algorithm.py +++ b/nncf/quantization/algorithms/post_training/algorithm.py @@ -14,7 +14,6 @@ from nncf import Dataset from nncf.common.graph.graph import NNCFGraph -from nncf.common.logging import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType @@ -105,10 +104,6 @@ def apply( "A dataset is required for the post-training quantization " "algorithm to collect statistics for intermediate models." ) - if dataset is not None: - batch_size = dataset.get_batch_size() or 1 - if dataset is not None and batch_size > 1: - nncf_logger.warn("Statistics for batch_size > 1 does not match to the recomended batch_size=1") step_index_to_statistics = None if statistic_points: step_index_to_statistics = {0: statistic_points} diff --git a/nncf/torch/graph/operator_metatypes.py b/nncf/torch/graph/operator_metatypes.py index 324b0d7d48c..973c8de5812 100644 --- a/nncf/torch/graph/operator_metatypes.py +++ b/nncf/torch/graph/operator_metatypes.py @@ -1027,3 +1027,10 @@ def get_operator_metatypes() -> List[Type[OperatorMetatype]]: ] OP_NAMES_QUANTIZE_NODE = ["symmetric_quantize", "asymmetric_quantize"] + +OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ + PTEmbeddingMetatype, + PTEmbeddingBagMetatype, + PTModuleEmbeddingBagMetatype, + PTModuleEmbeddingMetatype, +] diff --git a/nncf/torch/statistics/aggregator.py b/nncf/torch/statistics/aggregator.py index 41fdc20c4fa..d01f14f2ff2 100644 --- a/nncf/torch/statistics/aggregator.py +++ b/nncf/torch/statistics/aggregator.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict +from typing import Dict, List import numpy as np import torch @@ -20,6 +20,8 @@ from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.tensor_statistics.aggregator import StatisticPointsContainer from nncf.common.tensor_statistics.aggregator import StatisticsAggregator +from nncf.torch.graph.operator_metatypes import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS +from nncf.torch.graph.operator_metatypes import PTOperatorMetatype from nncf.torch.graph.transformations.commands import PTInsertionCommand from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.tensor import PTNNCFTensor @@ -27,6 +29,10 @@ class PTStatisticsAggregator(StatisticsAggregator): + @property + def metatypes_output_has_no_batch_axis(self) -> List[PTOperatorMetatype]: + return OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS + def collect_statistics(self, model: NNCFNetwork, graph: NNCFGraph) -> None: with torch.no_grad(): with model.nncf.temporary_clean_view() as intermediate_model: From e9062a5ba13fb14eef996a6077f13ecb249b6488 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 15 Jan 2024 15:42:06 +0100 Subject: [PATCH 020/108] remove _check_input_data_format in OVEngine --- nncf/openvino/engine.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/nncf/openvino/engine.py b/nncf/openvino/engine.py index 20c616e3dd9..0eddc3b30bd 100644 --- a/nncf/openvino/engine.py +++ b/nncf/openvino/engine.py @@ -13,7 +13,6 @@ import numpy as np import openvino.runtime as ov -import torch from nncf.common.engine import Engine from nncf.parameters import TargetDevice @@ -30,28 +29,6 @@ class OVCompiledModelEngine(Engine): def __init__(self, model: ov.CompiledModel): self.compiled_model = model - self.input_tensor_names = set() - self.number_of_inputs = len(model.inputs) - for model_input in model.inputs: - self.input_tensor_names.update(model_input.get_names()) - - def _check_input_data_format( - self, input_data: Union[np.ndarray, 
List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]] - ) -> None: - """ - Checks correspondence of the model input names and the passed data. - If there is a mismatch, the method throws a more specific and readable error than - original error raised by the compiled model. - - :param input_data: Provided inputs to infer the model. - """ - actual_num_inputs = 1 if isinstance(input_data, (np.ndarray, torch.Tensor)) else len(input_data) - if actual_num_inputs != self.number_of_inputs: - raise RuntimeError(f"Model expects {self.number_of_inputs} inputs, but {actual_num_inputs} are provided.") - if isinstance(input_data, dict): - for name in input_data: - if isinstance(name, str) and name not in self.input_tensor_names: - raise RuntimeError(f"Missing a required input: {name} to run the model.") def infer( self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]] @@ -63,7 +40,6 @@ def infer( :param input_data: Inputs for the model. :return output_data: Model's output. 
""" - self._check_input_data_format(input_data) model_outputs = self.compiled_model(input_data) output_data = {} From 8770ca46ca19d8d67c05e7aa4befc677a3f2fd23 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 15 Jan 2024 15:49:15 +0100 Subject: [PATCH 021/108] get_channel_agnostic_reduction_axes to common --- nncf/common/graph/utils.py | 17 ++++++++++++++++- nncf/openvino/graph/node_utils.py | 14 -------------- .../algorithms/channel_alignment/algorithm.py | 3 ++- .../algorithms/channel_alignment/backend.py | 12 ------------ .../channel_alignment/openvino_backend.py | 6 ------ .../algorithms/min_max/openvino_backend.py | 2 +- .../algorithms/smooth_quant/algorithm.py | 3 ++- .../algorithms/smooth_quant/backend.py | 11 ----------- .../smooth_quant/openvino_backend.py | 5 ----- .../weight_compression/openvino_backend.py | 2 +- .../test_tensor_collector_batch_size.py | 2 +- tests/common/graph/test_utils.py | 16 ++++++++++++++++ tests/openvino/native/test_node_utils.py | 18 ------------------ 13 files changed, 39 insertions(+), 72 deletions(-) diff --git a/nncf/common/graph/utils.py b/nncf/common/graph/utils.py index 0352693e6a5..b5bd4fddce7 100644 --- a/nncf/common/graph/utils.py +++ b/nncf/common/graph/utils.py @@ -10,13 +10,14 @@ # limitations under the License. 
from functools import partial -from typing import List, Set +from typing import List, Optional, Set from nncf.common.graph import NNCFGraph from nncf.common.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.logging import nncf_logger from nncf.common.pruning.utils import traverse_function +from nncf.common.tensor_statistics.collectors import ReductionAxes def get_concat_axis(input_shapes: List[List[int]], output_shapes: List[List[int]]) -> int: @@ -114,3 +115,17 @@ def get_number_of_quantized_ops( else: nodes_to_see.extend(graph.get_next_nodes(node)) return len(quantized_ops) + + +def get_channel_agnostic_reduction_axes(channel_axes: List[int], shape: List[int]) -> Optional[ReductionAxes]: + """ + Returns filtered reduction axes without axes that corresponds channels. + + :param channel_axes: List of the channel axes. + :param shape: Shape that need to be filtered. + :return: Reduction axes in tuple format. + """ + reduction_axes = list(range(len(shape))) + for channel_axis in sorted(channel_axes, reverse=True): + del reduction_axes[channel_axis] + return tuple(reduction_axes) diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index d58c6706325..48b72747130 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -393,20 +393,6 @@ def get_matmul_channel_axes(node: ov.Node) -> List[int]: return [idx for idx, elem in enumerate(weights_layout) if elem in [OVLayoutElem.SPATIAL, OVLayoutElem.C_OUT]] -def get_channel_agnostic_reduction_axes(channel_axes: List[int], shape: List[int]) -> Optional[ReductionAxes]: - """ - Returns filtered reduction axes without axes that corresponds channels. - - :param channel_axes: List of the channel axes. - :param shape: Shape that need to be filtered. - :return: Reduction axes in tuple format. 
- """ - reduction_axes = list(range(len(shape))) - for channel_axis in sorted(channel_axes, reverse=True): - del reduction_axes[channel_axis] - return tuple(reduction_axes) - - def create_bias_tensor(node_without_bias: NNCFNode, graph: NNCFGraph, value: Any) -> np.ndarray: """ Creates bias value constant array filled by given value. diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py index 46754c903a8..81a2d2ba9f8 100644 --- a/nncf/quantization/algorithms/channel_alignment/algorithm.py +++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py @@ -22,6 +22,7 @@ from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout +from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track from nncf.common.tensor_statistics.statistic_point import StatisticPoint @@ -389,7 +390,7 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin channel_axis = conv_in.metatype.output_channel_axis activation_shape = list(range(len(graph.get_output_edges(node_in)[0].tensor_shape))) - reduction_axes = self._backend_entity.get_channel_agnostic_reduction_axes([channel_axis], activation_shape) + reduction_axes = get_channel_agnostic_reduction_axes([channel_axis], activation_shape) statistic_collector = self._backend_entity.get_statistic_collector( reduction_axes, self._quantile, self.subset_size, self.inplace_statistics diff --git a/nncf/quantization/algorithms/channel_alignment/backend.py b/nncf/quantization/algorithms/channel_alignment/backend.py index 9ab51977d4f..9d6e6bab44d 100644 --- a/nncf/quantization/algorithms/channel_alignment/backend.py +++ b/nncf/quantization/algorithms/channel_alignment/backend.py @@ -151,15 
+151,3 @@ def create_bias_tensor(node: NNCFNode, nncf_graph: NNCFGraph, value: Any) -> np. :param value: Value to fill bias constant array. :return: Bias value constant array filled by given value. """ - - @staticmethod - @abstractmethod - def get_channel_agnostic_reduction_axes(channel_axis: int, shape: Tuple[int]) -> Tuple[int]: - """ - Returns filtered reduction shape without axes that corresponds channels. - Example: channel_axis=-2, shape=(1, 3, 2, 4), result=(0, 1, 3). - - :param channel_axes: List of the channel axes. - :param shape: Shape that need to be filtered. - :return: Reduction shape in tuple format. - """ diff --git a/nncf/quantization/algorithms/channel_alignment/openvino_backend.py b/nncf/quantization/algorithms/channel_alignment/openvino_backend.py index a92a86bb0ae..6eb7100f715 100644 --- a/nncf/quantization/algorithms/channel_alignment/openvino_backend.py +++ b/nncf/quantization/algorithms/channel_alignment/openvino_backend.py @@ -18,7 +18,6 @@ from nncf.common.graph import NNCFNode from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes from nncf.common.graph.transformations.commands import TargetType -from nncf.common.tensor_statistics.collectors import ReductionAxes from nncf.common.tensor_statistics.collectors import TensorStatisticCollectorBase from nncf.experimental.common.tensor_statistics.collectors import MedianAggregator from nncf.experimental.common.tensor_statistics.collectors import TensorCollector @@ -34,7 +33,6 @@ from nncf.openvino.graph.metatypes.openvino_metatypes import OVSubtractMetatype from nncf.openvino.graph.node_utils import create_bias_tensor from nncf.openvino.graph.node_utils import get_bias_value -from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes from nncf.openvino.graph.node_utils import get_node_with_bias_value from nncf.openvino.graph.node_utils import get_weight_value from nncf.openvino.graph.transformations.commands import OVTargetPoint @@ -135,7 +133,3 @@ def 
get_conv_layer_attributes(node: NNCFNode) -> ConvolutionLayerAttributes: @staticmethod def create_bias_tensor(node: NNCFNode, nncf_graph: NNCFGraph, value: Any) -> np.ndarray: return create_bias_tensor(node, nncf_graph, value) - - @staticmethod - def get_channel_agnostic_reduction_axes(channel_axis: int, shape: Tuple[int]) -> ReductionAxes: - return get_channel_agnostic_reduction_axes(channel_axes=channel_axis, shape=shape) diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 98b33bf37a5..22ae427d366 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -17,6 +17,7 @@ from nncf.common.graph.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType +from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.common.hardware.config import HWConfig from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig @@ -27,7 +28,6 @@ from nncf.openvino.graph.metatypes import openvino_metatypes as om from nncf.openvino.graph.metatypes.groups import OPERATIONS_WITH_WEIGHTS from nncf.openvino.graph.model_utils import get_start_nodes_for_activation_path_tracing -from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes from nncf.openvino.graph.node_utils import get_weight_channel_axes from nncf.openvino.graph.transformations.commands import OVConvertInsertionCommand from nncf.openvino.graph.transformations.commands import OVQuantizerInsertionCommand diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index 6f67cf9ee98..64b9bb065fa 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ 
b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -31,6 +31,7 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout +from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track from nncf.common.tensor_statistics.statistic_point import StatisticPoint @@ -345,7 +346,7 @@ def _calculate_input_reduction_axes(self, nncf_graph: NNCFGraph, node: NNCFNode, reduction_axes = tuple([]) if len(shape) > 1: channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port) - reduction_axes = self._backend_entity.get_channel_agnostic_reduction_axes(channel_axis, shape) + reduction_axes = get_channel_agnostic_reduction_axes(channel_axis, shape) return reduction_axes def _process_weight_statistics(self, node: NNCFNode, weights: TTensor) -> TTensor: diff --git a/nncf/quantization/algorithms/smooth_quant/backend.py b/nncf/quantization/algorithms/smooth_quant/backend.py index 38605929617..1c2f59cb9ba 100644 --- a/nncf/quantization/algorithms/smooth_quant/backend.py +++ b/nncf/quantization/algorithms/smooth_quant/backend.py @@ -86,17 +86,6 @@ def get_input_ports_map(node: NNCFNode, nncf_graph: NNCFGraph) -> Dict[str, int] :return: Map with the activation & weighted ports. """ - @staticmethod - @abstractmethod - def get_channel_agnostic_reduction_axes(channel_axis: int, shape: Tuple[int]) -> Tuple[int]: - """ - Returns filtered reduction axes without axes that corresponds channels. - - :param channel_axes: List of the channel axes. - :param shape: Shape that need to be filtered. - :return: Reduction axes in tuple format. 
- """ - @staticmethod @abstractmethod def get_abs_max_channel_collector( diff --git a/nncf/quantization/algorithms/smooth_quant/openvino_backend.py b/nncf/quantization/algorithms/smooth_quant/openvino_backend.py index fd312c202ba..58c59ccfb04 100644 --- a/nncf/quantization/algorithms/smooth_quant/openvino_backend.py +++ b/nncf/quantization/algorithms/smooth_quant/openvino_backend.py @@ -25,7 +25,6 @@ from nncf.openvino.graph.metatypes.groups import QUANTIZE_AGNOSTIC_OPERATIONS from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype -from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes from nncf.openvino.graph.node_utils import get_weight_value from nncf.openvino.graph.transformations.command_creation import OVCommandCreator from nncf.openvino.graph.transformations.commands import OVMultiplyInsertionCommand @@ -69,10 +68,6 @@ def get_input_ports_map(node: NNCFNode, nncf_graph: NNCFGraph) -> Dict[str, int] return {"activation": activation_ports[0], "weight": weight_ports[0]} - @staticmethod - def get_channel_agnostic_reduction_axes(channel_axis: int, shape: Tuple[int]) -> Tuple[int]: - return get_channel_agnostic_reduction_axes([channel_axis], shape) - @staticmethod def get_abs_max_channel_collector( num_samples: int, stats_reduction_axes: Tuple[int], inplace: bool, branch_key: str diff --git a/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/nncf/quantization/algorithms/weight_compression/openvino_backend.py index 0f54f01a35a..b4a3f70eede 100644 --- a/nncf/quantization/algorithms/weight_compression/openvino_backend.py +++ b/nncf/quantization/algorithms/weight_compression/openvino_backend.py @@ -19,13 +19,13 @@ from nncf.common.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType +from nncf.common.graph.utils import 
get_channel_agnostic_reduction_axes from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track from nncf.common.utils.helpers import create_table from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.openvino.graph.metatypes.openvino_metatypes import OVEmbeddingMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype -from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes from nncf.openvino.graph.node_utils import get_const_value from nncf.openvino.graph.node_utils import get_weight_channel_axes from nncf.openvino.graph.transformations.commands import OVTargetPoint diff --git a/tests/common/experimental/test_tensor_collector_batch_size.py b/tests/common/experimental/test_tensor_collector_batch_size.py index 6406610d6e2..9a0953ac2fa 100644 --- a/tests/common/experimental/test_tensor_collector_batch_size.py +++ b/tests/common/experimental/test_tensor_collector_batch_size.py @@ -15,8 +15,8 @@ import numpy as np import pytest +from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.experimental.common.tensor_statistics.collectors import TensorCollector -from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes class TemplateTestTensorCollectorBatchSize(ABC): diff --git a/tests/common/graph/test_utils.py b/tests/common/graph/test_utils.py index c1bc08b4db5..9b391448cfc 100644 --- a/tests/common/graph/test_utils.py +++ b/tests/common/graph/test_utils.py @@ -11,6 +11,7 @@ import pytest +from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.common.graph.utils import get_concat_axis TEST_CASES = [ @@ -27,3 +28,18 @@ def test_get_concat_axis(input_shape, output_shape, possible_axes): axis = get_concat_axis(input_shape, output_shape) assert axis in possible_axes + + +@pytest.mark.parametrize( + "shape, channel_axes, ref_reduction_axes", + [ + ((1, 128), [-1], (0,)), + ((1, 
256, 1), [-2], (0, 2)), + ((1, 128, 512), [-1], (0, 1)), + ((1, 3, 224, 224), [1], (0, 2, 3)), + ((1, 1, 12, 12), [1], (0, 2, 3)), + ((1, 1, 12, 12), [1, 2], (0, 3)), + ], +) +def test_get_channel_agnostic_reduction_axes(shape, channel_axes, ref_reduction_axes): + assert get_channel_agnostic_reduction_axes(channel_axes=channel_axes, shape=shape) == ref_reduction_axes diff --git a/tests/openvino/native/test_node_utils.py b/tests/openvino/native/test_node_utils.py index 47e1af5162c..197a934a599 100644 --- a/tests/openvino/native/test_node_utils.py +++ b/tests/openvino/native/test_node_utils.py @@ -18,7 +18,6 @@ from nncf.openvino.graph.layer_attributes import OVLayerAttributes from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype from nncf.openvino.graph.nncf_graph_builder import GraphConverter -from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes from nncf.openvino.graph.node_utils import get_weight_channel_axes from nncf.openvino.graph.node_utils import get_weight_value from nncf.openvino.graph.node_utils import get_weighted_layer_attributes @@ -97,20 +96,3 @@ def test_get_weight_channel_axes_for_matmul(weights_port_id, transpose, shape, e assert len(actual_channel_axes) == len(expected_channel_axes) assert all(a == b for a, b in zip(actual_channel_axes, expected_channel_axes)) - - -@pytest.mark.parametrize( - "shape, channel_axes, ref_reduction_axes", - [ - ((1, 128), [-1], (0,)), - ((1, 256, 1), [-2], (0, 2)), - ((1, 128, 512), [-1], (0, 1)), - ((1, 3, 224, 224), [1], (0, 2, 3)), - ((1, 1, 12, 12), [1], (0, 2, 3)), - ((1, 1, 12, 12), [1, 2], (0, 3)), - ], -) -def test_get_channel_agnostic_reduction_axes(shape, channel_axes, ref_reduction_axes): - reduction_axes = get_channel_agnostic_reduction_axes(channel_axes=channel_axes, shape=shape) - - assert reduction_axes == ref_reduction_axes From 9556c49a6dfe5d7a10a9d19c91b3f92d783c48a6 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 15 Jan 2024 15:55:10 +0100 
Subject: [PATCH 022/108] use get_channel_agnostic_reduction_axes for Torch --- nncf/torch/quantization/init_range.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nncf/torch/quantization/init_range.py b/nncf/torch/quantization/init_range.py index 5f81372b9c1..295ac24614f 100644 --- a/nncf/torch/quantization/init_range.py +++ b/nncf/torch/quantization/init_range.py @@ -17,6 +17,7 @@ import torch from nncf.common.graph.layer_attributes import WeightedLayerAttributes +from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.common.quantization.initialization.range import RangeInitCollectorParams from nncf.common.quantization.initialization.range import RangeInitConfig from nncf.common.quantization.initialization.range import RangeInitParams @@ -114,14 +115,13 @@ def get_reduction_axes(self, per_sample_stats: bool) -> ReductionAxes: """ ndims = len(self._input_shape) reduction_axes: List[int] = list(range(ndims)) + axes_to_remove = [] + if self.use_per_sample_stats(per_sample_stats): + axes_to_remove.append(0) if self._per_channel: val = (ndims + self._channel_idx) % ndims - reduction_axes.remove(val) - if not val and self.use_per_sample_stats(per_sample_stats): - raise RuntimeError("Batch dimension should be equal to zero") - if self.use_per_sample_stats(per_sample_stats): - reduction_axes = reduction_axes[1:] # Assumes batch is the first dimension - return tuple(reduction_axes) + axes_to_remove.append(val) + return get_channel_agnostic_reduction_axes(axes_to_remove, reduction_axes) def get_aggregation_axes(self, per_sample_stats: bool) -> AggregationAxes: """ From cb90e7710bc58020d6822f80199beb82a192ed12 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 15 Jan 2024 16:03:01 +0100 Subject: [PATCH 023/108] use get_channel_agnostic_reduction_axes for ONNX --- nncf/onnx/graph/node_utils.py | 16 -- .../algorithms/min_max/onnx_backend.py | 4 +- tests/onnx/quantization/test_min_max.py | 222 
------------------ 3 files changed, 2 insertions(+), 240 deletions(-) delete mode 100644 tests/onnx/quantization/test_min_max.py diff --git a/nncf/onnx/graph/node_utils.py b/nncf/onnx/graph/node_utils.py index 7d46356904c..b9a314f2ef5 100644 --- a/nncf/onnx/graph/node_utils.py +++ b/nncf/onnx/graph/node_utils.py @@ -18,7 +18,6 @@ from nncf.common.graph.graph import NNCFNode from nncf.common.graph.transformations.commands import TargetType from nncf.common.logging.logger import nncf_logger -from nncf.common.tensor_statistics.collectors import ReductionAxes from nncf.onnx.graph.metatypes import onnx_metatypes as om from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXDequantizeLinearMetatype from nncf.onnx.graph.onnx_helper import get_tensor_value @@ -135,21 +134,6 @@ def transpose_axis(shape: List[int], axis: int) -> int: return range(len(shape) - 1, -1, -1)[axis] # Iterate backward throug axis -def get_reduction_shape(shape: List[int], axis: int) -> ReductionAxes: - """ - Returns reduction shape for shape and axis. - - :param shape: Shape. - :param axis: Axis. - :return: Reduction shape. - """ - reduction_shape = list(range(len(shape))) - if len(reduction_shape) == 1: # If only one channel - return tuple(reduction_shape) - reduction_shape.pop(axis) - return tuple(reduction_shape) - - def _get_weight_quantization_axis(node: NNCFNode, port_id: int) -> int: """ Returns weight tensor axis, along which quantizer parameters are calculated. 
diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 1ea7e2b3042..e9054f04e54 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -18,6 +18,7 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand +from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.common.hardware.config import HWConfig from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig @@ -26,7 +27,6 @@ from nncf.onnx.graph.node_utils import get_input_edges_mapping from nncf.onnx.graph.node_utils import get_quantization_axis from nncf.onnx.graph.node_utils import get_quantized_tensor_shape -from nncf.onnx.graph.node_utils import get_reduction_shape from nncf.onnx.graph.transformations.commands import ONNXQuantizerInsertionCommand from nncf.onnx.graph.transformations.commands import ONNXTargetPoint from nncf.onnx.hardware.config import ONNXHWConfig @@ -159,7 +159,7 @@ def get_statistic_collector( quantization_axis = get_quantization_axis(is_per_channel, node, target_point) quantized_tensor_shape = get_quantized_tensor_shape(nncf_graph, node, target_point) if quantization_axis is not None and quantized_tensor_shape is not None: # Per-Channel - reduction_shape = get_reduction_shape(quantized_tensor_shape, quantization_axis) + reduction_shape = get_channel_agnostic_reduction_axes([quantization_axis], quantized_tensor_shape) if ( range_estimator_params.min.statistics_type == StatisticsType.MIN diff --git a/tests/onnx/quantization/test_min_max.py b/tests/onnx/quantization/test_min_max.py deleted file mode 100644 index 2c6a1cf78e2..00000000000 --- a/tests/onnx/quantization/test_min_max.py +++ 
/dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from dataclasses import dataclass -from typing import List - -import pytest - -import nncf.onnx.graph.metatypes.onnx_metatypes as om -from nncf.common.graph.graph import NNCFNode -from nncf.common.graph.transformations.commands import TargetType -from nncf.onnx.graph.nncf_graph_builder import ONNXLayerAttributes -from nncf.onnx.graph.node_utils import get_quantization_axis -from nncf.onnx.graph.node_utils import get_reduction_shape -from nncf.onnx.graph.transformations.commands import ONNXTargetPoint - - -@dataclass -class TestCase: - nncf_node: NNCFNode - target_point: ONNXTargetPoint - per_channel: bool - ref_reduction_shape: List[int] - - -test_cases = ( - TestCase( - nncf_node=NNCFNode( - { - NNCFNode.ID_NODE_ATTR: 0, - NNCFNode.NODE_NAME_ATTR: "conv_with_weight_per_tensor", - NNCFNode.METATYPE_ATTR: om.ONNXConvolutionMetatype, - NNCFNode.LAYER_ATTRIBUTES: ONNXLayerAttributes(weight_attrs={1: {"shape": [3, 5, 8]}}), - } - ), - target_point=ONNXTargetPoint( - target_type=TargetType.OPERATION_WITH_WEIGHTS, - target_node_name="conv_with_weight_per_tensor", - port_id=1, - ), - per_channel=False, - ref_reduction_shape=None, - ), - TestCase( - nncf_node=NNCFNode( - { - NNCFNode.ID_NODE_ATTR: 0, - NNCFNode.NODE_NAME_ATTR: "conv_with_weight_per_channel", - NNCFNode.METATYPE_ATTR: om.ONNXConvolutionMetatype, - NNCFNode.LAYER_ATTRIBUTES: 
ONNXLayerAttributes(weight_attrs={1: {"shape": [3, 5, 8]}}), - } - ), - target_point=ONNXTargetPoint( - target_type=TargetType.OPERATION_WITH_WEIGHTS, - target_node_name="gemm_with_weight_per_channel_0_port", - port_id=1, - ), - per_channel=True, - ref_reduction_shape=(1, 2), - ), - TestCase( - nncf_node=NNCFNode( - { - NNCFNode.ID_NODE_ATTR: 0, - NNCFNode.NODE_NAME_ATTR: "gemm_with_weight_per_tensor", - NNCFNode.METATYPE_ATTR: om.ONNXGemmMetatype, - NNCFNode.LAYER_ATTRIBUTES: ONNXLayerAttributes(weight_attrs={1: {"shape": [5, 8]}}), - } - ), - target_point=ONNXTargetPoint( - target_type=TargetType.OPERATION_WITH_WEIGHTS, - target_node_name="gemm_with_weight_per_tensor", - port_id=1, - ), - per_channel=False, - ref_reduction_shape=None, - ), - TestCase( - nncf_node=NNCFNode( - { - NNCFNode.ID_NODE_ATTR: 0, - NNCFNode.NODE_NAME_ATTR: "gemm_with_weight_per_channel", - NNCFNode.METATYPE_ATTR: om.ONNXGemmMetatype, - NNCFNode.LAYER_ATTRIBUTES: ONNXLayerAttributes(weight_attrs={1: {"shape": [5, 8]}}), - } - ), - target_point=ONNXTargetPoint( - target_type=TargetType.OPERATION_WITH_WEIGHTS, - target_node_name="gemm_with_weight_per_channel_0_port", - port_id=1, - ), - per_channel=True, - ref_reduction_shape=(0,), - ), - TestCase( - nncf_node=NNCFNode( - { - NNCFNode.ID_NODE_ATTR: 0, - NNCFNode.NODE_NAME_ATTR: "gemm_with_weight_per_channel_extra_attrs", - NNCFNode.METATYPE_ATTR: om.ONNXGemmMetatype, - NNCFNode.LAYER_ATTRIBUTES: ONNXLayerAttributes( - weight_attrs={1: {"shape": [5, 8]}}, node_attrs={"transA": 0, "transB": 0} - ), - } - ), - target_point=ONNXTargetPoint( - target_type=TargetType.OPERATION_WITH_WEIGHTS, - target_node_name="gemm_with_weight_per_channel_extra_attrs", - port_id=1, - ), - per_channel=True, - ref_reduction_shape=(0,), - ), - TestCase( - nncf_node=NNCFNode( - { - NNCFNode.ID_NODE_ATTR: 0, - NNCFNode.NODE_NAME_ATTR: "gemm_with_weight_per_channel_extra_attrs", - NNCFNode.METATYPE_ATTR: om.ONNXGemmMetatype, - NNCFNode.LAYER_ATTRIBUTES: 
ONNXLayerAttributes( - weight_attrs={1: {"shape": [5, 8]}}, node_attrs={"transA": 1, "transB": 0} - ), - } - ), - target_point=ONNXTargetPoint( - target_type=TargetType.OPERATION_WITH_WEIGHTS, - target_node_name="gemm_with_weight_per_channel_extra_attrs", - port_id=1, - ), - per_channel=True, - ref_reduction_shape=(0,), - ), - TestCase( - nncf_node=NNCFNode( - { - NNCFNode.ID_NODE_ATTR: 0, - NNCFNode.NODE_NAME_ATTR: "gemm_with_weight_per_channel_transpose", - NNCFNode.METATYPE_ATTR: om.ONNXGemmMetatype, - NNCFNode.LAYER_ATTRIBUTES: ONNXLayerAttributes( - weight_attrs={1: {"shape": [5, 8]}}, node_attrs={"transA": 0, "transB": 1} - ), - } - ), - target_point=ONNXTargetPoint( - target_type=TargetType.OPERATION_WITH_WEIGHTS, - target_node_name="gemm_with_weight_per_channel_transpose", - port_id=1, - ), - per_channel=True, - ref_reduction_shape=(1,), - ), - TestCase( - nncf_node=NNCFNode( - { - NNCFNode.ID_NODE_ATTR: 0, - NNCFNode.NODE_NAME_ATTR: "gemm_with_weight_per_channel_transpose_one_dim", - NNCFNode.METATYPE_ATTR: om.ONNXGemmMetatype, - NNCFNode.LAYER_ATTRIBUTES: ONNXLayerAttributes( - weight_attrs={1: {"shape": [5]}}, node_attrs={"transA": 0, "transB": 1} - ), - } - ), - target_point=ONNXTargetPoint( - target_type=TargetType.OPERATION_WITH_WEIGHTS, - target_node_name="gemm_with_weight_per_channel_0_port", - port_id=1, - ), - per_channel=True, - ref_reduction_shape=(0,), - ), - TestCase( - nncf_node=NNCFNode( - { - NNCFNode.ID_NODE_ATTR: 0, - NNCFNode.NODE_NAME_ATTR: "gemm_with_weight_per_channel_0_port", - NNCFNode.METATYPE_ATTR: om.ONNXGemmMetatype, - NNCFNode.LAYER_ATTRIBUTES: ONNXLayerAttributes( - weight_attrs={0: {"shape": [10, 10, 5]}}, node_attrs={"transA": 0, "transB": 1} - ), - } - ), - target_point=ONNXTargetPoint( - target_type=TargetType.OPERATION_WITH_WEIGHTS, - target_node_name="gemm_with_weight_per_channel_0_port", - port_id=0, - ), - per_channel=True, - ref_reduction_shape=(0, 1), - ), -) - - -@pytest.mark.parametrize( - "test_case", - 
(test_cases), - ids=[test_case.nncf_node.node_name for test_case in test_cases], -) -def test_get_reduction_shape(test_case): - """Checks the correct return reduction shape in ONNXMinMaxAlgo. - Edge cases: - 1) per-tensor. - 2) transpose axis of GEMM node. - 3) one dimensional weight tensor. - """ - quantization_axis = get_quantization_axis( - is_per_channel=test_case.per_channel, node=test_case.nncf_node, target_point=test_case.target_point - ) - if quantization_axis is not None: # Per-Channel - reduction_shape = get_reduction_shape( - test_case.nncf_node.layer_attributes.weight_attrs[test_case.target_point.port_id]["shape"], - quantization_axis, - ) - assert reduction_shape == test_case.ref_reduction_shape - else: - assert not test_case.per_channel From cd10c57b38d12c91dcacade838cbf2fdaa6d552e Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 17 Jan 2024 12:55:36 +0100 Subject: [PATCH 024/108] draft --- nncf/common/tensor_statistics/aggregator.py | 36 ++++++---- .../native/quantization/test_batch_size.py | 17 +++++ tests/post_training/test_templates/helpers.py | 12 ++-- .../test_templates/test_batch_size.py | 68 +++++++++++++++++++ 4 files changed, 110 insertions(+), 23 deletions(-) create mode 100644 tests/openvino/native/quantization/test_batch_size.py create mode 100644 tests/post_training/test_templates/test_batch_size.py diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index e20d39134e3..b83a0ab789d 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -40,6 +40,18 @@ def __init__(self, dataset: Dataset): self.dataset_size = self.dataset_size * self.batch_size if self.dataset_size is not None else self.dataset_size self.statistic_points = StatisticPointsContainer() + def _get_total_calibration_samples( + self, + ): + return ( + min(self.dataset_size or self.stat_subset_size, self.stat_subset_size) + if self.stat_subset_size is not None + 
else None + ) + + def _get_iterations_num(self, calibration_samples_num): + return calibration_samples_num // self.batch_size if calibration_samples_num is not None else None + def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ Collects statistics for registered StatisticPoints. @@ -58,26 +70,20 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: model_with_outputs = model_transformer.transform(transformation_layout) engine = factory.EngineFactory.create(model_with_outputs) - calibration_samples_num = ( - min(self.dataset_size or self.stat_subset_size, self.stat_subset_size) - if self.stat_subset_size is not None - else None - ) # Maybe subsample should be in terms of a tensor with arbitary batch_size + calibration_samples_num = self._get_total_calibration_samples() + iterataions_num = self._get_iterations_num(calibration_samples_num) + if iterataions_num == 0: + nncf_logger.error("Iterations num is 0") + iterataions_num = 1 collected_statistics_num = 0 with track(total=calibration_samples_num, description="Statistics collection") as pbar: - for input_data in islice(self.dataset.get_inference_data(), self.stat_subset_size): - batch_size_to_collect = ( - min(calibration_samples_num - collected_statistics_num, self.batch_size) - if calibration_samples_num is not None - else self.batch_size - ) + for input_data in islice(self.dataset.get_inference_data(), iterataions_num): outputs = engine.infer(input_data) processed_outputs = self._process_outputs(outputs) self._register_statistics(processed_outputs, merged_statistics) - collected_statistics_num += batch_size_to_collect - pbar.progress.update(pbar.task, advance=batch_size_to_collect) - if calibration_samples_num and collected_statistics_num == calibration_samples_num: - break + collected_statistics_num += self.batch_size + pbar.progress.update(pbar.task, advance=self.batch_size) + if collected_statistics_num == 0: raise RuntimeError( "Calibration dataset must not be 
empty. Please provide calibration dataset with at least one sample." diff --git a/tests/openvino/native/quantization/test_batch_size.py b/tests/openvino/native/quantization/test_batch_size.py new file mode 100644 index 00000000000..0f74a9f282a --- /dev/null +++ b/tests/openvino/native/quantization/test_batch_size.py @@ -0,0 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from nncf.openvino.statistics.aggregator import OVStatisticsAggregator +from tests.post_training.test_templates.test_batch_size import TemplateTestBatchSize + + +class TestOVBatchSize(TemplateTestBatchSize): + def create_statistics_aggregator(self, dataset): + return OVStatisticsAggregator(dataset) diff --git a/tests/post_training/test_templates/helpers.py b/tests/post_training/test_templates/helpers.py index ae15d8ee3eb..9f69e83f1a9 100644 --- a/tests/post_training/test_templates/helpers.py +++ b/tests/post_training/test_templates/helpers.py @@ -30,9 +30,9 @@ class StaticDatasetMock: to convert data to backend specific type. 
""" - def __init__(self, input_size: Tuple, fn_to_type: Callable = None): + def __init__(self, input_size: Tuple, length: int = 1, fn_to_type: Callable = None): super().__init__() - self._len = 1 + self._len = length self._input_size = input_size self._fn_to_type = fn_to_type @@ -47,11 +47,7 @@ def __len__(self) -> int: return self._len -def get_static_dataset( - input_size: Tuple, - transform_fn: Callable, - fn_to_type: Callable, -) -> Dataset: +def get_static_dataset(input_size: Tuple, transform_fn: Callable, fn_to_type: Callable, length: int = 1) -> Dataset: """ Create nncf.Dataset for StaticDatasetMock. :param input_size: Size of generated tensors, @@ -59,7 +55,7 @@ def get_static_dataset( :param fn_to_type: Function, defaults to None. :return: Instance of nncf.Dataset for StaticDatasetMock. """ - return Dataset(StaticDatasetMock(input_size, fn_to_type), transform_fn) + return Dataset(StaticDatasetMock(input_size, length, fn_to_type), transform_fn) class ConvTestModel(nn.Module): diff --git a/tests/post_training/test_templates/test_batch_size.py b/tests/post_training/test_templates/test_batch_size.py new file mode 100644 index 00000000000..02b7ecc9734 --- /dev/null +++ b/tests/post_training/test_templates/test_batch_size.py @@ -0,0 +1,68 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from abc import ABC +from abc import abstractmethod +from dataclasses import dataclass +from dataclasses import fields + +import pytest + +from nncf.common.tensor_statistics.aggregator import StatisticsAggregator +from tests.post_training.test_templates.helpers import get_static_dataset + + +@dataclass +class DataForTest: + batch_size: int + dataset_len: int + subset_size: int + ref_calibration_samples_num: int + ref_iterations_num: int + + +class TemplateTestBatchSize(ABC): + @abstractmethod + def create_statistics_aggregator(self, dataset) -> StatisticsAggregator: + ... + + def create_dataset(self, lenght, batch_size): + dataset = get_static_dataset(None, None, None, lenght) + dataset._data_source.batch_size = batch_size + # print(dataset.get_batch_size()) + return dataset + + @pytest.mark.parametrize( + ("test_data"), + ( + [ + # DataForTest(None, 1000, None, None, None), + # DataForTest(1, 1000, 300, 300, 300), + # DataForTest(10, 1000, 300, 300, 30), + # DataForTest(300, 1000, 300, 300, 1), + # DataForTest(301, 1000, 300, 300, 0), + # DataForTest(301, 1000, 300, 300, 0), # batch_size > subset_size + DataForTest(300, 10, 300, 10, 0), # batch_size > len(dataset) + # DataForTest(300, 10, 300, 10, 0), # batch_size > len(dataset) + ] + ), + ) + def test_batch_size_subset(self, test_data): + batch_size, dataset_length, subset_size, ref_calibration_samples_num, ref_iterations_num = ( + getattr(test_data, field.name) for field in fields(test_data) + ) + dataset = self.create_dataset(dataset_length, batch_size) + statistics_aggregator = self.create_statistics_aggregator(dataset) + statistics_aggregator.stat_subset_size = subset_size + print(statistics_aggregator.dataset_size) + calibration_samples_num = statistics_aggregator._get_total_calibration_samples() + assert calibration_samples_num == ref_calibration_samples_num + iterataions_num = statistics_aggregator._get_iterations_num(calibration_samples_num) + assert iterataions_num == ref_iterations_num From 
426ec044217202cac9b756d4ab5a9bca39f402d1 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 11:30:31 +0100 Subject: [PATCH 025/108] fix test --- .../test_tensor_collector_batch_size.py | 10 +-- .../test_tensor_collector_batch_size.py | 72 +++++++++++++++++++ 2 files changed, 75 insertions(+), 7 deletions(-) create mode 100644 tests/onnx/quantization/test_tensor_collector_batch_size.py diff --git a/tests/common/experimental/test_tensor_collector_batch_size.py b/tests/common/experimental/test_tensor_collector_batch_size.py index 9a0953ac2fa..b7363e52a58 100644 --- a/tests/common/experimental/test_tensor_collector_batch_size.py +++ b/tests/common/experimental/test_tensor_collector_batch_size.py @@ -73,14 +73,12 @@ def _create_tensor_collector(self, shape, inplace, reducer, aggregator) -> Tenso collector.register_statistic_branch(statistic_branch_random_name, reducer, aggregator) return collector, reducer, aggregator - def _register_inputs(self, collector, dataitems, reducer, output_info): + def _register_inputs(self, collector, dataitems, reducer): for item in dataitems: input_ = {hash(reducer): [self.get_nncf_tensor_class()(item)]} collector.register_inputs(input_) def test_statistics_batch_size_equal(self, reducers, aggregators, inplace): - target_node_name = "target_node_name" - port_id = 0 tensor_shape = [3, 20, 20] dataitems = self.create_dataitems_without_batch_dim(input_shape=tensor_shape) @@ -88,16 +86,14 @@ def test_statistics_batch_size_equal(self, reducers, aggregators, inplace): collector, reducer, _ = self._create_tensor_collector(shape_batch_1, inplace, reducers, aggregators) # output_name = reducer.get_output_names(target_node_name, port_id) dataitems_batch_1 = self.add_batch_dim_to_dataitems(dataitems, batch_size=1) - output_info = collector.get_output_info(target_node_name, port_id) - self._register_inputs(collector, dataitems_batch_1, reducer, output_info) + self._register_inputs(collector, dataitems_batch_1, reducer) 
aggregated_tensor_batch_1 = list(collector._aggregate().values()) shape_batch_10 = [10, *tensor_shape] collector, reducer, _ = self._create_tensor_collector(shape_batch_10, inplace, reducers, aggregators) # output_name = reducer.get_output_names(target_node_name, port_id) dataitems_batch_10 = self.add_batch_dim_to_dataitems(dataitems, batch_size=10) - output_info = collector.get_output_info(target_node_name, port_id) - self._register_inputs(collector, dataitems_batch_10, reducer, output_info) + self._register_inputs(collector, dataitems_batch_10, reducer) aggregated_tensor_batch_10 = list(collector._aggregate().values()) assert np.array_equal(aggregated_tensor_batch_1, aggregated_tensor_batch_10) diff --git a/tests/onnx/quantization/test_tensor_collector_batch_size.py b/tests/onnx/quantization/test_tensor_collector_batch_size.py new file mode 100644 index 00000000000..8904dcc096f --- /dev/null +++ b/tests/onnx/quantization/test_tensor_collector_batch_size.py @@ -0,0 +1,72 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import List + +import numpy as np +import pytest + +from nncf.experimental.common.tensor_statistics.collectors import AGGREGATORS_MAP +from nncf.onnx.statistics.collectors import ONNX_REDUCERS_MAP +from nncf.onnx.statistics.collectors import ONNXNNCFCollectorTensorProcessor +from nncf.onnx.statistics.statistics import ONNXMinMaxTensorStatistic +from nncf.onnx.tensor import ONNXNNCFTensor +from tests.common.experimental.test_tensor_collector_batch_size import TemplateTestTensorCollectorBatchSize + + +class TestTensorCollectorBatchSize(TemplateTestTensorCollectorBatchSize): + @staticmethod + def get_tensor_statistics_class(): + return ONNXMinMaxTensorStatistic + + @staticmethod + def get_tensor_processor(): + return ONNXNNCFCollectorTensorProcessor() + + @staticmethod + def get_nncf_tensor_class(): + return ONNXNNCFTensor + + @pytest.fixture(params=ONNX_REDUCERS_MAP.values()) + def reducers(self, request) -> bool: + return request.param + + @pytest.fixture(params=AGGREGATORS_MAP.values()) + def aggregators(self, request) -> bool: + return request.param + + @pytest.fixture(params=[False]) + def inplace(self, request): + return request.param + + def create_dataitems_without_batch_dim(self, input_shape: List[int], length: int = 100) -> List[np.ndarray]: + rng = np.random.default_rng(seed=0) + data_items = [] + for _ in range(length): + data_items.append(rng.uniform(0, 1, input_shape)) + return data_items + + def add_batch_dim_to_dataitems(self, data_items: List[np.ndarray], batch_size: int) -> List[np.ndarray]: + assert batch_size >= 1 + dataset = [] + item = [] + cnt = 0 + for data_item in data_items: + if batch_size == 1: + dataset.append(np.expand_dims(data_item, 0)) + else: + item.append(data_item) + if cnt == batch_size - 1: + dataset.append(np.array(item)) + item = [] + cnt = -1 + cnt += 1 + + return dataset From 21b0963972f720a85868b0e65d5c02dc7e6d4f82 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 12:16:44 +0100 Subject: 
[PATCH 026/108] align reduction shape and aggregation shape --- .../quantization/initialization/range.py | 32 +++++++++- .../algorithms/min_max/onnx_backend.py | 29 ++++----- .../algorithms/min_max/openvino_backend.py | 60 ++++--------------- .../algorithms/min_max/torch_backend.py | 26 ++------ 4 files changed, 55 insertions(+), 92 deletions(-) diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index 755077b5de1..64d7b74a688 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -9,12 +9,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple +from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.common.initialization.dataloader import NNCFDataLoader from nncf.common.quantization.structs import QuantizationScheme from nncf.common.quantization.structs import QuantizerGroup +from nncf.common.tensor_statistics.collectors import ReductionAxes from nncf.config.schemata.defaults import NUM_INIT_SAMPLES +from nncf.experimental.common.tensor_statistics.collectors import AggregationAxes class RangeInitConfig: @@ -204,3 +207,30 @@ def use_means_of_mins(self) -> bool: @property def use_means_of_maxs(self) -> bool: return not self._is_weights and not self._is_per_channel + + def get_reduction_aggregation_axes(self, shape, channel_axes) -> Tuple[ReductionAxes, AggregationAxes]: + """ + Calculates the reduction axes of the tensor. + + :param per_sample_stats: Boolean flag that indicated whether statistics are collected per-sample or per-batch. + :return: Shape to reduce to. 
+ """ + if self.is_weights: + aggregation_axes = None + if self.is_per_channel: + reduction_axes = get_channel_agnostic_reduction_axes(channel_axes, shape) + else: + reduction_axes = tuple(range(len(shape))) + else: + # OpenVINO activations have channel first layout: [N, C, Z, Y, X] + batch_axis = 0 + aggregation_axes = (batch_axis, *channel_axes) + if self.is_per_channel: + # Keep batch to aggregate and channel for per-channel FakeQuantize. + # TODO (l-bat): Disable quantizer propagation through layout changing operations + reduction_axes = get_channel_agnostic_reduction_axes(aggregation_axes, shape) + else: + # Keep batch to aggregate + reduction_axes = get_channel_agnostic_reduction_axes((batch_axis,), shape) + + return reduction_axes, aggregation_axes diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 45dc42eac46..be922f2531a 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -18,7 +18,6 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand -from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.common.hardware.config import HWConfig from nncf.common.quantization.initialization.range import RangeInitCollectorParams from nncf.common.quantization.structs import QuantizerConfig @@ -161,15 +160,10 @@ def get_statistic_collector( inplace: bool, num_samples: int = None, ) -> TensorCollector: - is_per_channel = collector_params.is_per_channel node = nncf_graph.get_node_by_name(target_point.target_node_name) - use_abs_max = collector_params.use_abs_max - quantization_axis = get_quantization_axis(is_per_channel, node, target_point) - quantized_tensor_shape = get_quantized_tensor_shape(nncf_graph, node, target_point) - reduction_axes = 
None # Per-Tensor - if quantization_axis is not None and quantized_tensor_shape is not None: # Per-Channel - reduction_axes = get_channel_agnostic_reduction_axes([quantization_axis], quantized_tensor_shape) - + shape = get_quantized_tensor_shape(nncf_graph, node, target_point) + channel_axis = get_quantization_axis(collector_params.is_per_channel, node, target_point) + reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(shape, [channel_axis]) collector = TensorCollector(ONNXMinMaxTensorStatistic) for params, container_key in zip( [range_estimator_params.min, range_estimator_params.max], @@ -179,29 +173,26 @@ def get_statistic_collector( raise RuntimeError( f"Statistic type: {params.statistics_type} is not supported for ONNX PTQ backend yet." ) - if params.aggregator_type not in AGGREGATORS_MAP: raise RuntimeError( f"Aggregator type: {params.aggregator_type} is not supported for ONNX PTQ backend yet." ) - - statistic_type = params.statistics_type - kwargs = {"reduction_axes": reduction_axes, "inplace": inplace} - if statistic_type in [StatisticsType.QUANTILE, StatisticsType.ABS_QUANTILE]: - # TODO(dlyakhov): merge two quantile aggregators in one + kwargs = {"reduction_axes": reduction_axes, "inplace": False} + if params.statistics_type in [StatisticsType.QUANTILE, StatisticsType.ABS_QUANTILE]: if container_key == ONNXMinMaxTensorStatistic.MIN_STAT: quantile = params.quantile_outlier_prob else: quantile = 1 - params.quantile_outlier_prob kwargs.update({"quantile": [quantile]}) - if use_abs_max and statistic_type == StatisticsType.MAX: + # TODO(dlyakhov): merge two quantile aggregators in one + statistic_type = params.statistics_type + if collector_params.use_abs_max and statistic_type == StatisticsType.MAX: statistic_type = StatisticsType.ABS_MAX - reducer = ONNX_REDUCERS_MAP[statistic_type](reduction_axes=reduction_axes) + reducer = ONNX_REDUCERS_MAP[statistic_type](**kwargs) - aggregation_axes = (0,) aggregator = 
AGGREGATORS_MAP[params.aggregator_type]( - aggregation_axes=aggregation_axes, num_samples=num_samples, + aggregation_axes=aggregation_axes, tensor_processor=ONNXNNCFCollectorTensorProcessor, ) diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 2c3e00da871..0a398595827 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Optional, Set, Tuple +from typing import Dict, List, Optional, Set import numpy as np @@ -17,11 +17,9 @@ from nncf.common.graph.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType -from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.common.hardware.config import HWConfig from nncf.common.quantization.initialization.range import RangeInitCollectorParams from nncf.common.quantization.structs import QuantizerConfig -from nncf.common.tensor_statistics.collectors import ReductionAxes from nncf.experimental.common.tensor_statistics.collectors import AGGREGATORS_MAP from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.openvino.graph.layer_attributes import OVLayerAttributes @@ -148,50 +146,6 @@ def _get_activation_shape(target_point: OVTargetPoint, nncf_graph: NNCFGraph, no else: raise NotImplementedError(f"Unsupported target point type {target_point.type}.") - @staticmethod - def _get_reduction_aggregation_axes( - nncf_graph: NNCFGraph, target_point: OVTargetPoint, collector_params: RangeInitCollectorParams - ) -> Tuple[ReductionAxes, ReductionAxes]: - """ - Returns reduce and aggregation axes. 
The following logic is applied: - If target point is applied to weight: - *Aggregator aggregates only inner saved statistics, - *Reducer reduces all axes except channels for per-channel, for per-tensor - all axes. - If target point is applied to activations: - *Aggregator aggregates batch dimension. - *Reducer reduces all axes except channel and batch for per-channel, for per-tensor - all axes except batch. - - :param nncf_graph: NNCFGraph instance. - :param target_point: Point to collect statistics. - :param quantizer_config: Quantization configuration. - :return: Reduction axes for reducer and aggregation axes for aggregator. - """ - node = nncf_graph.get_node_by_name(target_point.target_node_name) - - if target_point.is_weight_target_point(): - aggregation_axes = None - assert isinstance(node.layer_attributes, OVLayerAttributes) - shape = node.layer_attributes.constant_attributes[target_point.port_id]["shape"] - if collector_params.is_per_channel: - channel_axes = get_weight_channel_axes(node) - reduction_axes = get_channel_agnostic_reduction_axes(channel_axes, shape) - else: - reduction_axes = tuple(range(len(shape))) - else: - # OpenVINO activations have channel first layout: [N, C, Z, Y, X] - batch_axis, channel_axis = 0, 1 - aggregation_axes = (batch_axis, channel_axis) - shape = OVMinMaxAlgoBackend._get_activation_shape(target_point, nncf_graph, node) - if collector_params.is_per_channel: - # Keep batch to aggregate and channel for per-channel FakeQuantize. 
- # TODO (l-bat): Disable quantizer propagation through layout changing operations - reduction_axes = get_channel_agnostic_reduction_axes((batch_axis, channel_axis), shape) - else: - # Keep batch to aggregate - reduction_axes = get_channel_agnostic_reduction_axes((batch_axis,), shape) - - return reduction_axes, aggregation_axes - @staticmethod def get_statistic_collector( range_estimator_params: RangeEstimatorParameters, @@ -201,9 +155,15 @@ def get_statistic_collector( inplace: bool, num_samples: int = None, ) -> TensorCollector: - reduction_axes, aggregation_axes = OVMinMaxAlgoBackend._get_reduction_aggregation_axes( - nncf_graph, target_point, collector_params - ) + node = nncf_graph.get_node_by_name(target_point.target_node_name) + if target_point.is_weight_target_point(): + assert isinstance(node.layer_attributes, OVLayerAttributes) + shape = node.layer_attributes.constant_attributes[target_point.port_id]["shape"] + channel_axes = get_weight_channel_axes(node) + else: + shape = OVMinMaxAlgoBackend._get_activation_shape(target_point, nncf_graph, node) + channel_axes = (1,) + reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(shape, channel_axes) collector = TensorCollector(OVMinMaxTensorStatistic) for params, container_key in zip( diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index 1247d9b84d2..8b65f8150fd 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -40,7 +40,6 @@ from nncf.torch.hardware.config import PTHWConfig from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.quantization.default_quantization import DEFAULT_PT_QUANT_TRAIT_TO_OP_DICT -from nncf.torch.quantization.init_range import PTRangeInitCollectorParams from nncf.torch.quantization.layers import QUANTIZATION_MODULES from nncf.torch.quantization.layers import AsymmetricQuantizer from 
nncf.torch.quantization.layers import BaseQuantizer @@ -166,12 +165,10 @@ def get_statistic_collector( inplace: bool, num_samples: int = None, ) -> TensorCollector: - collector_params = PTMinMaxAlgoBackend._default_collector_params(nncf_graph, target_point, collector_params) - reduction_axes = collector_params.get_reduction_axes(per_sample_stats=True) - aggregation_axes = collector_params.get_aggregation_axes(per_sample_stats=True) - if target_point.is_weight_target_point(): - reduction_axes = collector_params.get_reduction_axes(per_sample_stats=False) - aggregation_axes = collector_params.get_aggregation_axes(per_sample_stats=False) + input_shape, _, channel_idx = PTMinMaxAlgoBackend._get_input_scale_shape( + nncf_graph, target_point, collector_params.is_per_channel + ) + reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(input_shape, channel_idx) collector = TensorCollector(PTMinMaxTensorStatistic) for params, container_key in zip( [range_estimator_params.min, range_estimator_params.max], @@ -247,21 +244,6 @@ def _get_input_scale_shape( return input_shape, scale_shape, channel_idx - @staticmethod - def _default_collector_params( - nncf_graph: NNCFGraph, target_point: PTTargetPoint, collector_params: RangeInitCollectorParams - ) -> PTRangeInitCollectorParams: - input_shape, _, channel_idx = PTMinMaxAlgoBackend._get_input_scale_shape( - nncf_graph, target_point, collector_params.is_per_channel - ) - return PTRangeInitCollectorParams( - is_weights=collector_params.is_weights, - scheme=collector_params.scheme, - per_channel=collector_params.is_per_channel, - input_shape=input_shape, - channel_idx=channel_idx, - ) - @staticmethod def _create_quantizer( quantizer_config: QuantizerConfig, From e90ca3292fe952be60e849b4a407bde77a2b6c16 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 12:32:23 +0100 Subject: [PATCH 027/108] get_channel_agnostic_reduction_axes -> get_reduction_axes --- nncf/common/graph/utils.py | 16 
++++++++-------- nncf/common/quantization/initialization/range.py | 8 ++++---- .../algorithms/channel_alignment/algorithm.py | 4 ++-- .../algorithms/smooth_quant/algorithm.py | 4 ++-- .../algorithms/weight_compression/algorithm.py | 2 +- .../algorithms/weight_compression/backend.py | 4 +--- .../weight_compression/openvino_backend.py | 8 +++----- .../weight_compression/torch_backend.py | 4 +--- nncf/torch/quantization/init_range.py | 4 ++-- .../test_tensor_collector_batch_size.py | 4 ++-- tests/common/graph/test_utils.py | 6 +++--- 11 files changed, 29 insertions(+), 35 deletions(-) diff --git a/nncf/common/graph/utils.py b/nncf/common/graph/utils.py index b5bd4fddce7..bf567db320e 100644 --- a/nncf/common/graph/utils.py +++ b/nncf/common/graph/utils.py @@ -10,7 +10,7 @@ # limitations under the License. from functools import partial -from typing import List, Optional, Set +from typing import List, Set, Union from nncf.common.graph import NNCFGraph from nncf.common.graph import NNCFNode @@ -117,15 +117,15 @@ def get_number_of_quantized_ops( return len(quantized_ops) -def get_channel_agnostic_reduction_axes(channel_axes: List[int], shape: List[int]) -> Optional[ReductionAxes]: +def get_reduction_axes(axes_to_keep: Union[List[int], ReductionAxes], shape_to_reduce: List[int]) -> ReductionAxes: """ - Returns filtered reduction axes without axes that corresponds channels. + Returns reduction axes without axes needed to keep. - :param channel_axes: List of the channel axes. - :param shape: Shape that need to be filtered. - :return: Reduction axes in tuple format. + :param axes_to_keep: Axes to keep. + :param shape_to_reduce: Shape to reduce. + :return: Reduction axes. 
""" - reduction_axes = list(range(len(shape))) - for channel_axis in sorted(channel_axes, reverse=True): + reduction_axes = list(range(len(shape_to_reduce))) + for channel_axis in sorted(axes_to_keep, reverse=True): del reduction_axes[channel_axis] return tuple(reduction_axes) diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index 64d7b74a688..58a0b0b6cb1 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -11,7 +11,7 @@ from typing import Dict, List, Optional, Tuple -from nncf.common.graph.utils import get_channel_agnostic_reduction_axes +from nncf.common.graph.utils import get_reduction_axes from nncf.common.initialization.dataloader import NNCFDataLoader from nncf.common.quantization.structs import QuantizationScheme from nncf.common.quantization.structs import QuantizerGroup @@ -218,7 +218,7 @@ def get_reduction_aggregation_axes(self, shape, channel_axes) -> Tuple[Reduction if self.is_weights: aggregation_axes = None if self.is_per_channel: - reduction_axes = get_channel_agnostic_reduction_axes(channel_axes, shape) + reduction_axes = get_reduction_axes(channel_axes, shape) else: reduction_axes = tuple(range(len(shape))) else: @@ -228,9 +228,9 @@ def get_reduction_aggregation_axes(self, shape, channel_axes) -> Tuple[Reduction if self.is_per_channel: # Keep batch to aggregate and channel for per-channel FakeQuantize. 
# TODO (l-bat): Disable quantizer propagation through layout changing operations - reduction_axes = get_channel_agnostic_reduction_axes(aggregation_axes, shape) + reduction_axes = get_reduction_axes(aggregation_axes, shape) else: # Keep batch to aggregate - reduction_axes = get_channel_agnostic_reduction_axes((batch_axis,), shape) + reduction_axes = get_reduction_axes((batch_axis,), shape) return reduction_axes, aggregation_axes diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py index 81a2d2ba9f8..86e055bd7f2 100644 --- a/nncf/quantization/algorithms/channel_alignment/algorithm.py +++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py @@ -22,7 +22,7 @@ from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout -from nncf.common.graph.utils import get_channel_agnostic_reduction_axes +from nncf.common.graph.utils import get_reduction_axes from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track from nncf.common.tensor_statistics.statistic_point import StatisticPoint @@ -390,7 +390,7 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin channel_axis = conv_in.metatype.output_channel_axis activation_shape = list(range(len(graph.get_output_edges(node_in)[0].tensor_shape))) - reduction_axes = get_channel_agnostic_reduction_axes([channel_axis], activation_shape) + reduction_axes = get_reduction_axes([channel_axis], activation_shape) statistic_collector = self._backend_entity.get_statistic_collector( reduction_axes, self._quantile, self.subset_size, self.inplace_statistics diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index 64b9bb065fa..a6a66f92344 100644 --- 
a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -31,7 +31,7 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout -from nncf.common.graph.utils import get_channel_agnostic_reduction_axes +from nncf.common.graph.utils import get_reduction_axes from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track from nncf.common.tensor_statistics.statistic_point import StatisticPoint @@ -346,7 +346,7 @@ def _calculate_input_reduction_axes(self, nncf_graph: NNCFGraph, node: NNCFNode, reduction_axes = tuple([]) if len(shape) > 1: channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port) - reduction_axes = get_channel_agnostic_reduction_axes(channel_axis, shape) + reduction_axes = get_reduction_axes(channel_axis, shape) return reduction_axes def _process_weight_statistics(self, node: NNCFNode, weights: TTensor) -> TTensor: diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py index 3e8d50312e4..329fa46c9fb 100644 --- a/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -292,7 +292,7 @@ def do_compression( weight = self._backend_entity.get_weight(node, weight_port_id, model, graph) if weight.dtype not in [TensorDataType.float32, TensorDataType.float16, TensorDataType.float64]: continue - reduction_axes = self._backend_entity.get_channel_agnostic_reduction_axes(node, weight_port_id, graph) + reduction_axes = self._backend_entity.get_reduction_axes(node, weight_port_id, graph) if isinstance(reduction_axes, tuple) and len(reduction_axes) != 1: nncf_logger.warning( f"Weight compression expects a single reduction axis, but 
{len(reduction_axes)} given. " diff --git a/nncf/quantization/algorithms/weight_compression/backend.py b/nncf/quantization/algorithms/weight_compression/backend.py index 557f123eae7..4d72db7ed83 100644 --- a/nncf/quantization/algorithms/weight_compression/backend.py +++ b/nncf/quantization/algorithms/weight_compression/backend.py @@ -53,9 +53,7 @@ def is_node_with_weights(node: NNCFNode, graph: NNCFGraph) -> bool: @staticmethod @abstractmethod - def get_channel_agnostic_reduction_axes( - node_with_weight: NNCFNode, weight_port_id: int, graph: NNCFGraph - ) -> Optional[Tuple[int]]: + def get_reduction_axes(node_with_weight: NNCFNode, weight_port_id: int, graph: NNCFGraph) -> Optional[Tuple[int]]: """ Returns reduction axes without axes that corresponds to weight channels of the node with weight. diff --git a/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/nncf/quantization/algorithms/weight_compression/openvino_backend.py index 6f2aeecea79..7ea3ab0087b 100644 --- a/nncf/quantization/algorithms/weight_compression/openvino_backend.py +++ b/nncf/quantization/algorithms/weight_compression/openvino_backend.py @@ -17,7 +17,7 @@ from nncf.common.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType -from nncf.common.graph.utils import get_channel_agnostic_reduction_axes +from nncf.common.graph.utils import get_reduction_axes from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.experimental.tensor.tensor import Tensor from nncf.openvino.graph.metatypes.openvino_metatypes import OVEmbeddingMetatype @@ -51,12 +51,10 @@ def is_node_with_weights(node: NNCFNode, graph: NNCFGraph) -> bool: return node.layer_attributes and node.layer_attributes.constant_attributes @staticmethod - def get_channel_agnostic_reduction_axes( - node_with_weight: NNCFNode, weight_port_id: int, graph: NNCFGraph - ) -> Optional[Tuple[int]]: + def 
get_reduction_axes(node_with_weight: NNCFNode, weight_port_id: int, graph: NNCFGraph) -> Optional[Tuple[int]]: channel_axes = get_weight_channel_axes(node_with_weight) const_shape = node_with_weight.layer_attributes.constant_attributes[weight_port_id]["shape"] - return get_channel_agnostic_reduction_axes(channel_axes, const_shape) + return get_reduction_axes(channel_axes, const_shape) @staticmethod def target_point(target_type: TargetType, target_node_name: str, port_id: int) -> OVTargetPoint: diff --git a/nncf/quantization/algorithms/weight_compression/torch_backend.py b/nncf/quantization/algorithms/weight_compression/torch_backend.py index 3cbe72f4d20..566b03d92e8 100644 --- a/nncf/quantization/algorithms/weight_compression/torch_backend.py +++ b/nncf/quantization/algorithms/weight_compression/torch_backend.py @@ -121,9 +121,7 @@ def get_weight_names_and_port_ids(node: NNCFNode, graph: NNCFGraph) -> List[Tupl return weight_port_ids @staticmethod - def get_channel_agnostic_reduction_axes( - node_with_weight: NNCFNode, weight_port_id: int, graph: NNCFGraph - ) -> Optional[Tuple[int]]: + def get_reduction_axes(node_with_weight: NNCFNode, weight_port_id: int, graph: NNCFGraph) -> Optional[Tuple[int]]: weight_node = get_weight_node(node_with_weight, weight_port_id, graph) ndims = len(weight_node.layer_attributes.shape) diff --git a/nncf/torch/quantization/init_range.py b/nncf/torch/quantization/init_range.py index 3a81fa61b03..e46173c7380 100644 --- a/nncf/torch/quantization/init_range.py +++ b/nncf/torch/quantization/init_range.py @@ -17,7 +17,7 @@ import torch from nncf.common.graph.layer_attributes import WeightedLayerAttributes -from nncf.common.graph.utils import get_channel_agnostic_reduction_axes +from nncf.common.graph.utils import get_reduction_axes from nncf.common.quantization.initialization.range import RangeInitCollectorParams from nncf.common.quantization.initialization.range import RangeInitConfig from nncf.common.quantization.initialization.range 
import RangeInitParams @@ -122,7 +122,7 @@ def get_reduction_axes(self, per_sample_stats: bool) -> ReductionAxes: if self.is_per_channel: val = (ndims + self._channel_idx) % ndims axes_to_remove.append(val) - return get_channel_agnostic_reduction_axes(axes_to_remove, reduction_axes) + return get_reduction_axes(axes_to_remove, reduction_axes) def get_aggregation_axes(self, per_sample_stats: bool) -> AggregationAxes: """ diff --git a/tests/common/experimental/test_tensor_collector_batch_size.py b/tests/common/experimental/test_tensor_collector_batch_size.py index b7363e52a58..7d83022ba78 100644 --- a/tests/common/experimental/test_tensor_collector_batch_size.py +++ b/tests/common/experimental/test_tensor_collector_batch_size.py @@ -15,7 +15,7 @@ import numpy as np import pytest -from nncf.common.graph.utils import get_channel_agnostic_reduction_axes +from nncf.common.graph.utils import get_reduction_axes from nncf.experimental.common.tensor_statistics.collectors import TensorCollector @@ -62,7 +62,7 @@ def _create_tensor_collector(self, shape, inplace, reducer, aggregator) -> Tenso batch_axis = 0 statistic_branch_random_name = "1" collector = TensorCollector(self.get_tensor_statistics_class()) - reduction_axes = get_channel_agnostic_reduction_axes([batch_axis], shape) + reduction_axes = get_reduction_axes([batch_axis], shape) aggregation_axes = (0, 1) kwargs = {"reduction_axes": reduction_axes, "inplace": inplace} reducer = reducer(**kwargs) diff --git a/tests/common/graph/test_utils.py b/tests/common/graph/test_utils.py index 9b391448cfc..7997ac010ec 100644 --- a/tests/common/graph/test_utils.py +++ b/tests/common/graph/test_utils.py @@ -11,8 +11,8 @@ import pytest -from nncf.common.graph.utils import get_channel_agnostic_reduction_axes from nncf.common.graph.utils import get_concat_axis +from nncf.common.graph.utils import get_reduction_axes TEST_CASES = [ ([(1, 1), (1, 1)], [(2, 1)], [0]), @@ -41,5 +41,5 @@ def test_get_concat_axis(input_shape, output_shape, 
possible_axes): ((1, 1, 12, 12), [1, 2], (0, 3)), ], ) -def test_get_channel_agnostic_reduction_axes(shape, channel_axes, ref_reduction_axes): - assert get_channel_agnostic_reduction_axes(channel_axes=channel_axes, shape=shape) == ref_reduction_axes +def test_get_reduction_axes(shape, channel_axes, ref_reduction_axes): + assert get_reduction_axes(channel_axes=channel_axes, shape=shape) == ref_reduction_axes From f078a78f731dc1e9fe596dbf44322ef5fa2a5609 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 12:39:15 +0100 Subject: [PATCH 028/108] upd get_reduction_aggregation_axes --- .../quantization/initialization/range.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index 58a0b0b6cb1..a656954f2ea 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, Union from nncf.common.graph.utils import get_reduction_axes from nncf.common.initialization.dataloader import NNCFDataLoader @@ -208,29 +208,31 @@ def use_means_of_mins(self) -> bool: def use_means_of_maxs(self) -> bool: return not self._is_weights and not self._is_per_channel - def get_reduction_aggregation_axes(self, shape, channel_axes) -> Tuple[ReductionAxes, AggregationAxes]: + def get_reduction_aggregation_axes( + self, shape_to_reduce: Union[Tuple[int], List[int]], quantization_axes: Union[Tuple[int], List[int]] + ) -> Tuple[ReductionAxes, AggregationAxes]: """ - Calculates the reduction axes of the tensor. + Calculates the reduction axes, aggregation axes for the tensor. 
- :param per_sample_stats: Boolean flag that indicated whether statistics are collected per-sample or per-batch. - :return: Shape to reduce to. + :param shape_to_reduce: Shape of the tensor. + :param quantization_axes: Quantization axes if per-channel quantization. + :return: Reduction axes and aggregation axes. """ if self.is_weights: aggregation_axes = None if self.is_per_channel: - reduction_axes = get_reduction_axes(channel_axes, shape) + reduction_axes = get_reduction_axes(quantization_axes, shape_to_reduce) else: - reduction_axes = tuple(range(len(shape))) + reduction_axes = tuple(range(len(shape_to_reduce))) else: - # OpenVINO activations have channel first layout: [N, C, Z, Y, X] batch_axis = 0 - aggregation_axes = (batch_axis, *channel_axes) + aggregation_axes = (batch_axis, *quantization_axes) if self.is_per_channel: - # Keep batch to aggregate and channel for per-channel FakeQuantize. + # Keep batch to aggregate and channel for per-channel quantization. # TODO (l-bat): Disable quantizer propagation through layout changing operations - reduction_axes = get_reduction_axes(aggregation_axes, shape) + reduction_axes = get_reduction_axes(aggregation_axes, shape_to_reduce) else: # Keep batch to aggregate - reduction_axes = get_reduction_axes((batch_axis,), shape) + reduction_axes = get_reduction_axes((batch_axis,), shape_to_reduce) return reduction_axes, aggregation_axes From e4c57cd26792012415fe17984a46c4e72fba67bf Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 13:28:41 +0100 Subject: [PATCH 029/108] upd aggregator --- nncf/common/tensor_statistics/aggregator.py | 37 +++++++++++++------ tests/onnx/quantization/test_batch_size.py | 17 +++++++++ .../test_templates/test_batch_size.py | 31 +++++++--------- tests/torch/ptq/test_batch_size.py | 17 +++++++++ 4 files changed, 73 insertions(+), 29 deletions(-) create mode 100644 tests/onnx/quantization/test_batch_size.py create mode 100644 tests/torch/ptq/test_batch_size.py diff --git 
a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index b83a0ab789d..30974dc1049 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -11,7 +11,7 @@ from abc import ABC from abc import abstractmethod from itertools import islice -from typing import Any, Dict, List, TypeVar +from typing import Any, Dict, List, Optional, TypeVar from nncf.common import factory from nncf.common.graph.graph import NNCFGraph @@ -36,21 +36,32 @@ def __init__(self, dataset: Dataset): self.dataset = dataset self.stat_subset_size = None self.batch_size = self.dataset.get_batch_size() or 1 - self.dataset_size = self.dataset.get_length() - self.dataset_size = self.dataset_size * self.batch_size if self.dataset_size is not None else self.dataset_size + dataset_len = self.dataset.get_length() + self.dataset_sample_size = dataset_len * self.batch_size if dataset_len is not None else dataset_len self.statistic_points = StatisticPointsContainer() - def _get_total_calibration_samples( + def _get_total_statistics_samples( self, - ): + ) -> Optional[int]: + """ + Returns total number of statistics samples used. + + :return: Total number of statistics samples used. + """ return ( - min(self.dataset_size or self.stat_subset_size, self.stat_subset_size) + min(self.dataset_sample_size or self.stat_subset_size, self.stat_subset_size) if self.stat_subset_size is not None else None ) - def _get_iterations_num(self, calibration_samples_num): - return calibration_samples_num // self.batch_size if calibration_samples_num is not None else None + def _get_iterations_num(self, total_statistics_samples: Optional[int]) -> Optional[int]: + """ + Returns number of iterations to collect statistics. + + :param total_statistics_samples: Number of statistics samples are used. + :return: Iterations number statistics collection. 
+ """ + return total_statistics_samples // self.batch_size if total_statistics_samples is not None else None def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ @@ -61,7 +72,10 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: :param graph: Model graph. """ if self.batch_size > 1 and self.is_model_batch_size_limited_support(graph): - nncf_logger.warning("The batch size > 1 for the specific model can lead to accuracy degradation") + nncf_logger.warning( + "The batch size > 1 for the particular model can lead to accuracy degradation. \ + To collect the most appropriate statistics it is recommended to use batch size = 1." + ) if not self.statistic_points: return model_transformer = factory.ModelTransformerFactory.create(model) @@ -70,11 +84,10 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: model_with_outputs = model_transformer.transform(transformation_layout) engine = factory.EngineFactory.create(model_with_outputs) - calibration_samples_num = self._get_total_calibration_samples() + calibration_samples_num = self._get_total_statistics_samples() iterataions_num = self._get_iterations_num(calibration_samples_num) if iterataions_num == 0: - nncf_logger.error("Iterations num is 0") - iterataions_num = 1 + raise ValueError("Batch size > length of dataset or batch size > stat_subset_size.") collected_statistics_num = 0 with track(total=calibration_samples_num, description="Statistics collection") as pbar: for input_data in islice(self.dataset.get_inference_data(), iterataions_num): diff --git a/tests/onnx/quantization/test_batch_size.py b/tests/onnx/quantization/test_batch_size.py new file mode 100644 index 00000000000..debfd1382d5 --- /dev/null +++ b/tests/onnx/quantization/test_batch_size.py @@ -0,0 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from nncf.onnx.statistics.aggregator import ONNXStatisticsAggregator +from tests.post_training.test_templates.test_batch_size import TemplateTestBatchSize + + +class TestOVBatchSize(TemplateTestBatchSize): + def create_statistics_aggregator(self, dataset): + return ONNXStatisticsAggregator(dataset) diff --git a/tests/post_training/test_templates/test_batch_size.py b/tests/post_training/test_templates/test_batch_size.py index 02b7ecc9734..a1a0bb917b6 100644 --- a/tests/post_training/test_templates/test_batch_size.py +++ b/tests/post_training/test_templates/test_batch_size.py @@ -23,7 +23,7 @@ class DataForTest: batch_size: int dataset_len: int - subset_size: int + stat_subset_size: int ref_calibration_samples_num: int ref_iterations_num: int @@ -36,33 +36,30 @@ def create_statistics_aggregator(self, dataset) -> StatisticsAggregator: def create_dataset(self, lenght, batch_size): dataset = get_static_dataset(None, None, None, lenght) dataset._data_source.batch_size = batch_size - # print(dataset.get_batch_size()) return dataset @pytest.mark.parametrize( ("test_data"), ( - [ - # DataForTest(None, 1000, None, None, None), - # DataForTest(1, 1000, 300, 300, 300), - # DataForTest(10, 1000, 300, 300, 30), - # DataForTest(300, 1000, 300, 300, 1), - # DataForTest(301, 1000, 300, 300, 0), - # DataForTest(301, 1000, 300, 300, 0), # batch_size > subset_size - DataForTest(300, 10, 300, 10, 0), # batch_size > len(dataset) - # DataForTest(300, 10, 300, 10, 0), # batch_size > len(dataset) + [ # batch_size | dataset_len | stat_subset_size | ref_calibration_samples_num | 
ref_iterations_num + DataForTest(None, None, None, None, None), # None is None + DataForTest(1, 1000, 300, 300, 300), + DataForTest(10, 1000, 300, 300, 30), + DataForTest(300, 1000, 300, 300, 1), + DataForTest(301, 1000, 300, 300, 0), # batch_size > stat_subset_size + DataForTest(10, 10, 300, 100, 10), # len(dataset) * batch_size < subset_size + DataForTest(11, 300, 300, 300, 27), # stat_subset_size % batch_size != 0 ] ), ) def test_batch_size_subset(self, test_data): - batch_size, dataset_length, subset_size, ref_calibration_samples_num, ref_iterations_num = ( + batch_size, dataset_length, stat_subset_size, ref_calibration_samples_num, ref_iterations_num = ( getattr(test_data, field.name) for field in fields(test_data) ) dataset = self.create_dataset(dataset_length, batch_size) statistics_aggregator = self.create_statistics_aggregator(dataset) - statistics_aggregator.stat_subset_size = subset_size - print(statistics_aggregator.dataset_size) - calibration_samples_num = statistics_aggregator._get_total_calibration_samples() - assert calibration_samples_num == ref_calibration_samples_num - iterataions_num = statistics_aggregator._get_iterations_num(calibration_samples_num) + statistics_aggregator.stat_subset_size = stat_subset_size + total_calibration_samples = statistics_aggregator._get_total_statistics_samples() + assert total_calibration_samples == ref_calibration_samples_num + iterataions_num = statistics_aggregator._get_iterations_num(total_calibration_samples) assert iterataions_num == ref_iterations_num diff --git a/tests/torch/ptq/test_batch_size.py b/tests/torch/ptq/test_batch_size.py new file mode 100644 index 00000000000..d10eae83cde --- /dev/null +++ b/tests/torch/ptq/test_batch_size.py @@ -0,0 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from nncf.torch.statistics.aggregator import PTStatisticsAggregator +from tests.post_training.test_templates.test_batch_size import TemplateTestBatchSize + + +class TestOVBatchSize(TemplateTestBatchSize): + def create_statistics_aggregator(self, dataset): + return PTStatisticsAggregator(dataset) From d226074d5cc15a067896b3e67700a2366e9fbb96 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 13:40:26 +0100 Subject: [PATCH 030/108] fix OV test --- .../algorithms/smooth_quant/algorithm.py | 2 +- tests/post_training/test_templates/helpers.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index a6a66f92344..7bdc33725d4 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -346,7 +346,7 @@ def _calculate_input_reduction_axes(self, nncf_graph: NNCFGraph, node: NNCFNode, reduction_axes = tuple([]) if len(shape) > 1: channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port) - reduction_axes = get_reduction_axes(channel_axis, shape) + reduction_axes = get_reduction_axes((channel_axis,), shape) return reduction_axes def _process_weight_statistics(self, node: NNCFNode, weights: TTensor) -> TTensor: diff --git a/tests/post_training/test_templates/helpers.py b/tests/post_training/test_templates/helpers.py index 9f69e83f1a9..c242b6558ce 100644 --- a/tests/post_training/test_templates/helpers.py +++ 
b/tests/post_training/test_templates/helpers.py @@ -30,7 +30,12 @@ class StaticDatasetMock: to convert data to backend specific type. """ - def __init__(self, input_size: Tuple, length: int = 1, fn_to_type: Callable = None): + def __init__( + self, + input_size: Tuple, + fn_to_type: Callable = None, + length: int = 1, + ): super().__init__() self._len = length self._input_size = input_size @@ -55,7 +60,10 @@ def get_static_dataset(input_size: Tuple, transform_fn: Callable, fn_to_type: Ca :param fn_to_type: Function, defaults to None. :return: Instance of nncf.Dataset for StaticDatasetMock. """ - return Dataset(StaticDatasetMock(input_size, length, fn_to_type), transform_fn) + return Dataset( + StaticDatasetMock(input_size, fn_to_type, length), + transform_fn, + ) class ConvTestModel(nn.Module): From f502de56421c8f143b0ddd77a38e9d4f6d1d107a Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 13:57:24 +0100 Subject: [PATCH 031/108] fix ONNX test --- nncf/onnx/graph/node_utils.py | 8 ++------ nncf/quantization/algorithms/min_max/onnx_backend.py | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/nncf/onnx/graph/node_utils.py b/nncf/onnx/graph/node_utils.py index 84cc32755b0..b03f963958d 100644 --- a/nncf/onnx/graph/node_utils.py +++ b/nncf/onnx/graph/node_utils.py @@ -217,18 +217,14 @@ def get_quantized_tensor_shape( return _get_activation_tensor_shape(nncf_graph, node, target_point) -def get_quantization_axis(is_per_channel: bool, node: NNCFNode, target_point: ONNXTargetPoint) -> Optional[int]: +def get_quantization_axis(node: NNCFNode, target_point: ONNXTargetPoint) -> int: """ Returns axis of quantizer parameters are calculated along. - If quantization is per-tensor returns None. - :param is_per_channel: True if quantizater is per-channel. :param node: NNCFNode. :param target_point: Target point indicates the quantizer place in the model graph. - :return: None if per-tensor, otherwise quantizion axis. 
+ :return: Quantizion axis. """ - if not is_per_channel: - return None if target_point.is_weight_target_point(): return _get_weight_quantization_axis(node, target_point.port_id) return _get_activation_quantization_axis() diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index be922f2531a..46d623500e6 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -124,7 +124,7 @@ def create_quantizer_insertion_command( tensor_type = np.int8 # The weight is restricted to have only signed range nncf_input_node_next_nodes = ONNXMinMaxAlgoBackend._get_input_edges_mapping(nncf_graph) node = nncf_graph.get_node_by_name(target_point.target_node_name) - axis = get_quantization_axis(quantizer_config.per_channel, node, target_point) + axis = get_quantization_axis(node, target_point) if quantizer_config.per_channel else None onnx_parameters = convert_fq_params_to_onnx_params(parameters, quantizer_config.num_bits, tensor_type, axis) return ONNXQuantizerInsertionCommand(target_point, nncf_input_node_next_nodes, onnx_parameters) @@ -162,7 +162,7 @@ def get_statistic_collector( ) -> TensorCollector: node = nncf_graph.get_node_by_name(target_point.target_node_name) shape = get_quantized_tensor_shape(nncf_graph, node, target_point) - channel_axis = get_quantization_axis(collector_params.is_per_channel, node, target_point) + channel_axis = get_quantization_axis(node, target_point) reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(shape, [channel_axis]) collector = TensorCollector(ONNXMinMaxTensorStatistic) for params, container_key in zip( From 83d03cb1e7af59f24d04f3e26d99dc4e8d9c5f48 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 14:05:24 +0100 Subject: [PATCH 032/108] tests --- nncf/common/tensor_statistics/aggregator.py | 7 ------- nncf/quantization/algorithms/min_max/onnx_backend.py | 5 
++++- tests/common/test_statistics_aggregator.py | 4 ++-- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 30974dc1049..861a6ff8b82 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -88,20 +88,13 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: iterataions_num = self._get_iterations_num(calibration_samples_num) if iterataions_num == 0: raise ValueError("Batch size > length of dataset or batch size > stat_subset_size.") - collected_statistics_num = 0 with track(total=calibration_samples_num, description="Statistics collection") as pbar: for input_data in islice(self.dataset.get_inference_data(), iterataions_num): outputs = engine.infer(input_data) processed_outputs = self._process_outputs(outputs) self._register_statistics(processed_outputs, merged_statistics) - collected_statistics_num += self.batch_size pbar.progress.update(pbar.task, advance=self.batch_size) - if collected_statistics_num == 0: - raise RuntimeError( - "Calibration dataset must not be empty. Please provide calibration dataset with at least one sample." 
- ) - def register_statistic_points(self, statistic_points: StatisticPointsContainer) -> None: """ Register statistic points for statistics collection and recalculates the maximum number samples diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 46d623500e6..f3bca4c3c91 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -163,7 +163,10 @@ def get_statistic_collector( node = nncf_graph.get_node_by_name(target_point.target_node_name) shape = get_quantized_tensor_shape(nncf_graph, node, target_point) channel_axis = get_quantization_axis(node, target_point) - reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(shape, [channel_axis]) + if shape is None: # No information about shape + reduction_axes, aggregation_axes = None, (0, 1) # default per-tensor values + else: + reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(shape, [channel_axis]) collector = TensorCollector(ONNXMinMaxTensorStatistic) for params, container_key in zip( [range_estimator_params.min, range_estimator_params.max], diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index 34f737e52b3..6405c8fd7c1 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -921,6 +921,6 @@ def test_collect_with_empty_dataset(self, dataset_samples): statistics_aggregator = self.get_statistics_aggregator(dataset) statistics_aggregator.register_statistic_points(statistics_points) - with pytest.raises(RuntimeError) as e: + with pytest.raises(ValueError) as e: statistics_aggregator.collect_statistics(model, graph) - assert "Calibration dataset must not be empty" in e.info + assert "Batch size > length of dataset or batch size > stat_subset_size." 
in e.info From fbfe5871856e43c0424ce89ab18e4cba4478655e Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 14:17:02 +0100 Subject: [PATCH 033/108] fix torch tests --- nncf/quantization/algorithms/min_max/torch_backend.py | 2 +- tests/torch/ptq/test_graphs.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index 8b65f8150fd..d2f83cc8b8e 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -168,7 +168,7 @@ def get_statistic_collector( input_shape, _, channel_idx = PTMinMaxAlgoBackend._get_input_scale_shape( nncf_graph, target_point, collector_params.is_per_channel ) - reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(input_shape, channel_idx) + reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(input_shape, (channel_idx,)) collector = TensorCollector(PTMinMaxTensorStatistic) for params, container_key in zip( [range_estimator_params.min, range_estimator_params.max], diff --git a/tests/torch/ptq/test_graphs.py b/tests/torch/ptq/test_graphs.py index a1d6f524b7f..6afeeaf3eb2 100644 --- a/tests/torch/ptq/test_graphs.py +++ b/tests/torch/ptq/test_graphs.py @@ -20,6 +20,7 @@ from nncf.torch.layers import NNCF_RNN from nncf.torch.layers import LSTMCellNNCF from tests.post_training.test_templates.helpers import EmbeddingModel +from tests.post_training.test_templates.helpers import get_static_dataset from tests.torch import test_models from tests.torch.ptq.helpers import get_nncf_network from tests.torch.ptq.helpers import mock_collect_statistics @@ -101,7 +102,7 @@ def test_min_max_classification_quantized_graphs(desc: ModelDesc, quantization_p quantization_algorithm = PostTrainingQuantization(**quantization_parameters) quantized_model = quantization_algorithm.apply( - nncf_network, 
nncf_network.nncf.get_graph(), dataset=None - ) # TODO: could dataset be None? + nncf_network, nncf_network.nncf.get_graph(), dataset=get_static_dataset(desc.input_sample_sizes, None, None) + ) check_graph(quantized_model.nncf.get_graph(), desc.dot_filename(), graph_dir) From 0ae6ac407c92e96074b9bf84c8b34574d71b68ef Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 14:22:39 +0100 Subject: [PATCH 034/108] fix tests --- tests/common/graph/test_utils.py | 2 +- .../data/reference_scales/activation_matmul_model_mixed.json | 4 ++-- .../reference_scales/activation_matmul_model_performance.json | 2 +- tests/onnx/data/reference_scales/linear_model_mixed.json | 2 +- .../one_depthwise_convolutional_model_mixed.json | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/common/graph/test_utils.py b/tests/common/graph/test_utils.py index 7997ac010ec..10f35f112bb 100644 --- a/tests/common/graph/test_utils.py +++ b/tests/common/graph/test_utils.py @@ -42,4 +42,4 @@ def test_get_concat_axis(input_shape, output_shape, possible_axes): ], ) def test_get_reduction_axes(shape, channel_axes, ref_reduction_axes): - assert get_reduction_axes(channel_axes=channel_axes, shape=shape) == ref_reduction_axes + assert get_reduction_axes(channel_axes, shape) == ref_reduction_axes diff --git a/tests/onnx/data/reference_scales/activation_matmul_model_mixed.json b/tests/onnx/data/reference_scales/activation_matmul_model_mixed.json index 7080679f2b8..40aa2a1bfc8 100644 --- a/tests/onnx/data/reference_scales/activation_matmul_model_mixed.json +++ b/tests/onnx/data/reference_scales/activation_matmul_model_mixed.json @@ -1,7 +1,7 @@ { "QuantizeLinear_X_1": { - "scale": 0.006937139667570591, - "zero_point": -11 + "scale": 0.0007594539201818407, + "zero_point": 0 }, "QuantizeLinear_Y_1": { "scale": 0.006937139667570591, diff --git a/tests/onnx/data/reference_scales/activation_matmul_model_performance.json 
b/tests/onnx/data/reference_scales/activation_matmul_model_performance.json index 0b82b1366b8..1d0b29a44dc 100644 --- a/tests/onnx/data/reference_scales/activation_matmul_model_performance.json +++ b/tests/onnx/data/reference_scales/activation_matmul_model_performance.json @@ -1,6 +1,6 @@ { "QuantizeLinear_X_1": { - "scale": 0.00749011617153883, + "scale": 0.0021499551367014647, "zero_point": 0 }, "QuantizeLinear_Y_1": { diff --git a/tests/onnx/data/reference_scales/linear_model_mixed.json b/tests/onnx/data/reference_scales/linear_model_mixed.json index 2cbfd2a51d8..8f2c40e5ffb 100644 --- a/tests/onnx/data/reference_scales/linear_model_mixed.json +++ b/tests/onnx/data/reference_scales/linear_model_mixed.json @@ -1,6 +1,6 @@ { "QuantizeLinear_X_1": { - "scale": 0.00786584708839655, + "scale": 0.007865846157073975, "zero_point": -1 }, "QuantizeLinear_Conv1_W_1": { diff --git a/tests/onnx/data/reference_scales/one_depthwise_convolutional_model_mixed.json b/tests/onnx/data/reference_scales/one_depthwise_convolutional_model_mixed.json index a4a2295cf2e..b062a9b0ebe 100644 --- a/tests/onnx/data/reference_scales/one_depthwise_convolutional_model_mixed.json +++ b/tests/onnx/data/reference_scales/one_depthwise_convolutional_model_mixed.json @@ -1,7 +1,7 @@ { "QuantizeLinear_X_1": { "scale": [ - 0.007609957829117775, + 0.007609957363456488, 0.007633729372173548, 0.007594745140522718 ], From 496339f5f678a6096b27fff559114c095d7d32a4 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 14:30:49 +0100 Subject: [PATCH 035/108] common tests --- .../quantization/test_quantizer_config.py | 16 ------------ .../quantization/test_quantizer_config.py | 25 ------------------- .../test_templates/test_quantizer_config.py | 24 +++++++++++++++--- tests/torch/ptq/test_quantizer_config.py | 13 ---------- 4 files changed, 21 insertions(+), 57 deletions(-) diff --git a/tests/onnx/quantization/test_quantizer_config.py b/tests/onnx/quantization/test_quantizer_config.py index 
3b77d5df243..ce604c1065e 100644 --- a/tests/onnx/quantization/test_quantizer_config.py +++ b/tests/onnx/quantization/test_quantizer_config.py @@ -11,7 +11,6 @@ import pytest -from nncf.common.graph.transformations.commands import TargetType from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXAddLayerMetatype from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXConvolutionMetatype from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXDepthwiseConvolutionMetatype @@ -22,26 +21,11 @@ from tests.post_training.test_templates.models import NNCFGraphToTestSumAggregation from tests.post_training.test_templates.test_quantizer_config import TemplateTestQuantizerConfig -ParamsCls = TemplateTestQuantizerConfig.TestGetStatisticsCollectorParameters - class TestQuantizerConfig(TemplateTestQuantizerConfig): def get_algo_backend(self): return ONNXMinMaxAlgoBackend() - @pytest.fixture( - params=[ - pytest.param( - (TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (0, 2), (0, 1, 2)), - marks=pytest.mark.skip("Ticket 102414: remove hardcoded axes for activations"), - ), - (TargetType.POST_LAYER_OPERATION, "/Conv_1_0", (0, 2, 3), None), - (TargetType.OPERATION_WITH_WEIGHTS, "/Conv_1_0", (1, 2, 3), None), - ] - ) - def statistic_collector_parameters(self, request) -> ParamsCls: - return ParamsCls(*request.param) - @pytest.fixture def single_conv_nncf_graph(self) -> NNCFGraphToTest: conv_layer_attrs = ONNXLayerAttributes(weight_attrs={1: {"shape": [4, 4, 4, 4]}}, bias_attrs={}) diff --git a/tests/openvino/native/quantization/test_quantizer_config.py b/tests/openvino/native/quantization/test_quantizer_config.py index 24fd7334ca7..fa625f96b72 100644 --- a/tests/openvino/native/quantization/test_quantizer_config.py +++ b/tests/openvino/native/quantization/test_quantizer_config.py @@ -11,7 +11,6 @@ import pytest -from nncf.common.graph.transformations.commands import TargetType from nncf.openvino.graph.layer_attributes import OVLayerAttributes from 
nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVDepthwiseConvolutionMetatype @@ -22,35 +21,11 @@ from tests.post_training.test_templates.models import NNCFGraphToTestSumAggregation from tests.post_training.test_templates.test_quantizer_config import TemplateTestQuantizerConfig -ParamsCls = TemplateTestQuantizerConfig.TestGetStatisticsCollectorParameters - class TestQuantizerConfig(TemplateTestQuantizerConfig): def get_algo_backend(self): return OVMinMaxAlgoBackend() - @pytest.fixture( - params=[ - pytest.param( - (TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (2,), (1, 2)), - ), - ( - TargetType.POST_LAYER_OPERATION, - "/Conv_1_0", - (2, 3), - (1, 2, 3), - ), - ( - TargetType.OPERATION_WITH_WEIGHTS, - "/Conv_1_0", - (1, 2, 3), - (0, 1, 2, 3), - ), - ] - ) - def statistic_collector_parameters(self, request) -> ParamsCls: - return ParamsCls(*request.param) - @pytest.fixture def single_conv_nncf_graph(self) -> NNCFGraphToTest: conv_layer_attrs = OVLayerAttributes({0: {"name": "dummy", "shape": (4, 4, 4, 4)}}) diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 66669988991..266998c2fa9 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -85,10 +85,27 @@ class TestGetStatisticsCollectorParameters: ref_per_ch_reduction_axes: List[int] ref_per_tensor_reduction_axes: List[int] - @abstractmethod - @pytest.fixture + @pytest.fixture( + params=[ + pytest.param( + TestGetStatisticsCollectorParameters(TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (2,), (1, 2)), + ), + TestGetStatisticsCollectorParameters( + TargetType.POST_LAYER_OPERATION, + "/Conv_1_0", + (2, 3), + (1, 2, 3), + ), + TestGetStatisticsCollectorParameters( + TargetType.OPERATION_WITH_WEIGHTS, + "/Conv_1_0", + (1, 2, 3), + (0, 1, 2, 3), + ), + ] + ) 
def statistic_collector_parameters(self, request) -> TestGetStatisticsCollectorParameters: - pass + return request.param def test_default_quantizer_config(self, single_conv_nncf_graph): min_max_algo = MinMaxQuantization() @@ -231,6 +248,7 @@ def test_get_stat_collector( statistic_collector_parameters: TestGetStatisticsCollectorParameters, ): params = statistic_collector_parameters + print(params) min_max_algo = MinMaxQuantization(activations_range_estimator_params=range_estimator_params) min_max_algo._backend_entity = self.get_algo_backend() q_config = QuantizerConfig(num_bits=8, mode=q_config_mode, per_channel=q_config_per_channel) diff --git a/tests/torch/ptq/test_quantizer_config.py b/tests/torch/ptq/test_quantizer_config.py index 916e5325574..62aa42d0374 100644 --- a/tests/torch/ptq/test_quantizer_config.py +++ b/tests/torch/ptq/test_quantizer_config.py @@ -11,7 +11,6 @@ import pytest -from nncf.common.graph.transformations.commands import TargetType from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend from tests.post_training.test_templates.models import NNCFGraphToTest from tests.post_training.test_templates.models import NNCFGraphToTestDepthwiseConv @@ -21,23 +20,11 @@ from tests.torch.ptq.helpers import get_single_conv_nncf_graph from tests.torch.ptq.helpers import get_sum_aggregation_nncf_graph -ParamsCls = TemplateTestQuantizerConfig.TestGetStatisticsCollectorParameters - class TestQuantizerConfig(TemplateTestQuantizerConfig): def get_algo_backend(self): return PTMinMaxAlgoBackend() - @pytest.fixture( - params=[ - (TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (2,), (1, 2)), - (TargetType.POST_LAYER_OPERATION, "/Conv_1_0", (2, 3), (1, 2, 3)), - (TargetType.OPERATION_WITH_WEIGHTS, "/Conv_1_0", (1, 2, 3), (0, 1, 2, 3)), - ] - ) - def statistic_collector_parameters(self, request) -> ParamsCls: - return ParamsCls(*request.param) - @pytest.fixture def single_conv_nncf_graph(self) -> NNCFGraphToTest: return 
get_single_conv_nncf_graph() From bcce5846d7c34f12170eccf0f9f0b41b7b290f9a Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 14:34:48 +0100 Subject: [PATCH 036/108] add docs --- nncf/common/tensor_statistics/aggregator.py | 10 +++++++--- nncf/onnx/graph/metatypes/groups.py | 2 +- nncf/openvino/graph/metatypes/groups.py | 1 + nncf/torch/graph/operator_metatypes.py | 1 + 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 861a6ff8b82..3fb260d06ae 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -117,8 +117,10 @@ def register_statistic_points(self, statistic_points: StatisticPointsContainer) def is_model_batch_size_limited_support(self, graph: NNCFGraph) -> bool: """ - :param NNCFGraph graph: _description_ - :return bool: _description_ + Returns True if NNCFGraph contains metatypes with no batch axis in output tensor. + + :param graph: NNCFGraph + :return: True if NNCFGraph contains metatypes with no batch axis in output tensor. """ for metatype in self.metatypes_output_has_no_batch_axis: if metatype in set(node.metatype for node in graph.get_all_nodes()): @@ -128,7 +130,9 @@ def is_model_batch_size_limited_support(self, graph: NNCFGraph) -> bool: @property @abstractmethod def metatypes_output_has_no_batch_axis(self) -> List[OperatorMetatype]: - """ """ + """ + Metatypes with no batch axis in output tensor. These metatypes lead to accuracy degradation when batch size > 1. 
+ """ @abstractmethod def _register_statistics(self, outputs: Dict[str, NNCFTensor], statistic_points: StatisticPointsContainer) -> None: diff --git a/nncf/onnx/graph/metatypes/groups.py b/nncf/onnx/graph/metatypes/groups.py index 5e61605f618..7687d0e5c40 100644 --- a/nncf/onnx/graph/metatypes/groups.py +++ b/nncf/onnx/graph/metatypes/groups.py @@ -122,7 +122,7 @@ onnx_metatypes.ONNXDepthwiseConvolutionMetatype, ] - +# These metatypes lead to inaccurate statistics when batch size > 1. OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ onnx_metatypes.ONNXROIAlignMetatype, onnx_metatypes.ONNXEmbeddingMetatype, diff --git a/nncf/openvino/graph/metatypes/groups.py b/nncf/openvino/graph/metatypes/groups.py index f24f5420c9b..3d0b96145d3 100644 --- a/nncf/openvino/graph/metatypes/groups.py +++ b/nncf/openvino/graph/metatypes/groups.py @@ -198,6 +198,7 @@ ov_metatypes.OVGroupConvolutionBackpropDataMetatype, ] +# These metatypes lead to inaccurate statistics when batch size > 1. OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ ov_metatypes.OVSpaceToBatchMetatype, ov_metatypes.OVROIPoolingMetatype, diff --git a/nncf/torch/graph/operator_metatypes.py b/nncf/torch/graph/operator_metatypes.py index 009561b1797..ced77311ad9 100644 --- a/nncf/torch/graph/operator_metatypes.py +++ b/nncf/torch/graph/operator_metatypes.py @@ -1052,6 +1052,7 @@ def get_operator_metatypes() -> List[Type[OperatorMetatype]]: OP_NAMES_QUANTIZE_NODE = ["symmetric_quantize", "asymmetric_quantize"] +# These metatypes lead to inaccurate statistics when batch size > 1. 
OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ PTEmbeddingMetatype, PTEmbeddingBagMetatype, From e5950e0c294a68f316224a7f62242770160af0cd Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 18 Jan 2024 14:35:33 +0100 Subject: [PATCH 037/108] comment --- nncf/data/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nncf/data/dataset.py b/nncf/data/dataset.py index 9c3ceb83e8d..8f4a110120d 100644 --- a/nncf/data/dataset.py +++ b/nncf/data/dataset.py @@ -83,9 +83,9 @@ def get_length(self) -> Optional[int]: def get_batch_size(self) -> Optional[int]: """ """ - if hasattr(self._data_source, "batch_size"): # Torch + if hasattr(self._data_source, "batch_size"): # Torch dataloader return self._data_source.batch_size - if hasattr(self._data_source, "_batch_size"): # TF + if hasattr(self._data_source, "_batch_size"): # TF dataloader return self._data_source._batch_size return None From 41f27b5816e1e8df76a11910db02f480a6fbb314 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 19 Jan 2024 10:29:00 +0100 Subject: [PATCH 038/108] rollback changes for torch possible impact qat --- nncf/torch/quantization/init_range.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nncf/torch/quantization/init_range.py b/nncf/torch/quantization/init_range.py index e46173c7380..62dd18a4261 100644 --- a/nncf/torch/quantization/init_range.py +++ b/nncf/torch/quantization/init_range.py @@ -17,7 +17,6 @@ import torch from nncf.common.graph.layer_attributes import WeightedLayerAttributes -from nncf.common.graph.utils import get_reduction_axes from nncf.common.quantization.initialization.range import RangeInitCollectorParams from nncf.common.quantization.initialization.range import RangeInitConfig from nncf.common.quantization.initialization.range import RangeInitParams @@ -116,13 +115,14 @@ def get_reduction_axes(self, per_sample_stats: bool) -> ReductionAxes: """ ndims = len(self._input_shape) reduction_axes: List[int] = 
list(range(ndims)) - axes_to_remove = [] - if self.use_per_sample_stats(per_sample_stats): - axes_to_remove.append(0) if self.is_per_channel: val = (ndims + self._channel_idx) % ndims - axes_to_remove.append(val) - return get_reduction_axes(axes_to_remove, reduction_axes) + reduction_axes.remove(val) + if not val and self.use_per_sample_stats(per_sample_stats): + raise RuntimeError("Batch dimension should be equal to zero") + if self.use_per_sample_stats(per_sample_stats): + reduction_axes = reduction_axes[1:] # Assumes batch is the first dimension + return tuple(reduction_axes) def get_aggregation_axes(self, per_sample_stats: bool) -> AggregationAxes: """ @@ -131,7 +131,7 @@ def get_aggregation_axes(self, per_sample_stats: bool) -> AggregationAxes: :param per_sample_stats: Boolean flag that indicated whether statistics are collected per-sample or per-batch. :return: Shape to aggregate to. """ - return (0, 1) if self.use_per_sample_stats(per_sample_stats) else None + return (0, 1) if self.use_per_sample_stats(per_sample_stats) else (0,) class StatCollectorGenerator: From 51f3dd9e7becaec59ececbc60156a35f9c43be76 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 19 Jan 2024 10:39:52 +0100 Subject: [PATCH 039/108] upd conformance --- tests/post_training/pipelines/causal_language_model.py | 2 ++ tests/post_training/pipelines/image_classification_timm.py | 3 +-- tests/post_training/pipelines/masked_language_modeling.py | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/post_training/pipelines/causal_language_model.py b/tests/post_training/pipelines/causal_language_model.py index 5dbdc87327d..507c1ce128c 100644 --- a/tests/post_training/pipelines/causal_language_model.py +++ b/tests/post_training/pipelines/causal_language_model.py @@ -40,6 +40,8 @@ def transform_func(examples): return transform_func def prepare_calibration_dataset(self): + if self.batch_size > 1: + print("Batch size > 1 is not supported for causal language models. 
Batch size = 1 will be used.") quantizer = OVQuantizer.from_pretrained(self.model_hf) num_samples = self.ptq_params.get("subset_size", 300) diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index 0e50fbaccc5..710620bfcc4 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -60,7 +60,6 @@ def prepare_model(self) -> None: onnx_path, export_params=True, opset_version=13, - do_constant_folding=False, input_names=["image"], dynamic_axes={ "image": {0: "batch"}, @@ -124,7 +123,7 @@ def transform_fn(data_item): def prepare_calibration_dataset(self): dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform) - loader = torch.utils.data.DataLoader(dataset, batch_size=10, num_workers=2, shuffle=False) + loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, num_workers=2, shuffle=False) self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn()) diff --git a/tests/post_training/pipelines/masked_language_modeling.py b/tests/post_training/pipelines/masked_language_modeling.py index 13ed2a3e355..f562dd78d2b 100644 --- a/tests/post_training/pipelines/masked_language_modeling.py +++ b/tests/post_training/pipelines/masked_language_modeling.py @@ -86,6 +86,8 @@ def transform_func(data): return transform_func def prepare_calibration_dataset(self): + if self.batch_size > 1: + print("Batch size > 1 is not supported for masked language models. 
Batch size = 1 will be used.") quantizer = OVQuantizer.from_pretrained(self.model_hf) num_samples = self.ptq_params.get("subset_size", 300) From 3a8de2f315c8098a65b2ba2f436dbc03240d0a7f Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 19 Jan 2024 10:45:27 +0100 Subject: [PATCH 040/108] upd calibrate.py --- tests/openvino/tools/calibrate.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/openvino/tools/calibrate.py b/tests/openvino/tools/calibrate.py index 66825812c82..12cc7eca7ea 100644 --- a/tests/openvino/tools/calibrate.py +++ b/tests/openvino/tools/calibrate.py @@ -111,6 +111,8 @@ def parse_args(): parser.add_argument("--impl", help="NNCF OpenVINO backend implementation.", choices=["pot", "native"], default=None) + parser.add_argument("--batch_size", help="Batch size", type=int, default=1) + return parser.parse_args() @@ -1068,7 +1070,7 @@ def filter_configuration(config: Config) -> Config: return config -def update_config_batch_size(accuracy_checker_config, batch_size): +def update_config(accuracy_checker_config: Config, batch_size: int) -> None: for model in accuracy_checker_config["models"]: for dataset in model["datasets"]: print(f"Updated batch size value to {batch_size}") @@ -1082,8 +1084,9 @@ def main(): xml_path, bin_path = get_model_paths(config.model) accuracy_checker_config = get_accuracy_checker_config(config.engine) - update_config_batch_size(accuracy_checker_config, 10) nncf_algorithms_config = get_nncf_algorithms_config(config.compression, args.output_dir) + if args.batch_size > 1: + update_config(accuracy_checker_config, args.batch_size) set_log_file(f"{args.output_dir}/log.txt") output_dir = os.path.join(args.output_dir, "optimized") From 946523dad3c77d04c7e37ce8356ca85164b84ca6 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 19 Jan 2024 16:04:20 +0100 Subject: [PATCH 041/108] add get_reduction_aggregation_axes for PTRangeInitCollectorParams --- nncf/torch/quantization/init_range.py | 34 
+++++---------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/nncf/torch/quantization/init_range.py b/nncf/torch/quantization/init_range.py index 62dd18a4261..f5a7dd2f060 100644 --- a/nncf/torch/quantization/init_range.py +++ b/nncf/torch/quantization/init_range.py @@ -106,32 +106,11 @@ def __init__( self._input_shape = input_shape self._channel_idx = channel_idx - def get_reduction_axes(self, per_sample_stats: bool) -> ReductionAxes: - """ - Calculates the reduction axes of the tensor. - - :param per_sample_stats: Boolean flag that indicated whether statistics are collected per-sample or per-batch. - :return: Shape to reduce to. - """ - ndims = len(self._input_shape) - reduction_axes: List[int] = list(range(ndims)) - if self.is_per_channel: - val = (ndims + self._channel_idx) % ndims - reduction_axes.remove(val) - if not val and self.use_per_sample_stats(per_sample_stats): - raise RuntimeError("Batch dimension should be equal to zero") - if self.use_per_sample_stats(per_sample_stats): - reduction_axes = reduction_axes[1:] # Assumes batch is the first dimension - return tuple(reduction_axes) - - def get_aggregation_axes(self, per_sample_stats: bool) -> AggregationAxes: - """ - Calculates the aggregation axes of the tensor. - - :param per_sample_stats: Boolean flag that indicated whether statistics are collected per-sample or per-batch. - :return: Shape to aggregate to. 
- """ - return (0, 1) if self.use_per_sample_stats(per_sample_stats) else (0,) + def get_reduction_aggregation_axes(self, per_sample_stats: bool) -> Tuple[ReductionAxes, AggregationAxes]: + reduction_axes, aggregation_axes = super().get_reduction_aggregation_axes(self._input_shape, self._channel_idx) + if per_sample_stats: + return (0,) + reduction_axes, (0,) + return reduction_axes, aggregation_axes class StatCollectorGenerator: @@ -179,8 +158,7 @@ def generate_stat_collector_for_range_init_config( raise RuntimeError("Unknown range init type: {}".format(init_config.init_type)) use_per_sample_stats = collector_params.use_per_sample_stats(init_config.init_type == "mixed_min_max") - reduction_axes = collector_params.get_reduction_axes(use_per_sample_stats) - aggregation_axes = collector_params.get_aggregation_axes(use_per_sample_stats) + reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(use_per_sample_stats) if init_config.init_type == "min_max": return get_min_max_statistic_collector( From 1732d702f43eefb7bce3c301c7baad5c831bcc77 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 19 Jan 2024 16:11:58 +0100 Subject: [PATCH 042/108] non returning None for get_reduction_aggregation_axes --- nncf/common/quantization/initialization/range.py | 2 +- nncf/torch/quantization/init_range.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index a656954f2ea..4f050f3be75 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -219,7 +219,7 @@ def get_reduction_aggregation_axes( :return: Reduction axes and aggregation axes. 
""" if self.is_weights: - aggregation_axes = None + aggregation_axes = (0,) if self.is_per_channel: reduction_axes = get_reduction_axes(quantization_axes, shape_to_reduce) else: diff --git a/nncf/torch/quantization/init_range.py b/nncf/torch/quantization/init_range.py index f5a7dd2f060..b7cb1d57930 100644 --- a/nncf/torch/quantization/init_range.py +++ b/nncf/torch/quantization/init_range.py @@ -107,7 +107,9 @@ def __init__( self._channel_idx = channel_idx def get_reduction_aggregation_axes(self, per_sample_stats: bool) -> Tuple[ReductionAxes, AggregationAxes]: - reduction_axes, aggregation_axes = super().get_reduction_aggregation_axes(self._input_shape, self._channel_idx) + reduction_axes, aggregation_axes = super().get_reduction_aggregation_axes( + self._input_shape, (self._channel_idx,) + ) if per_sample_stats: return (0,) + reduction_axes, (0,) return reduction_axes, aggregation_axes From 1e9631880e491997029a624a523f20a177be4f89 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 19 Jan 2024 16:34:07 +0100 Subject: [PATCH 043/108] comments --- nncf/common/tensor_statistics/aggregator.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 3fb260d06ae..16a02a04a40 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -71,13 +71,13 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: :param model: Backend-specific model instance. :param graph: Model graph. """ + if not self.statistic_points: + return if self.batch_size > 1 and self.is_model_batch_size_limited_support(graph): nncf_logger.warning( - "The batch size > 1 for the particular model can lead to accuracy degradation. \ - To collect the most appropriate statistics it is recommended to use batch size = 1." + "The batch size > 1 for the particular model can lead to inaccurate collected statistics . 
\ + To get the appropriate statistics it is recommended to use batch size = 1." ) - if not self.statistic_points: - return model_transformer = factory.ModelTransformerFactory.create(model) merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph) transformation_layout = self._get_transformation_layout_extra_outputs(merged_statistics) @@ -122,8 +122,9 @@ def is_model_batch_size_limited_support(self, graph: NNCFGraph) -> bool: :param graph: NNCFGraph :return: True if NNCFGraph contains metatypes with no batch axis in output tensor. """ + graph_metatypes = set(node.metatype for node in graph.get_all_nodes()) for metatype in self.metatypes_output_has_no_batch_axis: - if metatype in set(node.metatype for node in graph.get_all_nodes()): + if metatype in graph_metatypes: return True return False From 03afe912a21f9bed85d80a61c514147053358d3d Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 19 Jan 2024 16:36:18 +0100 Subject: [PATCH 044/108] comments --- .../test_tensor_collector_batch_size.py | 16 ++++++++-------- .../test_templates/test_batch_size.py | 5 +++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/common/experimental/test_tensor_collector_batch_size.py b/tests/common/experimental/test_tensor_collector_batch_size.py index 7d83022ba78..a392c4c07c6 100644 --- a/tests/common/experimental/test_tensor_collector_batch_size.py +++ b/tests/common/experimental/test_tensor_collector_batch_size.py @@ -23,40 +23,40 @@ class TemplateTestTensorCollectorBatchSize(ABC): @staticmethod @abstractmethod def get_tensor_statistics_class(): - ... + pass @staticmethod @abstractmethod def get_tensor_processor(): - ... + pass @staticmethod @abstractmethod def get_nncf_tensor_class(): - ... + pass @pytest.fixture @abstractmethod def reducers(self): - ... + pass @pytest.fixture @abstractmethod def aggregators(self): - ... + pass @pytest.fixture @abstractmethod def inplace(self): - ... 
+ pass @abstractmethod def create_dataitems_without_batch_dim(self, input_shape: List[int], length: int = 100) -> List[np.ndarray]: - ... + pass @abstractmethod def add_batch_dim_to_dataitems(self, data_items: List[np.ndarray], batch_size: int) -> List[np.ndarray]: - ... + pass def _create_tensor_collector(self, shape, inplace, reducer, aggregator) -> TensorCollector: batch_axis = 0 diff --git a/tests/post_training/test_templates/test_batch_size.py b/tests/post_training/test_templates/test_batch_size.py index a1a0bb917b6..cc5f94b8db6 100644 --- a/tests/post_training/test_templates/test_batch_size.py +++ b/tests/post_training/test_templates/test_batch_size.py @@ -31,7 +31,7 @@ class DataForTest: class TemplateTestBatchSize(ABC): @abstractmethod def create_statistics_aggregator(self, dataset) -> StatisticsAggregator: - ... + pass def create_dataset(self, lenght, batch_size): dataset = get_static_dataset(None, None, None, lenght) @@ -52,7 +52,8 @@ def create_dataset(self, lenght, batch_size): ] ), ) - def test_batch_size_subset(self, test_data): + def test_batch_size_subset_size_dataset_len(self, test_data): + # Checks correct iterations number depending on batch_size, dataset length, subset_size batch_size, dataset_length, stat_subset_size, ref_calibration_samples_num, ref_iterations_num = ( getattr(test_data, field.name) for field in fields(test_data) ) From bf792fb6854010297f2fa012255b06324ca63499 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 19 Jan 2024 16:39:17 +0100 Subject: [PATCH 045/108] describe comment --- nncf/onnx/graph/metatypes/groups.py | 4 +++- nncf/openvino/graph/metatypes/groups.py | 4 +++- nncf/torch/graph/operator_metatypes.py | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/nncf/onnx/graph/metatypes/groups.py b/nncf/onnx/graph/metatypes/groups.py index 7687d0e5c40..a2b719f944b 100644 --- a/nncf/onnx/graph/metatypes/groups.py +++ b/nncf/onnx/graph/metatypes/groups.py @@ -122,7 +122,9 @@ 
onnx_metatypes.ONNXDepthwiseConvolutionMetatype, ] -# These metatypes lead to inaccurate statistics when batch size > 1. +# These metatypes mix outputs for different samples into one axis. +# Reducers simply reduce the whole tensor and get 1 value instead of batch_size values. +# This leads to inaccurate statistics. OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ onnx_metatypes.ONNXROIAlignMetatype, onnx_metatypes.ONNXEmbeddingMetatype, diff --git a/nncf/openvino/graph/metatypes/groups.py b/nncf/openvino/graph/metatypes/groups.py index 3d0b96145d3..c3c0eaad208 100644 --- a/nncf/openvino/graph/metatypes/groups.py +++ b/nncf/openvino/graph/metatypes/groups.py @@ -198,7 +198,9 @@ ov_metatypes.OVGroupConvolutionBackpropDataMetatype, ] -# These metatypes lead to inaccurate statistics when batch size > 1. +# These metatypes mix outputs for different samples into one axis. +# Reducers simply reduce the whole tensor and get 1 value instead of batch_size values. +# This leads to inaccurate statistics. OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ ov_metatypes.OVSpaceToBatchMetatype, ov_metatypes.OVROIPoolingMetatype, diff --git a/nncf/torch/graph/operator_metatypes.py b/nncf/torch/graph/operator_metatypes.py index ced77311ad9..99fa786f5c3 100644 --- a/nncf/torch/graph/operator_metatypes.py +++ b/nncf/torch/graph/operator_metatypes.py @@ -1052,7 +1052,9 @@ def get_operator_metatypes() -> List[Type[OperatorMetatype]]: OP_NAMES_QUANTIZE_NODE = ["symmetric_quantize", "asymmetric_quantize"] -# These metatypes lead to inaccurate statistics when batch size > 1. +# These metatypes mix outputs for different samples into one axis. +# Reducers simply reduce the whole tensor and get 1 value instead of batch_size values. +# This leads to inaccurate statistics. 
OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ PTEmbeddingMetatype, PTEmbeddingBagMetatype, From f98aea2c077a87c724cb16a90e58e47cb635241d Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 19 Jan 2024 16:41:28 +0100 Subject: [PATCH 046/108] description x2 --- nncf/common/tensor_statistics/aggregator.py | 4 +++- nncf/onnx/graph/metatypes/groups.py | 2 +- nncf/openvino/graph/metatypes/groups.py | 2 +- nncf/torch/graph/operator_metatypes.py | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 16a02a04a40..7b97860180a 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -132,7 +132,9 @@ def is_model_batch_size_limited_support(self, graph: NNCFGraph) -> bool: @abstractmethod def metatypes_output_has_no_batch_axis(self) -> List[OperatorMetatype]: """ - Metatypes with no batch axis in output tensor. These metatypes lead to accuracy degradation when batch size > 1. + These metatypes mix outputs for different samples into one axis. + When reducers reduce the tensor they get only 1 value instead of batch_size values. + This leads to inaccurate statistics. """ @abstractmethod diff --git a/nncf/onnx/graph/metatypes/groups.py b/nncf/onnx/graph/metatypes/groups.py index a2b719f944b..f752b0b79ed 100644 --- a/nncf/onnx/graph/metatypes/groups.py +++ b/nncf/onnx/graph/metatypes/groups.py @@ -123,7 +123,7 @@ ] # These metatypes mix outputs for different samples into one axis. -# Reducers simply reduce the whole tensor and get 1 value instead of batch_size values. +# When reducers reduce the tensor they get only 1 value instead of batch_size values. # This leads to inaccurate statistics. 
OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ onnx_metatypes.ONNXROIAlignMetatype, diff --git a/nncf/openvino/graph/metatypes/groups.py b/nncf/openvino/graph/metatypes/groups.py index c3c0eaad208..02ceb58482a 100644 --- a/nncf/openvino/graph/metatypes/groups.py +++ b/nncf/openvino/graph/metatypes/groups.py @@ -199,7 +199,7 @@ ] # These metatypes mix outputs for different samples into one axis. -# Reducers simply reduce the whole tensor and get 1 value instead of batch_size values. +# When reducers reduce the tensor they get only 1 value instead of batch_size values. # This leads to inaccurate statistics. OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ ov_metatypes.OVSpaceToBatchMetatype, diff --git a/nncf/torch/graph/operator_metatypes.py b/nncf/torch/graph/operator_metatypes.py index 99fa786f5c3..b8ca74dcd7a 100644 --- a/nncf/torch/graph/operator_metatypes.py +++ b/nncf/torch/graph/operator_metatypes.py @@ -1053,7 +1053,7 @@ def get_operator_metatypes() -> List[Type[OperatorMetatype]]: OP_NAMES_QUANTIZE_NODE = ["symmetric_quantize", "asymmetric_quantize"] # These metatypes mix outputs for different samples into one axis. -# Reducers simply reduce the whole tensor and get 1 value instead of batch_size values. +# When reducers reduce the tensor they get only 1 value instead of batch_size values. # This leads to inaccurate statistics. 
OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ PTEmbeddingMetatype, From fbd05f91fbd0ead8c0366daa56a2911fb3054cfa Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 19 Jan 2024 16:46:16 +0100 Subject: [PATCH 047/108] description x3 --- nncf/common/quantization/initialization/range.py | 4 ++-- nncf/common/tensor_statistics/aggregator.py | 2 +- nncf/data/dataset.py | 5 ++++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index 4f050f3be75..f5b31c3f373 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -228,11 +228,11 @@ def get_reduction_aggregation_axes( batch_axis = 0 aggregation_axes = (batch_axis, *quantization_axes) if self.is_per_channel: - # Keep batch to aggregate and channel for per-channel quantization. + # Batch and chanel axes should not be reduced in per-channel mode. # TODO (l-bat): Disable quantizer propagation through layout changing operations reduction_axes = get_reduction_axes(aggregation_axes, shape_to_reduce) else: - # Keep batch to aggregate + # Batch should not be reduced. reduction_axes = get_reduction_axes((batch_axis,), shape_to_reduce) return reduction_axes, aggregation_axes diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 7b97860180a..006e548280f 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -119,7 +119,7 @@ def is_model_batch_size_limited_support(self, graph: NNCFGraph) -> bool: """ Returns True if NNCFGraph contains metatypes with no batch axis in output tensor. - :param graph: NNCFGraph + :param graph: NNCFGraph. :return: True if NNCFGraph contains metatypes with no batch axis in output tensor. 
""" graph_metatypes = set(node.metatype for node in graph.get_all_nodes()) diff --git a/nncf/data/dataset.py b/nncf/data/dataset.py index 8f4a110120d..dd80b780bcd 100644 --- a/nncf/data/dataset.py +++ b/nncf/data/dataset.py @@ -82,7 +82,10 @@ def get_length(self) -> Optional[int]: return None def get_batch_size(self) -> Optional[int]: - """ """ + """ + Tries to fetch batch size of the underlying dataset. + :return: The value of batch_size or _batch_size attributes of the data_source if exist, and None otherwise. + """ if hasattr(self._data_source, "batch_size"): # Torch dataloader return self._data_source.batch_size if hasattr(self._data_source, "_batch_size"): # TF dataloader From e80bab1f2d7abac02d4bd871bc6b880058df2302 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 23 Jan 2024 10:41:43 +0100 Subject: [PATCH 048/108] apply suggestion --- nncf/common/tensor_statistics/aggregator.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 006e548280f..6d6ed96175a 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -122,11 +122,8 @@ def is_model_batch_size_limited_support(self, graph: NNCFGraph) -> bool: :param graph: NNCFGraph. :return: True if NNCFGraph contains metatypes with no batch axis in output tensor. 
""" - graph_metatypes = set(node.metatype for node in graph.get_all_nodes()) - for metatype in self.metatypes_output_has_no_batch_axis: - if metatype in graph_metatypes: - return True - return False + unique_graph_metatypes = set(node.metatype for node in graph.get_all_nodes()) + return any(metatype in self.metatypes_output_has_no_batch_axis for metatype in unique_graph_metatypes) @property @abstractmethod From 9c1648dcaac817fc9e9554555476360e0a82e157 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 24 Jan 2024 14:01:45 +0100 Subject: [PATCH 049/108] comments --- nncf/common/tensor_statistics/aggregator.py | 12 +++++++----- nncf/onnx/graph/node_utils.py | 4 ++-- nncf/quantization/algorithms/min_max/onnx_backend.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 6d6ed96175a..d83aca67f51 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -54,14 +54,14 @@ def _get_total_statistics_samples( else None ) - def _get_iterations_num(self, total_statistics_samples: Optional[int]) -> Optional[int]: + def _get_iterations_num(self, total_statistics_samples: int) -> int: """ Returns number of iterations to collect statistics. :param total_statistics_samples: Number of statistics samples are used. - :return: Iterations number statistics collection. + :return: Iterations number of statistics collection. 
""" - return total_statistics_samples // self.batch_size if total_statistics_samples is not None else None + return total_statistics_samples // self.batch_size def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ @@ -85,8 +85,10 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: engine = factory.EngineFactory.create(model_with_outputs) calibration_samples_num = self._get_total_statistics_samples() - iterataions_num = self._get_iterations_num(calibration_samples_num) - if iterataions_num == 0: + iterataions_num = ( + self._get_iterations_num(calibration_samples_num) if calibration_samples_num is not None else None + ) + if iterataions_num is not None and iterataions_num == 0: raise ValueError("Batch size > length of dataset or batch size > stat_subset_size.") with track(total=calibration_samples_num, description="Statistics collection") as pbar: for input_data in islice(self.dataset.get_inference_data(), iterataions_num): diff --git a/nncf/onnx/graph/node_utils.py b/nncf/onnx/graph/node_utils.py index b03f963958d..3c948fee501 100644 --- a/nncf/onnx/graph/node_utils.py +++ b/nncf/onnx/graph/node_utils.py @@ -187,12 +187,12 @@ def _get_activation_tensor_shape( if target_point.type == TargetType.PRE_LAYER_OPERATION: nncf_logger.info( f"The shape of input edge of a node {node.node_name} is unkown. \ - Therefore per-tensor quantizaiton is applied." + It could lead to inaccurate statistics collection." ) elif target_point.type == TargetType.POST_LAYER_OPERATION: nncf_logger.info( f"The shape of output edge of a node {node.node_name} is unkown. \ - Therefore per-tensor quantizaiton is applied." + It could lead to inaccurate statistics collection." ) nncf_logger.info("Please consider to run pre-processing before quantization.") # TODO: add preprocessing tool for ONNX model. 
diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index f3bca4c3c91..f3d55a44193 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -164,7 +164,7 @@ def get_statistic_collector( shape = get_quantized_tensor_shape(nncf_graph, node, target_point) channel_axis = get_quantization_axis(node, target_point) if shape is None: # No information about shape - reduction_axes, aggregation_axes = None, (0, 1) # default per-tensor values + reduction_axes, aggregation_axes = None, None # default per-tensor values else: reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(shape, [channel_axis]) collector = TensorCollector(ONNXMinMaxTensorStatistic) From df8ad032a7deae19b880d03b09d891aebbb2f676 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 25 Jan 2024 16:13:04 +0100 Subject: [PATCH 050/108] add default scenario when batch_size=1 or None --- .../quantization/initialization/range.py | 49 ++++++++++------ nncf/onnx/graph/node_utils.py | 24 +------- nncf/quantization/algorithms/algorithm.py | 3 +- .../algorithms/bias_correction/algorithm.py | 2 +- .../algorithms/channel_alignment/algorithm.py | 2 +- .../fast_bias_correction/algorithm.py | 2 +- .../hyperparameter_tuner/algorithm.py | 4 +- .../algorithms/min_max/algorithm.py | 57 +++++++++++-------- .../algorithms/min_max/backend.py | 44 +++++++++++--- .../algorithms/min_max/onnx_backend.py | 45 +++++++++------ .../algorithms/min_max/openvino_backend.py | 40 ++++++------- .../algorithms/min_max/torch_backend.py | 31 ++++++---- nncf/quantization/algorithms/pipeline.py | 7 ++- .../algorithms/post_training/algorithm.py | 4 +- .../algorithms/smooth_quant/algorithm.py | 2 +- .../weight_compression/algorithm.py | 2 +- .../reference_scales/DynamicModel_mixed.json | 8 +-- .../DynamicModel_performance.json | 8 +-- .../reference_scales/MatMul2DModel_mixed.json 
| 4 +- .../MatMul2DModel_performance.json | 4 +- .../ScaleShiftReluModel_mixed.json | 12 ++-- .../ScaleShiftReluModel_performance.json | 12 ++-- .../test_fq_params_calculation.py | 2 +- .../native/quantization/test_graphs.py | 2 +- .../test_templates/test_bias_correction.py | 2 +- .../test_templates/test_channel_alignment.py | 2 +- .../test_templates/test_ptq_params.py | 2 +- .../test_templates/test_quantizer_config.py | 33 +++++++++-- .../test_templates/test_smooth_quant.py | 2 +- .../ptq/test_calculation_quantizer_params.py | 2 +- 30 files changed, 243 insertions(+), 170 deletions(-) diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index f5b31c3f373..bd70a28589b 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -208,31 +208,44 @@ def use_means_of_mins(self) -> bool: def use_means_of_maxs(self) -> bool: return not self._is_weights and not self._is_per_channel + def _get_reduction_axes( + self, shape_to_reduce: List[int], quantization_axes: Union[Tuple[int], List[int]], aggregation_axes: List[int] + ): + """ + TODO + + :param shape_to_reduce: + :param quantization_axes: + :param aggregation_axes: + :return: + """ + axes_to_keep = set(el - 1 for el in aggregation_axes if el != 0) + axes_to_keep.update(quantization_axes) + return get_reduction_axes(axes_to_keep, shape_to_reduce) + + def _get_aggregation_axes(self, is_per_sample: bool) -> Tuple[int]: + """ + TODO + + :param bool is_per_sample: _description_ + :return Tuple[int]: _description_ + """ + return (0, 1) if is_per_sample else (0,) + def get_reduction_aggregation_axes( - self, shape_to_reduce: Union[Tuple[int], List[int]], quantization_axes: Union[Tuple[int], List[int]] + self, + shape_to_reduce: Union[Tuple[int], List[int]], + quantization_axes: Union[Tuple[int], List[int]], + is_per_sample: bool, ) -> Tuple[ReductionAxes, AggregationAxes]: """ Calculates the reduction axes, 
aggregation axes for the tensor. :param shape_to_reduce: Shape of the tensor. :param quantization_axes: Quantization axes if per-channel quantization. + :param is_per_sample: Whether to calculate statistics per-sample (aggregate batch axis) :return: Reduction axes and aggregation axes. """ - if self.is_weights: - aggregation_axes = (0,) - if self.is_per_channel: - reduction_axes = get_reduction_axes(quantization_axes, shape_to_reduce) - else: - reduction_axes = tuple(range(len(shape_to_reduce))) - else: - batch_axis = 0 - aggregation_axes = (batch_axis, *quantization_axes) - if self.is_per_channel: - # Batch and chanel axes should not be reduced in per-channel mode. - # TODO (l-bat): Disable quantizer propagation through layout changing operations - reduction_axes = get_reduction_axes(aggregation_axes, shape_to_reduce) - else: - # Batch should not be reduced. - reduction_axes = get_reduction_axes((batch_axis,), shape_to_reduce) - + aggregation_axes = self._get_aggregation_axes(is_per_sample) + reduction_axes = self._get_reduction_axes(shape_to_reduce, quantization_axes, aggregation_axes) return reduction_axes, aggregation_axes diff --git a/nncf/onnx/graph/node_utils.py b/nncf/onnx/graph/node_utils.py index 3c948fee501..2b0002bddf3 100644 --- a/nncf/onnx/graph/node_utils.py +++ b/nncf/onnx/graph/node_utils.py @@ -134,7 +134,7 @@ def transpose_axis(shape: List[int], axis: int) -> int: return range(len(shape) - 1, -1, -1)[axis] # Iterate backward throug axis -def _get_weight_quantization_axis(node: NNCFNode, port_id: int) -> int: +def get_weight_quantization_axis(node: NNCFNode, port_id: int) -> int: """ Returns weight tensor axis, along which quantizer parameters are calculated. @@ -155,15 +155,6 @@ def _get_weight_quantization_axis(node: NNCFNode, port_id: int) -> int: return weight_channel_axis -def _get_activation_quantization_axis() -> int: - """ - Returns activation tensor axis, along which quantizer parameters are calculated. 
- - :return: Axis, along which quantizer parameters are calculated. - """ - return 1 # Activations have channel first layout: [N, C, Z, Y, X] - - def _get_activation_tensor_shape( nncf_graph: NNCFGraph, node: NNCFNode, target_point: ONNXTargetPoint ) -> Optional[List[int]]: @@ -215,16 +206,3 @@ def get_quantized_tensor_shape( if target_point.is_weight_target_point(): return node.layer_attributes.weight_attrs[target_point.port_id]["shape"] return _get_activation_tensor_shape(nncf_graph, node, target_point) - - -def get_quantization_axis(node: NNCFNode, target_point: ONNXTargetPoint) -> int: - """ - Returns axis of quantizer parameters are calculated along. - - :param node: NNCFNode. - :param target_point: Target point indicates the quantizer place in the model graph. - :return: Quantizion axis. - """ - if target_point.is_weight_target_point(): - return _get_weight_quantization_axis(node, target_point.port_id) - return _get_activation_quantization_axis() diff --git a/nncf/quantization/algorithms/algorithm.py b/nncf/quantization/algorithms/algorithm.py index b2bca79a31e..772791942c5 100644 --- a/nncf/quantization/algorithms/algorithm.py +++ b/nncf/quantization/algorithms/algorithm.py @@ -54,11 +54,12 @@ def apply( """ @abstractmethod - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: """ Returns statistic points, for which StatisticsCollector should collect statistics. :param model: Model for statistics collection. :param graph: Model graph. + :param dataset: A representative dataset for the calibration process. :return: Statistic points, for which StatisticsCollector should collect statistics. 
""" diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index 6b1265125e4..5d24e945281 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -505,7 +505,7 @@ def output_filter_func(point): output_fp.extend(tensor_collector.get_statistics().mean_values) return np.array(output_fp) - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: self._set_backend_entity(model) statistic_container = StatisticPointsContainer() diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py index 86e055bd7f2..5d4275c4e02 100644 --- a/nncf/quantization/algorithms/channel_alignment/algorithm.py +++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py @@ -381,7 +381,7 @@ def _get_target_point_and_node_in(self, conv_in, add_in) -> Tuple[TargetPoint, N node_in, ) - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: self._set_backend_entity(model) statistic_container = StatisticPointsContainer() diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 41fb61c74a7..67d7be32b0a 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -321,7 +321,7 @@ def _get_bias_shift( bias_shift = fns.stack(output_fp) - q_outputs return bias_shift - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model: 
TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: self._set_backend_entity(model) nodes_with_bias = [ node for node in graph.get_all_nodes() if self._backend_entity.is_node_with_bias(node, graph) diff --git a/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py b/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py index 2d0e123b1df..7588be4d2a4 100644 --- a/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py +++ b/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py @@ -292,7 +292,9 @@ def apply(self, model: TModel, validation_dataset: Dataset) -> TModel: # TODO(andrey-churkin): Think about how it can be avoided. params = apply_combination(self._init_params, best_settings) pipeline = self._pipeline_fn(**params) - container = pipeline.get_statistic_points_for_step(step_index, step_model, step_graph) + container = pipeline.get_statistic_points_for_step( + step_index, step_model, step_graph, self._calibration_dataset + ) step_statistics = collect_statistics(container, step_model, step_graph, self._calibration_dataset) step_model = pipeline.run_step(step_index, step_statistics, step_model, step_graph) continue diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index bf790dee42d..1ce9c8773d9 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -350,34 +350,50 @@ def _get_range_estimator_parameters( def _get_stat_collector( self, - nncf_graph: NNCFGraph, + graph: NNCFGraph, target_point: TargetPoint, - quantizer_config: QuantizerConfig, - num_samples: int, + qconfig: QuantizerConfig, + is_many_samples: bool, ) -> TensorStatisticCollectorBase: """ Creates and returns a statistic collector based on the quantizer's configuration. - :param nncf_graph: NNCFGraph instance. + :param graph: NNCFGraph instance. 
:param target_point: Target point indicates where statistics should be collected. - :param quantizer_config: Configuration of a quantizer layer, + :param qconfig: Configuration of a quantizer layer, defining the configuration of created statistic collector. - :param num_samples: Number of samples to collect from the 'target_point'. + :param is_many_samples: True meaning that one data tensor consists of some samples. + False - data tnesor has onle one sample. :return: Statistic Collector. """ - range_estimator_params = self._get_range_estimator_parameters(target_point, quantizer_config) + is_weight = target_point.is_weight_target_point() + node = graph.get_node_by_name(target_point.target_node_name) + + shape = self._backend_entity.get_target_point_shape(graph, node, target_point) + channel_axes = self._backend_entity.get_channel_axes(node, target_point, is_weight, qconfig.per_channel) + + range_estimator_params = self._get_range_estimator_parameters(target_point, qconfig) + + # Weight statistics is constant, so only one collection is enough. 
+ num_samples = self._subset_size if not is_weight else 1 + + is_per_sample = is_many_samples and not is_weight collector_params = RangeInitCollectorParams( - is_weights=target_point.is_weight_target_point(), - scheme=quantizer_config.mode, - per_channel=quantizer_config.per_channel, + is_weights=is_weight, scheme=qconfig.mode, per_channel=qconfig.per_channel ) + reduction_axes, aggregation_axes = None, None + if shape is not None: + reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes( + shape, channel_axes, is_per_sample + ) + return self._backend_entity.get_statistic_collector( range_estimator_params, - nncf_graph, - target_point, - collector_params, - inplace=self._inplace_statistics, + collector_params.use_abs_max, + reduction_axes, + aggregation_axes, + self._inplace_statistics, num_samples=num_samples, ) @@ -859,21 +875,14 @@ def filter_func(point: StatisticPoint) -> bool: quantized_model = model_transformer.transform(transformation_layout) return quantized_model - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: self._set_backend_entity(model) self._reset_cache() quantization_target_points, _ = self._get_quantization_target_points(model, graph) output = StatisticPointsContainer() for quantization_target_point, qconfig in quantization_target_points.items(): - nncf_logger.debug( - f"Adding target point {quantization_target_point.target_node_name}" - f" with type {quantization_target_point.type} for statistics collection" - ) - num_samples = self._subset_size - if quantization_target_point.is_weight_target_point(): - # Weight statistics is constant, so only one collection is enough. 
- num_samples = 1 - stat_collector = self._get_stat_collector(graph, quantization_target_point, qconfig, num_samples) + is_many_samples = dataset.get_batch_size() is not None and dataset.get_batch_size() > 1 + stat_collector = self._get_stat_collector(graph, quantization_target_point, qconfig, is_many_samples) output.add_statistic_point( StatisticPoint( target_point=quantization_target_point, diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index e122fb4399a..a6c15ef9eb3 100644 --- a/nncf/quantization/algorithms/min_max/backend.py +++ b/nncf/quantization/algorithms/min_max/backend.py @@ -11,7 +11,7 @@ from abc import ABC from abc import abstractmethod -from typing import Dict, List, Optional, Set, TypeVar +from typing import Dict, List, Optional, Set, Tuple, TypeVar from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode @@ -20,7 +20,6 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.hardware.config import HWConfig -from nncf.common.quantization.initialization.range import RangeInitCollectorParams from nncf.common.quantization.structs import QuantizerConfig from nncf.common.tensor_statistics.collectors import TensorStatisticCollectorBase from nncf.common.tensor_statistics.statistics import MinMaxTensorStatistic @@ -183,23 +182,50 @@ def unify_statistics(statistics: List[MinMaxTensorStatistic]) -> MinMaxTensorSta :return: Unified MinMaxTensorStatistic value. 
""" + @staticmethod + @abstractmethod + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: TargetPoint) -> List[int]: + """ + TODO + + :param NNCFGraph nncf_graph: + :param NNCFNode node: + :param TargetPoint target_point: + :return List[int]: + """ + + @staticmethod + @abstractmethod + def get_channel_axes( + node: NNCFNode, target_point: TargetPoint, is_weight: bool, is_per_channel: bool + ) -> Tuple[int]: + """ + TODO + + :param NNCFNode node: + :param TargetPoint target_point: + :param bool is_weight: + :param bool is_per_channel: + :return Tuple[int]: + """ + @staticmethod @abstractmethod def get_statistic_collector( range_estimator_params: RangeEstimatorParameters, - nncf_graph: NNCFGraph, - target_point: TargetPoint, - collector_params: RangeInitCollectorParams, + use_abs_max: bool, + reduction_axes: Optional[Tuple[int]], + aggregation_axes: Optional[Tuple[int]], inplace: bool, - num_samples: int = None, + num_samples: Optional[int] = None, ) -> TensorStatisticCollectorBase: """ Returns backend-specific statistic collector. :param range_estimator_params: Parameters that specify estimators types. - :param nncf_graph: NNCFGraph to get input/output shapes for the target point. - :param target_point: Target location for the correction. - :param collector_params: RangeInitCollectorParams instance for the current layer. + :param use_abs_max: + :param reduction_axes: TODO + :param aggregation_axes: :param inplace: Whether to calculate statistic inplace or not. :param num_samples: Maximum number of samples to collect. :return: Backend-specific TensorStatisticCollectorBase for the statistics calculation. 
diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index f3d55a44193..1a94f55ce19 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Optional, Set +from typing import Dict, List, Optional, Set, Tuple import numpy as np @@ -19,15 +19,14 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.hardware.config import HWConfig -from nncf.common.quantization.initialization.range import RangeInitCollectorParams from nncf.common.quantization.structs import QuantizerConfig from nncf.experimental.common.tensor_statistics.collectors import AGGREGATORS_MAP from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.onnx.graph.metatypes import onnx_metatypes as om from nncf.onnx.graph.metatypes.groups import MATMUL_METATYPES from nncf.onnx.graph.node_utils import get_input_edges_mapping -from nncf.onnx.graph.node_utils import get_quantization_axis from nncf.onnx.graph.node_utils import get_quantized_tensor_shape +from nncf.onnx.graph.node_utils import get_weight_quantization_axis from nncf.onnx.graph.transformations.commands import ONNXQuantizerInsertionCommand from nncf.onnx.graph.transformations.commands import ONNXTargetPoint from nncf.onnx.hardware.config import ONNXHWConfig @@ -120,11 +119,16 @@ def create_quantizer_insertion_command( parameters: FakeQuantizeParameters, ): tensor_type = np.int8 if np.any(parameters.input_low.data < 0) else np.uint8 - if target_point.is_weight_target_point(): + is_weight = target_point.is_weight_target_point() + if is_weight: tensor_type = np.int8 # The weight is restricted to have only signed range 
nncf_input_node_next_nodes = ONNXMinMaxAlgoBackend._get_input_edges_mapping(nncf_graph) node = nncf_graph.get_node_by_name(target_point.target_node_name) - axis = get_quantization_axis(node, target_point) if quantizer_config.per_channel else None + axis = ONNXMinMaxAlgoBackend.get_channel_axes(node, target_point, is_weight, quantizer_config.per_channel) + if not axis: + axis = None + else: + axis = axis[0] onnx_parameters = convert_fq_params_to_onnx_params(parameters, quantizer_config.num_bits, tensor_type, axis) return ONNXQuantizerInsertionCommand(target_point, nncf_input_node_next_nodes, onnx_parameters) @@ -151,22 +155,29 @@ def unify_statistics( def _get_input_edges_mapping(nncf_graph: NNCFGraph): return get_input_edges_mapping(nncf_graph) + @staticmethod + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: ONNXTargetPoint) -> List[int]: + return get_quantized_tensor_shape(nncf_graph, node, target_point) + + @staticmethod + def get_channel_axes( + node: NNCFNode, target_point: ONNXTargetPoint, is_weight: bool, is_per_channel: bool + ) -> Tuple[int]: + if not is_per_channel: + return () + if is_weight: + return (get_weight_quantization_axis(node, target_point.port_id),) + return (1,) + @staticmethod def get_statistic_collector( range_estimator_params: RangeEstimatorParameters, - nncf_graph: NNCFGraph, - target_point: ONNXTargetPoint, - collector_params: RangeInitCollectorParams, + use_abs_max: bool, + reduction_axes: Optional[Tuple[int]], + aggregation_axes: Optional[Tuple[int]], inplace: bool, - num_samples: int = None, + num_samples: Optional[int] = None, ) -> TensorCollector: - node = nncf_graph.get_node_by_name(target_point.target_node_name) - shape = get_quantized_tensor_shape(nncf_graph, node, target_point) - channel_axis = get_quantization_axis(node, target_point) - if shape is None: # No information about shape - reduction_axes, aggregation_axes = None, None # default per-tensor values - else: - reduction_axes, 
aggregation_axes = collector_params.get_reduction_aggregation_axes(shape, [channel_axis]) collector = TensorCollector(ONNXMinMaxTensorStatistic) for params, container_key in zip( [range_estimator_params.min, range_estimator_params.max], @@ -189,7 +200,7 @@ def get_statistic_collector( kwargs.update({"quantile": [quantile]}) # TODO(dlyakhov): merge two quantile aggregators in one statistic_type = params.statistics_type - if collector_params.use_abs_max and statistic_type == StatisticsType.MAX: + if use_abs_max and statistic_type == StatisticsType.MAX: statistic_type = StatisticsType.ABS_MAX reducer = ONNX_REDUCERS_MAP[statistic_type](**kwargs) diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 0a398595827..99383055ad8 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict, List, Optional, Set +from typing import Dict, List, Optional, Set, Tuple import numpy as np @@ -18,7 +18,6 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType from nncf.common.hardware.config import HWConfig -from nncf.common.quantization.initialization.range import RangeInitCollectorParams from nncf.common.quantization.structs import QuantizerConfig from nncf.experimental.common.tensor_statistics.collectors import AGGREGATORS_MAP from nncf.experimental.common.tensor_statistics.collectors import TensorCollector @@ -138,33 +137,34 @@ def unify_statistics(statistics: List[OVMinMaxTensorStatistic]) -> OVMinMaxTenso return OVMinMaxTensorStatistic(min_values=min_values, max_values=max_values) @staticmethod - def _get_activation_shape(target_point: OVTargetPoint, nncf_graph: NNCFGraph, node: NNCFNode) -> List[int]: + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: OVTargetPoint) -> List[int]: + if target_point.is_weight_target_point(): + return node.layer_attributes.constant_attributes[target_point.port_id]["shape"] if target_point.type == TargetType.PRE_LAYER_OPERATION: return nncf_graph.get_input_edges(node)[target_point.port_id].tensor_shape elif target_point.type == TargetType.POST_LAYER_OPERATION: return nncf_graph.get_output_edges(node)[target_point.port_id].tensor_shape - else: - raise NotImplementedError(f"Unsupported target point type {target_point.type}.") + raise NotImplementedError(f"Unsupported target point type {target_point.type}.") + + @staticmethod + def get_channel_axes( + node: NNCFNode, target_point: OVTargetPoint, is_weight: bool, is_per_channel: bool + ) -> Tuple[int]: + if not is_per_channel: + return () + if is_weight: + return get_weight_channel_axes(node) + return (1,) @staticmethod def get_statistic_collector( range_estimator_params: RangeEstimatorParameters, - nncf_graph: NNCFGraph, - target_point: 
OVTargetPoint, - collector_params: RangeInitCollectorParams, + use_abs_max: bool, + reduction_axes: Optional[Tuple[int]], + aggregation_axes: Optional[Tuple[int]], inplace: bool, - num_samples: int = None, + num_samples: Optional[int] = None, ) -> TensorCollector: - node = nncf_graph.get_node_by_name(target_point.target_node_name) - if target_point.is_weight_target_point(): - assert isinstance(node.layer_attributes, OVLayerAttributes) - shape = node.layer_attributes.constant_attributes[target_point.port_id]["shape"] - channel_axes = get_weight_channel_axes(node) - else: - shape = OVMinMaxAlgoBackend._get_activation_shape(target_point, nncf_graph, node) - channel_axes = (1,) - reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(shape, channel_axes) - collector = TensorCollector(OVMinMaxTensorStatistic) for params, container_key in zip( [range_estimator_params.min, range_estimator_params.max], @@ -187,7 +187,7 @@ def get_statistic_collector( kwargs.update({"quantile": [quantile]}) # TODO(dlyakhov): merge two quantile aggregators in one statistic_type = params.statistics_type - if collector_params.use_abs_max and statistic_type == StatisticsType.MAX: + if use_abs_max and statistic_type == StatisticsType.MAX: statistic_type = StatisticsType.ABS_MAX reducer = OV_REDUCERS_MAP[statistic_type](**kwargs) diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index d2f83cc8b8e..321b433f7f5 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -22,7 +22,6 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.hardware.config import HWConfig -from nncf.common.quantization.initialization.range import RangeInitCollectorParams from nncf.common.quantization.structs import QuantizationScheme as 
QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.experimental.common.tensor_statistics.collectors import AGGREGATORS_MAP @@ -156,19 +155,31 @@ def unify_statistics(statistics: List[PTMinMaxTensorStatistic]) -> PTMinMaxTenso min_values = torch.amin(torch.stack(min_values), dim=0) return PTMinMaxTensorStatistic(min_values=min_values, max_values=max_values) + @staticmethod + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: PTTargetPoint) -> List[int]: + if target_point.is_weight_target_point(): + return node.layer_attributes.get_weight_shape() + return nncf_graph.get_input_shape_for_insertion_point(target_point) + + @staticmethod + def get_channel_axes( + node: NNCFNode, target_point: PTTargetPoint, is_weight: bool, is_per_channel: bool + ) -> Tuple[int]: + if not is_per_channel: + return () + if is_weight: + return (node.layer_attributes.get_target_dim_for_compression(),) + return (1,) + @staticmethod def get_statistic_collector( range_estimator_params: RangeEstimatorParameters, - nncf_graph: NNCFGraph, - target_point: PTTargetPoint, - collector_params: RangeInitCollectorParams, + use_abs_max: bool, + reduction_axes: Optional[Tuple[int]], + aggregation_axes: Optional[Tuple[int]], inplace: bool, - num_samples: int = None, + num_samples: Optional[int] = None, ) -> TensorCollector: - input_shape, _, channel_idx = PTMinMaxAlgoBackend._get_input_scale_shape( - nncf_graph, target_point, collector_params.is_per_channel - ) - reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(input_shape, (channel_idx,)) collector = TensorCollector(PTMinMaxTensorStatistic) for params, container_key in zip( [range_estimator_params.min, range_estimator_params.max], @@ -193,7 +204,7 @@ def get_statistic_collector( quantile = 1 - params.quantile_outlier_prob reducer = PT_REDUCERS_MAP[statistic_type](reduction_axes=reduction_axes, quantile=[quantile]) else: - if collector_params.use_abs_max and 
statistic_type == StatisticsType.MAX: + if use_abs_max and statistic_type == StatisticsType.MAX: statistic_type = StatisticsType.ABS_MAX reducer = PT_REDUCERS_MAP[statistic_type](reduction_axes=reduction_axes) diff --git a/nncf/quantization/algorithms/pipeline.py b/nncf/quantization/algorithms/pipeline.py index 2c02e3753c6..5740fd6caa9 100644 --- a/nncf/quantization/algorithms/pipeline.py +++ b/nncf/quantization/algorithms/pipeline.py @@ -157,7 +157,7 @@ def run_from_step( # Collect statistics required to run current pipeline step step_statistics = step_index_to_statistics.get(step_index) if step_statistics is None: - statistic_points = self.get_statistic_points_for_step(step_index, step_model, step_graph) + statistic_points = self.get_statistic_points_for_step(step_index, step_model, step_graph, dataset) step_statistics = collect_statistics(statistic_points, step_model, step_graph, dataset) # Run current pipeline step @@ -168,7 +168,7 @@ def run_from_step( return step_model def get_statistic_points_for_step( - self, step_index: int, model: TModel, graph: NNCFGraph + self, step_index: int, model: TModel, graph: NNCFGraph, dataset: Dataset ) -> StatisticPointsContainer: """ Returns statistics that should be collected to execute `step_index`-th pipeline step. @@ -176,13 +176,14 @@ def get_statistic_points_for_step( :param step_index: Zero-based index of the pipeline step. :param model: A model. :param graph: A graph assosiated with a model. + :param dataset: A dataset that holds the data items for pipeline steps. :return: Statistics that should be collected to execute `step_index`-th pipeline step. 
""" container = StatisticPointsContainer() pipeline_steps = self._remove_unsupported_algorithms(get_backend(model)) pipeline_step = pipeline_steps[step_index] for algorithm in pipeline_step: - for statistic_points in algorithm.get_statistic_points(model, graph).values(): + for statistic_points in algorithm.get_statistic_points(model, graph, dataset).values(): for statistic_point in statistic_points: container.add_statistic_point(statistic_point) diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index 3d66ea87a3a..d119a084abb 100644 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ b/nncf/quantization/algorithms/post_training/algorithm.py @@ -89,8 +89,8 @@ def available_backends(self) -> List[BackendType]: backends = backends.intersection(algorithm.available_backends) return list(backends) - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: - return self._pipeline.get_statistic_points_for_step(0, model, graph) + def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: + return self._pipeline.get_statistic_points_for_step(0, model, graph, dataset) def apply( self, diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index 7bdc33725d4..d33ec2ab2b3 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -223,7 +223,7 @@ def filter_func(point: StatisticPoint) -> bool: statistics_for_node.append(tensor_collector.get_statistics()[STATISTIC_BRANCH_KEY]) return statistics_for_node - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: statistic_container = StatisticPointsContainer() 
self._set_backend_entity(model) diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py index 329fa46c9fb..fefe05fc685 100644 --- a/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -331,7 +331,7 @@ def do_compression( ) return transformed_model - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: pass def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> Tuple[NNCFNode, int]: diff --git a/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_mixed.json b/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_mixed.json index b7df9051e7e..9cf4da1968f 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_mixed.json +++ b/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_mixed.json @@ -94,9 +94,9 @@ ] }, "Sub/fq_output_0": { - "input_low": -0.5242199897766113, - "input_high": 0.8538841009140015, - "output_low": -0.5242199897766113, - "output_high": 0.8538841009140015 + "input_low": -0.6373578310012817, + "input_high": 0.9560367465019226, + "output_low": -0.6373578310012817, + "output_high": 0.9560367465019226 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_performance.json b/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_performance.json index b450a080161..9727ddf1990 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_performance.json +++ b/tests/openvino/native/data/2023.2/reference_scales/DynamicModel_performance.json @@ -94,9 +94,9 @@ ] }, "Sub/fq_output_0": { - "input_low": -0.8591098189353943, - "input_high": 0.8523980379104614, - "output_low": -0.8591098189353943, - 
"output_high": 0.8523980379104614 + "input_low": -0.9635645747184753, + "input_high": 0.9560367465019226, + "output_low": -0.9635645747184753, + "output_high": 0.9560367465019226 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_mixed.json b/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_mixed.json index 7e7fd2cdd5e..fd0ed29955f 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_mixed.json +++ b/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_mixed.json @@ -27,8 +27,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.8685823082923889, + "input_high": 0.9350724220275879, "output_low": 0.0, - "output_high": 0.8685823082923889 + "output_high": 0.9350724220275879 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_performance.json b/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_performance.json index 7e7fd2cdd5e..fd0ed29955f 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_performance.json +++ b/tests/openvino/native/data/2023.2/reference_scales/MatMul2DModel_performance.json @@ -27,8 +27,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.8685823082923889, + "input_high": 0.9350724220275879, "output_low": 0.0, - "output_high": 0.8685823082923889 + "output_high": 0.9350724220275879 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_mixed.json b/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_mixed.json index 31dddc43f04..71700abfa50 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_mixed.json +++ b/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_mixed.json @@ -35,15 +35,15 @@ }, "Relu/fq_output_0": { "input_low": 0.0, - "input_high": 2.1013052463531494, + "input_high": 
2.5198161602020264, "output_low": 0.0, - "output_high": 2.1013052463531494 + "output_high": 2.5198161602020264 }, "MatMul/fq_output_0": { "input_low": 0.0, - "input_high": 1.6296062469482422, + "input_high": 2.1930606365203857, "output_low": 0.0, - "output_high": 1.6296062469482422 + "output_high": 2.1930606365203857 }, "MatMul/fq_weights_1": { "input_low": [ @@ -73,8 +73,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.8685823082923889, + "input_high": 0.9350724220275879, "output_low": 0.0, - "output_high": 0.8685823082923889 + "output_high": 0.9350724220275879 } } \ No newline at end of file diff --git a/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_performance.json b/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_performance.json index 31dddc43f04..71700abfa50 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_performance.json +++ b/tests/openvino/native/data/2023.2/reference_scales/ScaleShiftReluModel_performance.json @@ -35,15 +35,15 @@ }, "Relu/fq_output_0": { "input_low": 0.0, - "input_high": 2.1013052463531494, + "input_high": 2.5198161602020264, "output_low": 0.0, - "output_high": 2.1013052463531494 + "output_high": 2.5198161602020264 }, "MatMul/fq_output_0": { "input_low": 0.0, - "input_high": 1.6296062469482422, + "input_high": 2.1930606365203857, "output_low": 0.0, - "output_high": 1.6296062469482422 + "output_high": 2.1930606365203857 }, "MatMul/fq_weights_1": { "input_low": [ @@ -73,8 +73,8 @@ }, "Input/fq_output_0": { "input_low": 0.0, - "input_high": 0.8685823082923889, + "input_high": 0.9350724220275879, "output_low": 0.0, - "output_high": 0.8685823082923889 + "output_high": 0.9350724220275879 } } \ No newline at end of file diff --git a/tests/openvino/native/quantization/test_fq_params_calculation.py b/tests/openvino/native/quantization/test_fq_params_calculation.py index 333f23209af..d40a496a7d7 100644 --- 
a/tests/openvino/native/quantization/test_fq_params_calculation.py +++ b/tests/openvino/native/quantization/test_fq_params_calculation.py @@ -62,7 +62,7 @@ def quantize_model(ov_model, q_params): min_max_algo = MinMaxQuantization(subset_size=1, **q_params) statistics_aggregator = OVStatisticsAggregator(dataset) - statistic_points = min_max_algo.get_statistic_points(ov_model, graph) + statistic_points = min_max_algo.get_statistic_points(ov_model, graph, dataset) statistics_aggregator.register_statistic_points(statistic_points) statistics_aggregator.collect_statistics(ov_model, graph) quantized_model = min_max_algo.apply(ov_model, graph, statistics_aggregator.statistic_points) diff --git a/tests/openvino/native/quantization/test_graphs.py b/tests/openvino/native/quantization/test_graphs.py index 268f7e4b501..d488873635b 100644 --- a/tests/openvino/native/quantization/test_graphs.py +++ b/tests/openvino/native/quantization/test_graphs.py @@ -143,7 +143,7 @@ def smooth_quant_model(ov_model: ov.Model, q_params: Dict, quantize=True): smooth_quant_algo = SmoothQuant(subset_size=1) statistics_aggregator = OVStatisticsAggregator(dataset) - statistic_points = smooth_quant_algo.get_statistic_points(ov_model, graph) + statistic_points = smooth_quant_algo.get_statistic_points(ov_model, graph, dataset) statistics_aggregator.register_statistic_points(statistic_points) statistics_aggregator.collect_statistics(ov_model, graph) modified_model = smooth_quant_algo.apply(ov_model, graph, statistics_aggregator.statistic_points) diff --git a/tests/post_training/test_templates/test_bias_correction.py b/tests/post_training/test_templates/test_bias_correction.py index 589b17fd12b..ec78c6ea43c 100644 --- a/tests/post_training/test_templates/test_bias_correction.py +++ b/tests/post_training/test_templates/test_bias_correction.py @@ -184,7 +184,7 @@ def test_verify_collected_stat_inputs_map(self, model_cls, ref_stat_inputs_map, graph = NNCFGraphFactory.create(model) bc_algo = 
self.get_bias_correction_algorithm() - bc_algo.get_statistic_points(model, graph) + bc_algo.get_statistic_points(model, graph, None) collected_stat_inputs_map = getattr(bc_algo, "_collected_stat_inputs_map") assert collected_stat_inputs_map == ref_stat_inputs_map diff --git a/tests/post_training/test_templates/test_channel_alignment.py b/tests/post_training/test_templates/test_channel_alignment.py index 65d5b49e180..22cabba1f00 100644 --- a/tests/post_training/test_templates/test_channel_alignment.py +++ b/tests/post_training/test_templates/test_channel_alignment.py @@ -510,7 +510,7 @@ class MockBackend(backend_cls): MockBackend.get_statistic_collector = mocker.MagicMock(return_value=ref_stat_collector) algorithm._backend_entity = MockBackend - statistic_container = algorithm.get_statistic_points(None, nncf_graph) + statistic_container = algorithm.get_statistic_points(None, nncf_graph, None) backend_cls = self.get_backend_cls() target_node_name = "/Add_1_0" if num_biases else "/Conv_1_0" diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 79ea6c8b29e..216f70ce25a 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -139,7 +139,7 @@ def test_range_estimator_per_tensor(self, test_params, range_estimator_params): assert min_max_algo._range_estimator_params[QuantizerGroup.ACTIVATIONS] == range_estimator_params params = test_params["test_range_estimator_per_tensor"] - stat_points = min_max_algo.get_statistic_points(params["model"], params["nncf_graph"]) + stat_points = min_max_algo.get_statistic_points(params["model"], params["nncf_graph"], None) assert len(stat_points) == params["stat_points_num"] for _, stat_point in stat_points.items(): diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 266998c2fa9..eb4e50ac5fd 100644 --- 
a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -82,23 +82,41 @@ def conv_sum_aggregation_nncf_graph(self) -> NNCFGraphToTestSumAggregation: class TestGetStatisticsCollectorParameters: target_type: TargetType target_node_name: str + is_many_samples: bool ref_per_ch_reduction_axes: List[int] ref_per_tensor_reduction_axes: List[int] @pytest.fixture( params=[ pytest.param( - TestGetStatisticsCollectorParameters(TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", (2,), (1, 2)), + TestGetStatisticsCollectorParameters(TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", True, (2,), (1, 2)), ), TestGetStatisticsCollectorParameters( TargetType.POST_LAYER_OPERATION, "/Conv_1_0", + True, (2, 3), (1, 2, 3), ), TestGetStatisticsCollectorParameters( TargetType.OPERATION_WITH_WEIGHTS, "/Conv_1_0", + True, + (1, 2, 3), + (0, 1, 2, 3), + ), + TestGetStatisticsCollectorParameters(TargetType.PRE_LAYER_OPERATION, "/Sum_1_0", False, (0, 2), (0, 1, 2)), + TestGetStatisticsCollectorParameters( + TargetType.POST_LAYER_OPERATION, + "/Conv_1_0", + False, + (0, 2, 3), + (0, 1, 2, 3), + ), + TestGetStatisticsCollectorParameters( + TargetType.OPERATION_WITH_WEIGHTS, + "/Conv_1_0", + False, (1, 2, 3), (0, 1, 2, 3), ), @@ -248,8 +266,9 @@ def test_get_stat_collector( statistic_collector_parameters: TestGetStatisticsCollectorParameters, ): params = statistic_collector_parameters - print(params) - min_max_algo = MinMaxQuantization(activations_range_estimator_params=range_estimator_params) + min_max_algo = MinMaxQuantization( + subset_size=num_samples, activations_range_estimator_params=range_estimator_params + ) min_max_algo._backend_entity = self.get_algo_backend() q_config = QuantizerConfig(num_bits=8, mode=q_config_mode, per_channel=q_config_per_channel) @@ -265,7 +284,7 @@ def test_get_stat_collector( target_point = list(min_max_algo._quantization_target_points_to_qconfig.keys())[0] tensor_collector = 
min_max_algo._get_stat_collector( - conv_sum_aggregation_nncf_graph.nncf_graph, target_point, q_config, num_samples + conv_sum_aggregation_nncf_graph.nncf_graph, target_point, q_config, params.is_many_samples ) is_weight_tp = target_point.is_weight_target_point() @@ -302,5 +321,7 @@ def test_get_stat_collector( assert self.get_reduction_axes(reducer) == params.ref_per_ch_reduction_axes else: assert self.get_reduction_axes(reducer) == params.ref_per_tensor_reduction_axes - - assert tensor_collector.num_samples == num_samples + if is_weight_tp: + assert tensor_collector.num_samples == 1 + else: + assert tensor_collector.num_samples == num_samples diff --git a/tests/post_training/test_templates/test_smooth_quant.py b/tests/post_training/test_templates/test_smooth_quant.py index baca6887c7f..375448e23b0 100644 --- a/tests/post_training/test_templates/test_smooth_quant.py +++ b/tests/post_training/test_templates/test_smooth_quant.py @@ -190,7 +190,7 @@ def test_empty_stats(self, mocker, tmpdir): graph = NNCFGraphFactory.create(model) algo = SmoothQuant(subset_size=1, inplace_statistics=False) - algo_statistic_points = algo.get_statistic_points(model, graph) + algo_statistic_points = algo.get_statistic_points(model, graph, dataset) statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) statistics_aggregator.register_statistic_points(algo_statistic_points) statistics_aggregator.collect_statistics(model, graph) diff --git a/tests/torch/ptq/test_calculation_quantizer_params.py b/tests/torch/ptq/test_calculation_quantizer_params.py index 811228b5d64..80db7a6ccde 100644 --- a/tests/torch/ptq/test_calculation_quantizer_params.py +++ b/tests/torch/ptq/test_calculation_quantizer_params.py @@ -314,7 +314,7 @@ def test_quantizer_parameters_export(tmp_path: Path): nncf_config = NNCFConfig({"input_info": {"sample_size": [1, 3, 32, 32]}}) nncf_network = create_nncf_network(model, nncf_config) - statistic_points = min_max_algo.get_statistic_points(nncf_network, 
nncf_network.nncf.get_graph()) + statistic_points = min_max_algo.get_statistic_points(nncf_network, nncf_network.nncf.get_graph(), dataset) statistics_aggregator.register_statistic_points(statistic_points) statistics_aggregator.collect_statistics(model, nncf_network.nncf.get_graph()) torch_quantized_model = min_max_algo.apply( From f4dfd1cd891924dec7142529142a5539ac0bc127 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 26 Jan 2024 11:28:37 +0100 Subject: [PATCH 051/108] rollback scales changes --- .../data/reference_scales/activation_matmul_model_mixed.json | 4 ++-- .../reference_scales/activation_matmul_model_performance.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/onnx/data/reference_scales/activation_matmul_model_mixed.json b/tests/onnx/data/reference_scales/activation_matmul_model_mixed.json index 40aa2a1bfc8..7080679f2b8 100644 --- a/tests/onnx/data/reference_scales/activation_matmul_model_mixed.json +++ b/tests/onnx/data/reference_scales/activation_matmul_model_mixed.json @@ -1,7 +1,7 @@ { "QuantizeLinear_X_1": { - "scale": 0.0007594539201818407, - "zero_point": 0 + "scale": 0.006937139667570591, + "zero_point": -11 }, "QuantizeLinear_Y_1": { "scale": 0.006937139667570591, diff --git a/tests/onnx/data/reference_scales/activation_matmul_model_performance.json b/tests/onnx/data/reference_scales/activation_matmul_model_performance.json index 1d0b29a44dc..0b82b1366b8 100644 --- a/tests/onnx/data/reference_scales/activation_matmul_model_performance.json +++ b/tests/onnx/data/reference_scales/activation_matmul_model_performance.json @@ -1,6 +1,6 @@ { "QuantizeLinear_X_1": { - "scale": 0.0021499551367014647, + "scale": 0.00749011617153883, "zero_point": 0 }, "QuantizeLinear_Y_1": { From 4a44a1c850adfbea7b7b09b04803b6cfbaa8b6f0 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 26 Jan 2024 14:08:09 +0100 Subject: [PATCH 052/108] fix tests --- tests/common/test_statistics_aggregator.py | 89 +++++++++---------- 
.../test_templates/test_batch_size.py | 4 +- .../test_templates/test_ptq_params.py | 8 +- 3 files changed, 51 insertions(+), 50 deletions(-) diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index e7539d598bf..ae1e92ef7f3 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -22,7 +22,6 @@ from nncf.common.factory import NNCFGraphFactory from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType -from nncf.common.quantization.initialization.range import RangeInitCollectorParams from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.tensor_statistics.statistic_point import StatisticPoint @@ -32,6 +31,7 @@ from nncf.experimental.common.tensor_statistics.collectors import TensorReducerBase from nncf.quantization.algorithms.bias_correction.backend import BiasCorrectionAlgoBackend from nncf.quantization.algorithms.fast_bias_correction.backend import FastBiasCorrectionAlgoBackend +from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend from nncf.quantization.range_estimator import AggregatorType from nncf.quantization.range_estimator import RangeEstimatorParameters @@ -119,6 +119,10 @@ def reducers_map(self) -> List[TensorReducerBase]: def dataset_values(self): return [{"max": 1, "min": -10}, {"max": 0.1, "min": -1}, {"max": 128, "min": -128}] + @staticmethod + def get_min_max_algo_cls() -> Type[MinMaxQuantization]: + return MinMaxQuantization + @dataclass class MinMaxTestParameters: range_estimator_params: RangeEstimatorParameters @@ -626,24 +630,21 @@ def filter_func(point): @classmethod def create_statistics_point( - cls, model, q_config, target_point, subset_size, algorithm_name, 
inplace_statistics, range_estimator + cls, model, q_config, target_point, subset_size, algorithm_name, inplace_statistics, range_estimator, algo=None ): - algo_backend = cls.get_min_max_algo_backend_cls() - nncf_graph = NNCFGraphFactory.create(model) - - collector_params = RangeInitCollectorParams( - is_weights=target_point.is_weight_target_point(), - scheme=q_config.mode, - per_channel=q_config.per_channel, - ) - tensor_collector = algo_backend.get_statistic_collector( - range_estimator, - nncf_graph=nncf_graph, - target_point=target_point, - collector_params=collector_params, - num_samples=subset_size, - inplace=inplace_statistics, + algo = ( + cls.get_min_max_algo_cls()( + subset_size=subset_size, + inplace_statistics=inplace_statistics, + activations_range_estimator_params=range_estimator, + ) + if algo is None + else algo ) + algo._set_backend_entity(model) + nncf_graph = NNCFGraphFactory.create(model) + algo._subset_size = subset_size + tensor_collector = algo._get_stat_collector(nncf_graph, target_point, q_config, is_many_samples=False) return StatisticPoint(target_point=target_point, tensor_collector=tensor_collector, algorithm=algorithm_name) @pytest.mark.parametrize( @@ -752,38 +753,31 @@ def test_statistic_merging(self, test_params, key, dataset_samples, inplace_stat quantizer_config = QuantizerConfig(mode=QuantizationMode.SYMMETRIC, per_channel=False) statistics_points = StatisticPointsContainer() - collectors_and_refs = [] - algo_backend = self.get_min_max_algo_backend_cls() target_point_cls = self.get_target_point_cls() + sp_and_refs = [] for target_point_args, ref in self.MERGED_TARGET_POINT_AND_REFS[key]: target_point = target_point_cls(*target_point_args) - collector_params = RangeInitCollectorParams( - is_weights=target_point.is_weight_target_point(), - scheme=quantizer_config.mode, - per_channel=quantizer_config.per_channel, - ) - min_max_tensor_collector = algo_backend.get_statistic_collector( - RangeEstimatorParametersSet.MINMAX, - 
nncf_graph=nncf_graph, - target_point=target_point, - collector_params=collector_params, - num_samples=len(dataset_samples), - inplace=inplace_statistics, - ) - mean_min_max_tensor_collector = algo_backend.get_statistic_collector( - RangeEstimatorParametersSet.MEAN_MINMAX, - nncf_graph=nncf_graph, - target_point=target_point, - collector_params=collector_params, - num_samples=len(dataset_samples), - inplace=inplace_statistics, - ) - - for tensor_collector in [min_max_tensor_collector, mean_min_max_tensor_collector]: - stat_point = StatisticPoint(target_point, tensor_collector, "TEST") - statistics_points.add_statistic_point(stat_point) - collectors_and_refs.append((min_max_tensor_collector, ref["min_max"])) - collectors_and_refs.append((mean_min_max_tensor_collector, ref["mean_min_max"])) + for estimator, ref_val in ( + (RangeEstimatorParametersSet.MINMAX, ref["min_max"]), + (RangeEstimatorParametersSet.MEAN_MINMAX, ref["mean_min_max"]), + ): + algo = self.get_min_max_algo_cls()( + subset_size=len(dataset_samples), + inplace_statistics=inplace_statistics, + activations_range_estimator_params=estimator, + ) + s_p = self.create_statistics_point( + model, + quantizer_config, + target_point, + len(dataset_samples), + "TEST", + inplace_statistics, + estimator, + algo, + ) + statistics_points.add_statistic_point(s_p) + sp_and_refs.append((s_p, ref_val)) dataset = self.get_dataset(dataset_samples) statistics_aggregator = self.get_statistics_aggregator(dataset) @@ -798,7 +792,8 @@ def test_statistic_merging(self, test_params, key, dataset_samples, inplace_stat statistics_aggregator.register_statistic_points(statistics_points) statistics_aggregator.collect_statistics(model, nncf_graph) - for collector, ref in collectors_and_refs: + for sp, ref in sp_and_refs: + collector = sp.algorithm_to_tensor_collectors["TEST"][0] stat = collector.get_statistics() assert np.allclose(stat.min_values, ref[0]) assert np.allclose(stat.max_values, ref[1]) diff --git 
a/tests/post_training/test_templates/test_batch_size.py b/tests/post_training/test_templates/test_batch_size.py index cc5f94b8db6..adbac996e69 100644 --- a/tests/post_training/test_templates/test_batch_size.py +++ b/tests/post_training/test_templates/test_batch_size.py @@ -40,9 +40,9 @@ def create_dataset(self, lenght, batch_size): @pytest.mark.parametrize( ("test_data"), - ( + ( # BAD TEST [ # batch_size | dataset_len | stat_subset_size | ref_calibration_samples_num | ref_iterations_num - DataForTest(None, None, None, None, None), # None is None + # DataForTest(None, None, None, None, None), # None is None DataForTest(1, 1000, 300, 300, 300), DataForTest(10, 1000, 300, 300, 30), DataForTest(300, 1000, 300, 300, 1), diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 4d43bce557d..d7d96c8bb9a 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -47,6 +47,11 @@ from tests.common.quantization.mock_graphs import get_nncf_graph_from_mock_nx_graph +class MockdedDataset: + def get_batch_size(self): + return 1 + + class ModelToTestOverflowFix: # Input_1 Input_2 # | | @@ -140,7 +145,8 @@ def test_range_estimator_per_tensor(self, test_params, range_estimator_params): assert min_max_algo._range_estimator_params[QuantizerGroup.ACTIVATIONS] == range_estimator_params params = test_params["test_range_estimator_per_tensor"] - stat_points = min_max_algo.get_statistic_points(params["model"], params["nncf_graph"], None) + + stat_points = min_max_algo.get_statistic_points(params["model"], params["nncf_graph"], MockdedDataset()) assert len(stat_points) == params["stat_points_num"] for _, stat_point in stat_points.items(): From f77f59b57c6d43a30dfb2bd02eef47861a72c939 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 26 Jan 2024 17:17:29 +0100 Subject: [PATCH 053/108] fix OV test --- tests/common/test_statistics_aggregator.py 
| 46 ++++++++++++---------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index c8d4910a04b..432c7122bc3 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -386,6 +386,7 @@ def test_statistics_aggregator_min_max( dataset_samples, inplace_statistics, is_backend_support_custom_estimators, + mocker, ): model = self.get_backend_model(dataset_samples) quantizer_config = QuantizerConfig( @@ -409,6 +410,7 @@ def test_statistics_aggregator_min_max( algorithm_name, inplace_statistics, test_parameters.range_estimator_params, + mocker, ) statistics_points = StatisticPointsContainer() statistics_points.add_statistic_point(statistic_point) @@ -630,16 +632,15 @@ def filter_func(point): @classmethod def create_statistics_point( - cls, model, q_config, target_point, subset_size, algorithm_name, inplace_statistics, range_estimator, algo=None + cls, model, q_config, target_point, subset_size, algorithm_name, inplace_statistics, range_estimator, mocker ): - algo = ( - cls.get_min_max_algo_cls()( - subset_size=subset_size, - inplace_statistics=inplace_statistics, - activations_range_estimator_params=range_estimator, - ) - if algo is None - else algo + _ = mocker.patch( + "nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization._get_range_estimator_parameters", + return_value=range_estimator, + ) + algo = cls.get_min_max_algo_cls()( + subset_size=subset_size, + inplace_statistics=inplace_statistics, ) algo._set_backend_entity(model) nncf_graph = NNCFGraphFactory.create(model) @@ -657,7 +658,7 @@ def create_statistics_point( ), ), ) - def test_statistics_merging_simple(self, dataset_samples, inplace_statistics, statistic_point_params): + def test_statistics_merging_simple(self, dataset_samples, inplace_statistics, statistic_point_params, mocker): model = self.get_backend_model(dataset_samples) quantizer_config = 
QuantizerConfig(mode=QuantizationMode.SYMMETRIC, per_channel=False) subset_size = len(dataset_samples) @@ -670,7 +671,14 @@ def test_statistics_merging_simple(self, dataset_samples, inplace_statistics, st ref_val[algorithm_name] = (ref_min_val, ref_max_val) target_point = self.get_target_point(target_point_type) statistics_point = self.create_statistics_point( - model, quantizer_config, target_point, subset_size, algorithm_name, inplace_statistics, range_estimator + model, + quantizer_config, + target_point, + subset_size, + algorithm_name, + inplace_statistics, + range_estimator, + mocker, ) statistics_points.add_statistic_point(statistics_point) @@ -746,7 +754,7 @@ def _check_shared_convs_merged_stats(cls, merged_statistics): } @pytest.mark.parametrize("key", ["split_concat", "shared_conv"]) - def test_statistic_merging(self, test_params, key, dataset_samples, inplace_statistics): + def test_statistic_merging(self, test_params, key, dataset_samples, inplace_statistics, mocker): params = test_params["test_statistic_merging"][key] model = params["model"](dataset_samples) nncf_graph = NNCFGraphFactory.create(model) @@ -761,11 +769,6 @@ def test_statistic_merging(self, test_params, key, dataset_samples, inplace_stat (RangeEstimatorParametersSet.MINMAX, ref["min_max"]), (RangeEstimatorParametersSet.MEAN_MINMAX, ref["mean_min_max"]), ): - algo = self.get_min_max_algo_cls()( - subset_size=len(dataset_samples), - inplace_statistics=inplace_statistics, - activations_range_estimator_params=estimator, - ) s_p = self.create_statistics_point( model, quantizer_config, @@ -774,7 +777,7 @@ def test_statistic_merging(self, test_params, key, dataset_samples, inplace_stat "TEST", inplace_statistics, estimator, - algo, + mocker, ) statistics_points.add_statistic_point(s_p) sp_and_refs.append((s_p, ref_val)) @@ -866,7 +869,7 @@ def product_dict(**kwargs): ), ), ) - def test_register_statistics(self, dataset_samples, statistic_point_params): + def test_register_statistics(self, 
dataset_samples, statistic_point_params, mocker): model = self.get_backend_model(dataset_samples) quantizer_config = QuantizerConfig(mode=QuantizationMode.SYMMETRIC, per_channel=False) statistics_points = StatisticPointsContainer() @@ -877,7 +880,7 @@ def test_register_statistics(self, dataset_samples, statistic_point_params): ref_val[algorithm_name] = subset_size target_point = self.get_target_point(target_point_type) statistics_point = self.create_statistics_point( - model, quantizer_config, target_point, subset_size, algorithm_name, True, range_estimator + model, quantizer_config, target_point, subset_size, algorithm_name, True, range_estimator, mocker ) statistics_points.add_statistic_point(statistics_point) @@ -893,7 +896,7 @@ def test_register_statistics(self, dataset_samples, statistic_point_params): ref_subset_size = subset_size assert statistics_aggregator.stat_subset_size == ref_subset_size - def test_collect_with_empty_dataset(self, dataset_samples): + def test_collect_with_empty_dataset(self, dataset_samples, mocker): model = self.get_backend_model(dataset_samples) dataset_samples = [] dataset = self.get_dataset(dataset_samples) @@ -911,6 +914,7 @@ def test_collect_with_empty_dataset(self, dataset_samples): algorithm_name, inplace_statistics, RangeEstimatorParametersSet.MEAN_MINMAX, + mocker, ) statistics_points = StatisticPointsContainer() statistics_points.add_statistic_point(statistic_point) From 43fd72954b55705d453dfce63219f9bc52207f64 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 29 Jan 2024 11:59:08 +0100 Subject: [PATCH 054/108] add warning for model_type=transformer --- nncf/common/tensor_statistics/aggregator.py | 4 ++-- nncf/quantization/algorithms/min_max/algorithm.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 19cc488702d..2c8b847ef89 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ 
b/nncf/common/tensor_statistics/aggregator.py @@ -76,8 +76,8 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: return if self.batch_size > 1 and self.is_model_batch_size_limited_support(graph): nncf_logger.warning( - "The batch size > 1 for the particular model can lead to inaccurate collected statistics . \ - To get the appropriate statistics it is recommended to use batch size = 1." + "For the particular model the batch size > 1 can lead to inaccurate collected statistics . \ + The recomendation is to use batch_size = 1." ) model_transformer = factory.ModelTransformerFactory.create(model) merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph) diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 5f462581ade..597bb2b46ad 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -883,8 +883,13 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset self._reset_cache() quantization_target_points, _ = self._get_quantization_target_points(model, graph) output = StatisticPointsContainer() + is_many_samples = dataset.get_batch_size() is not None and dataset.get_batch_size() > 1 + if self._model_type == ModelType.TRANSFORMER and is_many_samples: + nncf_logger.warning( + "For transfomer-like models batch_size > 1 could result in inaccurate statistics. \ + The recomendation is to use batch_size = 1." 
+ ) for quantization_target_point, qconfig in quantization_target_points.items(): - is_many_samples = dataset.get_batch_size() is not None and dataset.get_batch_size() > 1 stat_collector = self._get_stat_collector(graph, quantization_target_point, qconfig, is_many_samples) output.add_statistic_point( StatisticPoint( From c20f7d3deb39aeadb6936bbc228716186025b8d6 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 29 Jan 2024 12:01:56 +0100 Subject: [PATCH 055/108] fix torch test --- nncf/torch/quantization/init_range.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/nncf/torch/quantization/init_range.py b/nncf/torch/quantization/init_range.py index c8810460387..4b155884041 100644 --- a/nncf/torch/quantization/init_range.py +++ b/nncf/torch/quantization/init_range.py @@ -108,12 +108,7 @@ def __init__( self._channel_idx = channel_idx def get_reduction_aggregation_axes(self, per_sample_stats: bool) -> Tuple[ReductionAxes, AggregationAxes]: - reduction_axes, aggregation_axes = super().get_reduction_aggregation_axes( - self._input_shape, (self._channel_idx,) - ) - if per_sample_stats: - return (0,) + reduction_axes, (0,) - return reduction_axes, aggregation_axes + return super().get_reduction_aggregation_axes(self._input_shape, (self._channel_idx,), per_sample_stats) class StatCollectorGenerator: From 52203f07231143664e41e3a52126f76afdcb4a77 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 29 Jan 2024 14:38:36 +0100 Subject: [PATCH 056/108] fix torch tests --- nncf/torch/quantization/init_range.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nncf/torch/quantization/init_range.py b/nncf/torch/quantization/init_range.py index 4b155884041..87eb97cdf8e 100644 --- a/nncf/torch/quantization/init_range.py +++ b/nncf/torch/quantization/init_range.py @@ -107,8 +107,10 @@ def __init__( self._input_shape = input_shape self._channel_idx = channel_idx - def get_reduction_aggregation_axes(self, per_sample_stats: 
bool) -> Tuple[ReductionAxes, AggregationAxes]: - return super().get_reduction_aggregation_axes(self._input_shape, (self._channel_idx,), per_sample_stats) + def get_reduction_aggregation_axes(self, is_per_sample: bool) -> Tuple[ReductionAxes, AggregationAxes]: + if self.is_per_channel: + return super().get_reduction_aggregation_axes(self._input_shape, (self._channel_idx,), is_per_sample) + return super().get_reduction_aggregation_axes(self._input_shape, (), is_per_sample) class StatCollectorGenerator: @@ -157,7 +159,6 @@ def generate_stat_collector_for_range_init_config( use_per_sample_stats = collector_params.use_per_sample_stats(init_config.init_type == "mixed_min_max") reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes(use_per_sample_stats) - if init_config.init_type == "min_max": return get_min_max_statistic_collector( use_abs_max=collector_params.use_abs_max, From 48c8426f521a62c102b77df51c07bb5114166d8e Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 30 Jan 2024 10:14:20 +0100 Subject: [PATCH 057/108] final fix torch test --- tests/torch/test_statistics_aggregator.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/torch/test_statistics_aggregator.py b/tests/torch/test_statistics_aggregator.py index 11e5ce70942..10dfc149c1c 100644 --- a/tests/torch/test_statistics_aggregator.py +++ b/tests/torch/test_statistics_aggregator.py @@ -168,6 +168,7 @@ def test_successive_statistics_aggregation( dataset_samples, inplace_statistics, is_backend_support_custom_estimators, + mocker, ): is_stat_in_shape_of_scale = True model = self.get_backend_model(dataset_samples) @@ -194,7 +195,7 @@ def fn(x): ### Register and collect statistics after inserted operations statistic_points = self.__get_statistic_points( - test_parameters, model, quantizer_config, dataset_samples, inplace_statistics + test_parameters, model, quantizer_config, dataset_samples, inplace_statistics, mocker ) tensor_collector = 
self.__collect_statistics_get_collector(statistic_points, model, dataset_samples) ### Check values are changed because of the inserted operation @@ -254,6 +255,7 @@ def test_nested_statistics_aggregation( dataset_samples, inplace_statistics, is_backend_support_custom_estimators, + mocker, ): is_stat_in_shape_of_scale = True model = self.get_backend_model(dataset_samples) @@ -283,11 +285,7 @@ def fn(x): ### Register and collect statistics after inserted operations statistic_points = self.__get_statistic_points( - test_parameters, - model, - quantizer_config, - dataset_samples, - inplace_statistics, + test_parameters, model, quantizer_config, dataset_samples, inplace_statistics, mocker ) tensor_collector = self.__collect_statistics_get_collector(statistic_points, model, dataset_samples) ### Check values are changed because of the inserted operation @@ -312,7 +310,7 @@ def __add_fn_to_model(model, target_point, fn): @classmethod def __get_statistic_points( - cls, test_parameters: MinMaxTestParameters, model, quantizer_config, dataset_samples, inplace_statistics + cls, test_parameters: MinMaxTestParameters, model, quantizer_config, dataset_samples, inplace_statistics, mocker ) -> StatisticPointsContainer: statistics_points = StatisticPointsContainer() for target_type in [test_parameters.target_type]: @@ -325,6 +323,7 @@ def __get_statistic_points( "TEST_ALGO", inplace_statistics, test_parameters.range_estimator_params, + mocker, ) statistics_points.add_statistic_point(statistic_point) return statistics_points From 3fe8a37b31699b3ed49b0b0eca7738c720617c25 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 30 Jan 2024 10:37:50 +0100 Subject: [PATCH 058/108] comments --- .../quantization/initialization/range.py | 21 +++++++++++-------- nncf/common/tensor_statistics/aggregator.py | 9 ++++---- nncf/onnx/graph/metatypes/groups.py | 5 +++-- nncf/openvino/graph/metatypes/groups.py | 6 ++++-- .../pipelines/causal_language_model.py | 3 ++- 
.../pipelines/masked_language_modeling.py | 3 ++- tests/post_training/test_templates/helpers.py | 1 + 7 files changed, 29 insertions(+), 19 deletions(-) diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index 635c1b68087..7f232bad99e 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -209,15 +209,18 @@ def use_means_of_maxs(self) -> bool: return not self._is_weights and not self._is_per_channel def _get_reduction_axes( - self, shape_to_reduce: List[int], quantization_axes: Union[Tuple[int], List[int]], aggregation_axes: List[int] + self, + shape_to_reduce: Union[Tuple[int], List[int]], + quantization_axes: Union[Tuple[int], List[int]], + aggregation_axes: Union[Tuple[int], List[int]], ): """ - TODO + Returns axes for a reducer. - :param shape_to_reduce: - :param quantization_axes: - :param aggregation_axes: - :return: + :param shape_to_reduce: Shape of a reduced tensor. + :param quantization_axes: Axes of quantization. + :param aggregation_axes: Axes of aggregator which is applied onto reduced tensor. + :return: Axes for reducer. """ axes_to_keep = set(el - 1 for el in aggregation_axes if el != 0) axes_to_keep.update(quantization_axes) @@ -225,10 +228,10 @@ def _get_reduction_axes( def _get_aggregation_axes(self, is_per_sample: bool) -> Tuple[int]: """ - TODO + Returns axes for aggregator. - :param bool is_per_sample: _description_ - :return Tuple[int]: _description_ + :param is_per_sample: Whether to aggreagate tensor statistics per batch axis. + :return Tuple[int]: Aggregation axes. 
""" return (0, 1) if is_per_sample else (0,) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 2c8b847ef89..88e7f03a8b8 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -74,7 +74,7 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ if not self.statistic_points: return - if self.batch_size > 1 and self.is_model_batch_size_limited_support(graph): + if self.batch_size > 1 and self.is_model_has_no_batch_axis(graph): nncf_logger.warning( "For the particular model the batch size > 1 can lead to inaccurate collected statistics . \ The recomendation is to use batch_size = 1." @@ -118,7 +118,7 @@ def register_statistic_points(self, statistic_points: StatisticPointsContainer) elif tensor_collector.num_samples is not None: self.stat_subset_size = max(self.stat_subset_size, tensor_collector.num_samples) - def is_model_batch_size_limited_support(self, graph: NNCFGraph) -> bool: + def is_model_has_no_batch_axis(self, graph: NNCFGraph) -> bool: """ Returns True if NNCFGraph contains metatypes with no batch axis in output tensor. @@ -133,8 +133,9 @@ def is_model_batch_size_limited_support(self, graph: NNCFGraph) -> bool: def metatypes_output_has_no_batch_axis(self) -> List[OperatorMetatype]: """ These metatypes mix outputs for different samples into one axis. - When reducers reduce the tensor they get only 1 value instead of batch_size values. - This leads to inaccurate statistics. + If reducers and aggregators collect statistics at the output of the following operations, + assuming that 0-axis is batch axis, they get only 1 value instead of batch_size values. + It could lead to inaccurate/incorrect statistics result. 
""" @abstractmethod diff --git a/nncf/onnx/graph/metatypes/groups.py b/nncf/onnx/graph/metatypes/groups.py index 5eea302304f..2725d43b294 100644 --- a/nncf/onnx/graph/metatypes/groups.py +++ b/nncf/onnx/graph/metatypes/groups.py @@ -123,8 +123,9 @@ ] # These metatypes mix outputs for different samples into one axis. -# When reducers reduce the tensor they get only 1 value instead of batch_size values. -# This leads to inaccurate statistics. +# If reducers and aggregators collect statistics at the output of the following operations, +# assuming that 0-axis is batch axis, they get only 1 value instead of batch_size values. +# It could lead to inaccurate/incorrect statistics result. OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ onnx_metatypes.ONNXROIAlignMetatype, onnx_metatypes.ONNXEmbeddingMetatype, diff --git a/nncf/openvino/graph/metatypes/groups.py b/nncf/openvino/graph/metatypes/groups.py index 67e62fdaa2a..cf92932741a 100644 --- a/nncf/openvino/graph/metatypes/groups.py +++ b/nncf/openvino/graph/metatypes/groups.py @@ -199,10 +199,12 @@ ] # These metatypes mix outputs for different samples into one axis. -# When reducers reduce the tensor they get only 1 value instead of batch_size values. -# This leads to inaccurate statistics. +# If reducers and aggregators collect statistics at the output of the following operations, +# assuming that 0-axis is batch axis, they get only 1 value instead of batch_size values. +# It could lead to inaccurate/incorrect statistics result. 
OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ ov_metatypes.OVSpaceToBatchMetatype, + ov_metatypes.OVBatchToSpaceMetatype, ov_metatypes.OVROIPoolingMetatype, ov_metatypes.OVROIAlignMetatype, ov_metatypes.OVEmbeddingMetatype, diff --git a/tests/post_training/pipelines/causal_language_model.py b/tests/post_training/pipelines/causal_language_model.py index 1a711cd4ff8..14733e43a0e 100644 --- a/tests/post_training/pipelines/causal_language_model.py +++ b/tests/post_training/pipelines/causal_language_model.py @@ -41,7 +41,8 @@ def transform_func(examples): def prepare_calibration_dataset(self): if self.batch_size > 1: - print("Batch size > 1 is not supported for causal language models. Batch size = 1 will be used.") + print("Batch size > 1 is not supported for causal language models. Batch size = 1 is set.") + self.batch_size = 1 quantizer = OVQuantizer.from_pretrained(self.model_hf) num_samples = self.ptq_params.get("subset_size", 300) diff --git a/tests/post_training/pipelines/masked_language_modeling.py b/tests/post_training/pipelines/masked_language_modeling.py index 4c5db140497..ebeed3e5be4 100644 --- a/tests/post_training/pipelines/masked_language_modeling.py +++ b/tests/post_training/pipelines/masked_language_modeling.py @@ -87,7 +87,8 @@ def transform_func(data): def prepare_calibration_dataset(self): if self.batch_size > 1: - print("Batch size > 1 is not supported for masked language models. Batch size = 1 will be used.") + print("Batch size > 1 is not supported for masked language models. 
Batch size = 1 is set.") + self.batch_size = 1 quantizer = OVQuantizer.from_pretrained(self.model_hf) num_samples = self.ptq_params.get("subset_size", 300) diff --git a/tests/post_training/test_templates/helpers.py b/tests/post_training/test_templates/helpers.py index d0c0940e922..9f2c9a5b6e5 100644 --- a/tests/post_training/test_templates/helpers.py +++ b/tests/post_training/test_templates/helpers.py @@ -58,6 +58,7 @@ def get_static_dataset(input_size: Tuple, transform_fn: Callable, fn_to_type: Ca :param input_size: Size of generated tensors, :param transform_fn: Function to transformation dataset. :param fn_to_type: Function, defaults to None. + :param lenght: The lenght of dataset. :return: Instance of nncf.Dataset for StaticDatasetMock. """ return Dataset( From d228589906a3af0f599f3036585eebdfec9fff11 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 30 Jan 2024 10:51:57 +0100 Subject: [PATCH 059/108] comments x2 --- nncf/common/graph/utils.py | 15 +++++---- .../algorithms/min_max/algorithm.py | 2 +- .../algorithms/min_max/backend.py | 31 +++++++++---------- .../algorithms/min_max/onnx_backend.py | 6 ++-- .../algorithms/min_max/openvino_backend.py | 6 ++-- .../algorithms/min_max/torch_backend.py | 6 ++-- nncf/torch/graph/operator_metatypes.py | 5 +-- 7 files changed, 31 insertions(+), 40 deletions(-) diff --git a/nncf/common/graph/utils.py b/nncf/common/graph/utils.py index 6b90e365721..c3672ed7ef4 100644 --- a/nncf/common/graph/utils.py +++ b/nncf/common/graph/utils.py @@ -10,14 +10,13 @@ # limitations under the License. 
from functools import partial -from typing import List, Set, Union +from typing import List, Set, Tuple, Union from nncf.common.graph import NNCFGraph from nncf.common.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.logging import nncf_logger from nncf.common.pruning.utils import traverse_function -from nncf.common.tensor_statistics.collectors import ReductionAxes def get_concat_axis(input_shapes: List[List[int]], output_shapes: List[List[int]]) -> int: @@ -117,15 +116,15 @@ def get_number_of_quantized_ops( return len(quantized_ops) -def get_reduction_axes(axes_to_keep: Union[List[int], ReductionAxes], shape_to_reduce: List[int]) -> ReductionAxes: +def get_reduction_axes(channel_axes: Union[List[int], Tuple[int]], shape: Union[List[int], Tuple[int]]) -> Tuple[int]: """ - Returns reduction axes without axes needed to keep. + Returns filtered reduction axes without axes that corresponds channels. - :param axes_to_keep: Axes to keep. - :param shape_to_reduce: Shape to reduce. + :param channel_axes: Channel axes. + :param shape: Shape that need to be filtered. :return: Reduction axes. 
""" - reduction_axes = list(range(len(shape_to_reduce))) - for channel_axis in sorted(axes_to_keep, reverse=True): + reduction_axes = list(range(len(shape))) + for channel_axis in sorted(channel_axes, reverse=True): del reduction_axes[channel_axis] return tuple(reduction_axes) diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 597bb2b46ad..ab8281a0da9 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -373,7 +373,7 @@ def _get_stat_collector( node = graph.get_node_by_name(target_point.target_node_name) shape = self._backend_entity.get_target_point_shape(graph, node, target_point) - channel_axes = self._backend_entity.get_channel_axes(node, target_point, is_weight, qconfig.per_channel) + channel_axes = self._backend_entity.get_channel_axes(node, target_point, qconfig.per_channel) range_estimator_params = self._get_range_estimator_parameters(target_point, qconfig) diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index 2dd65209884..64f022d4aba 100644 --- a/nncf/quantization/algorithms/min_max/backend.py +++ b/nncf/quantization/algorithms/min_max/backend.py @@ -186,27 +186,24 @@ def unify_statistics(statistics: List[MinMaxTensorStatistic]) -> MinMaxTensorSta @abstractmethod def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: TargetPoint) -> List[int]: """ - TODO + Returns shape of a targer point tensor. - :param NNCFGraph nncf_graph: - :param NNCFNode node: - :param TargetPoint target_point: - :return List[int]: + :param nncf_graph: NNCFGraph instance. + :param node: NNCFNode. + :param target_point: Target point of which tensor shape is seeked. + :return: Shape of target point tensor. 
""" @staticmethod @abstractmethod - def get_channel_axes( - node: NNCFNode, target_point: TargetPoint, is_weight: bool, is_per_channel: bool - ) -> Tuple[int]: + def get_channel_axes(node: NNCFNode, target_point: TargetPoint, is_per_channel: bool) -> Tuple[int]: """ - TODO + Returns axes for per-channel quantization. - :param NNCFNode node: - :param TargetPoint target_point: - :param bool is_weight: - :param bool is_per_channel: - :return Tuple[int]: + :param node: Quantized node. + :param target_point: Corresponding target point. + :param is_per_channel: Is per-channel quantization or not. + :return: Axes for per-channel quantization. """ @staticmethod @@ -223,9 +220,9 @@ def get_statistic_collector( Returns backend-specific statistic collector. :param range_estimator_params: Parameters that specify estimators types. - :param use_abs_max: - :param reduction_axes: TODO - :param aggregation_axes: + :param use_abs_max: Wheather reduce absolute values of input tensors or not. + :param reduction_axes: Axes for reducer. + :param aggregation_axes: Axes for aggregator. :param inplace: Whether to calculate statistic inplace or not. :param num_samples: Maximum number of samples to collect. :return: Backend-specific TensorStatisticCollectorBase for the statistics calculation. 
diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 52f38cb1321..c5bbd66f75d 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -161,12 +161,10 @@ def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: return get_quantized_tensor_shape(nncf_graph, node, target_point) @staticmethod - def get_channel_axes( - node: NNCFNode, target_point: ONNXTargetPoint, is_weight: bool, is_per_channel: bool - ) -> Tuple[int]: + def get_channel_axes(node: NNCFNode, target_point: ONNXTargetPoint, is_per_channel: bool) -> Tuple[int]: if not is_per_channel: return () - if is_weight: + if target_point.is_weight_target_point(): return (get_weight_quantization_axis(node, target_point.port_id),) return (1,) diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index e0a74eac6cd..947659127ca 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -148,12 +148,10 @@ def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: raise NotImplementedError(f"Unsupported target point type {target_point.type}.") @staticmethod - def get_channel_axes( - node: NNCFNode, target_point: OVTargetPoint, is_weight: bool, is_per_channel: bool - ) -> Tuple[int]: + def get_channel_axes(node: NNCFNode, target_point: OVTargetPoint, is_per_channel: bool) -> Tuple[int]: if not is_per_channel: return () - if is_weight: + if target_point.is_weight_target_point(): return get_weight_channel_axes(node) return (1,) diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index d5e45ef803c..6c0ca8221a1 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ 
b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -163,12 +163,10 @@ def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: return nncf_graph.get_input_shape_for_insertion_point(target_point) @staticmethod - def get_channel_axes( - node: NNCFNode, target_point: PTTargetPoint, is_weight: bool, is_per_channel: bool - ) -> Tuple[int]: + def get_channel_axes(node: NNCFNode, target_point: PTTargetPoint, is_per_channel: bool) -> Tuple[int]: if not is_per_channel: return () - if is_weight: + if target_point.is_weight_target_point(): return (node.layer_attributes.get_target_dim_for_compression(),) return (1,) diff --git a/nncf/torch/graph/operator_metatypes.py b/nncf/torch/graph/operator_metatypes.py index 5777e45ee27..af4c575a7a9 100644 --- a/nncf/torch/graph/operator_metatypes.py +++ b/nncf/torch/graph/operator_metatypes.py @@ -1053,8 +1053,9 @@ def get_operator_metatypes() -> List[Type[OperatorMetatype]]: OP_NAMES_QUANTIZE_NODE = ["symmetric_quantize", "asymmetric_quantize"] # These metatypes mix outputs for different samples into one axis. -# When reducers reduce the tensor they get only 1 value instead of batch_size values. -# This leads to inaccurate statistics. +# If reducers and aggregators collect statistics at the output of the following operations, +# assuming that 0-axis is batch axis, they get only 1 value instead of batch_size values. +# It could lead to inaccurate/incorrect statistics result. 
OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS = [ PTEmbeddingMetatype, PTEmbeddingBagMetatype, From b7de5640c7da19dfe55925fc20267e611d698f7c Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 30 Jan 2024 10:59:12 +0100 Subject: [PATCH 060/108] comments x3 --- nncf/common/tensor_statistics/aggregator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 88e7f03a8b8..d312fcd7ae1 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -86,13 +86,13 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: engine = factory.EngineFactory.create(model_with_outputs) calibration_samples_num = self._get_total_statistics_samples() - iterataions_num = ( + iterations_num = ( self._get_iterations_num(calibration_samples_num) if calibration_samples_num is not None else None ) - if iterataions_num is not None and iterataions_num == 0: + if iterations_num is not None and iterations_num == 0: raise nncf.ValidationError("Batch size > length of dataset or batch size > stat_subset_size.") with track(total=calibration_samples_num, description="Statistics collection") as pbar: - for input_data in islice(self.dataset.get_inference_data(), iterataions_num): + for input_data in islice(self.dataset.get_inference_data(), iterations_num): outputs = engine.infer(input_data) processed_outputs = self._process_outputs(outputs) self._register_statistics(processed_outputs, merged_statistics) From 489d60306868e2288f0c4343e96fdf40c153c6d6 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 30 Jan 2024 13:00:53 +0100 Subject: [PATCH 061/108] fix tests after merge --- nncf/quantization/algorithms/min_max/onnx_backend.py | 2 +- nncf/quantization/algorithms/smooth_quant/torch_backend.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py 
b/nncf/quantization/algorithms/min_max/onnx_backend.py index c5bbd66f75d..2a91c0242c8 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -125,7 +125,7 @@ def create_quantizer_insertion_command( tensor_type = np.int8 # The weight is restricted to have only signed range nncf_input_node_next_nodes = ONNXMinMaxAlgoBackend._get_input_edges_mapping(nncf_graph) node = nncf_graph.get_node_by_name(target_point.target_node_name) - axis = ONNXMinMaxAlgoBackend.get_channel_axes(node, target_point, is_weight, quantizer_config.per_channel) + axis = ONNXMinMaxAlgoBackend.get_channel_axes(node, target_point, quantizer_config.per_channel) if not axis: axis = None else: diff --git a/nncf/quantization/algorithms/smooth_quant/torch_backend.py b/nncf/quantization/algorithms/smooth_quant/torch_backend.py index a486be98a4f..3b125c196ee 100644 --- a/nncf/quantization/algorithms/smooth_quant/torch_backend.py +++ b/nncf/quantization/algorithms/smooth_quant/torch_backend.py @@ -19,12 +19,12 @@ from nncf.common.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType +from nncf.common.graph.utils import get_reduction_axes from nncf.common.quantization.quantizer_propagation.structs import QuantizationTrait from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.experimental.tensor import Tensor -from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes from nncf.openvino.graph.transformations.commands import OVMultiplyInsertionCommand from nncf.openvino.graph.transformations.commands import OVWeightUpdateCommand from nncf.quantization.algorithms.smooth_quant.backend import SmoothQuantAlgoBackend @@ -89,7 +89,7 @@ def 
get_activations_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int: @staticmethod def get_channel_agnostic_reduction_axes(channel_axis: int, shape: Tuple[int]) -> Tuple[int]: - return get_channel_agnostic_reduction_axes([channel_axis], shape) + return get_reduction_axes([channel_axis], shape) @staticmethod def get_abs_max_channel_collector( From 1f0cb94325822788852ab5c90d9fc395ce92c003 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 31 Jan 2024 14:28:42 +0100 Subject: [PATCH 062/108] improve test --- .../test_tensor_collector_batch_size.py | 31 +++++++++++++++--- .../test_tensor_collector_batch_size.py | 30 +++-------------- .../test_tensor_collector_batch_size.py | 30 +++-------------- .../ptq/test_tensor_collector_batch_size.py | 32 +++---------------- 4 files changed, 40 insertions(+), 83 deletions(-) diff --git a/tests/common/experimental/test_tensor_collector_batch_size.py b/tests/common/experimental/test_tensor_collector_batch_size.py index a392c4c07c6..a7418f5910b 100644 --- a/tests/common/experimental/test_tensor_collector_batch_size.py +++ b/tests/common/experimental/test_tensor_collector_batch_size.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2024 Intel Corporation # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -50,13 +50,35 @@ def aggregators(self): def inplace(self): pass + @staticmethod @abstractmethod - def create_dataitems_without_batch_dim(self, input_shape: List[int], length: int = 100) -> List[np.ndarray]: + def to_backend_tensor(self, tensor: np.ndarray): pass - @abstractmethod + def create_dataitems_without_batch_dim(self, input_shape: List[int], length: int = 100) -> List[np.ndarray]: + rng = np.random.default_rng(seed=0) + data_items = [] + for _ in range(length): + data_items.append(rng.uniform(0, 1, input_shape)) + return data_items + def add_batch_dim_to_dataitems(self, data_items: List[np.ndarray], batch_size: int) -> List[np.ndarray]: - pass + assert batch_size >= 1 + dataset = [] + item = [] + cnt = 0 + for data_item in data_items: + if batch_size == 1: + dataset.append(np.expand_dims(data_item, 0)) + else: + item.append(data_item) + if cnt == batch_size - 1: + dataset.append(np.array(item)) + item = [] + cnt = -1 + cnt += 1 + + return dataset def _create_tensor_collector(self, shape, inplace, reducer, aggregator) -> TensorCollector: batch_axis = 0 @@ -75,6 +97,7 @@ def _create_tensor_collector(self, shape, inplace, reducer, aggregator) -> Tenso def _register_inputs(self, collector, dataitems, reducer): for item in dataitems: + item = self.to_backend_tensor(item) input_ = {hash(reducer): [self.get_nncf_tensor_class()(item)]} collector.register_inputs(input_) diff --git a/tests/onnx/quantization/test_tensor_collector_batch_size.py b/tests/onnx/quantization/test_tensor_collector_batch_size.py index 8904dcc096f..6b9a13addcb 100644 --- a/tests/onnx/quantization/test_tensor_collector_batch_size.py +++ b/tests/onnx/quantization/test_tensor_collector_batch_size.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2024 Intel Corporation # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -8,7 +8,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import List import numpy as np import pytest @@ -46,27 +45,6 @@ def aggregators(self, request) -> bool: def inplace(self, request): return request.param - def create_dataitems_without_batch_dim(self, input_shape: List[int], length: int = 100) -> List[np.ndarray]: - rng = np.random.default_rng(seed=0) - data_items = [] - for _ in range(length): - data_items.append(rng.uniform(0, 1, input_shape)) - return data_items - - def add_batch_dim_to_dataitems(self, data_items: List[np.ndarray], batch_size: int) -> List[np.ndarray]: - assert batch_size >= 1 - dataset = [] - item = [] - cnt = 0 - for data_item in data_items: - if batch_size == 1: - dataset.append(np.expand_dims(data_item, 0)) - else: - item.append(data_item) - if cnt == batch_size - 1: - dataset.append(np.array(item)) - item = [] - cnt = -1 - cnt += 1 - - return dataset + @staticmethod + def to_backend_tensor(tensor: np.ndarray): + return tensor diff --git a/tests/openvino/native/test_tensor_collector_batch_size.py b/tests/openvino/native/test_tensor_collector_batch_size.py index 04e4fa188ac..b9cf472e498 100644 --- a/tests/openvino/native/test_tensor_collector_batch_size.py +++ b/tests/openvino/native/test_tensor_collector_batch_size.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2024 Intel Corporation # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -8,7 +8,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import List import numpy as np import pytest @@ -46,27 +45,6 @@ def aggregators(self, request) -> bool: def inplace(self, request): return request.param - def create_dataitems_without_batch_dim(self, input_shape: List[int], length: int = 100) -> List[np.ndarray]: - rng = np.random.default_rng(seed=0) - data_items = [] - for _ in range(length): - data_items.append(rng.uniform(0, 1, input_shape)) - return data_items - - def add_batch_dim_to_dataitems(self, data_items: List[np.ndarray], batch_size: int) -> List[np.ndarray]: - assert batch_size >= 1 - dataset = [] - item = [] - cnt = 0 - for data_item in data_items: - if batch_size == 1: - dataset.append(np.expand_dims(data_item, 0)) - else: - item.append(data_item) - if cnt == batch_size - 1: - dataset.append(np.array(item)) - item = [] - cnt = -1 - cnt += 1 - - return dataset + @staticmethod + def to_backend_tensor(tensor: np.ndarray): + return tensor diff --git a/tests/torch/ptq/test_tensor_collector_batch_size.py b/tests/torch/ptq/test_tensor_collector_batch_size.py index 8f9b5582fc0..5beff90e67a 100644 --- a/tests/torch/ptq/test_tensor_collector_batch_size.py +++ b/tests/torch/ptq/test_tensor_collector_batch_size.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2024 Intel Corporation # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -8,8 +8,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import List +import numpy as np import pytest import torch @@ -46,28 +46,6 @@ def aggregators(self, request) -> bool: def inplace(self, request): return request.param - def create_dataitems_without_batch_dim(self, input_shape: List[int], length: int = 100) -> List[torch.TensorType]: - torch.random.manual_seed(seed=0) - - data_items = [] - for _ in range(length): - data_items.append(torch.rand(input_shape)) - return data_items - - def add_batch_dim_to_dataitems(self, data_items: List[torch.TensorType], batch_size: int) -> List[torch.TensorType]: - assert batch_size >= 1 - dataset = [] - item = [] - cnt = 0 - for data_item in data_items: - if batch_size == 1: - dataset.append(torch.unsqueeze(data_item, 0)) - else: - item.append(data_item) - if cnt == batch_size - 1: - dataset.append(torch.stack(item)) - item = [] - cnt = -1 - cnt += 1 - - return dataset + @staticmethod + def to_backend_tensor(tensor: np.ndarray): + return torch.tensor(tensor) From 532e8ebdbf3d9e6250a8b8b64ae46e259203093f Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 6 Feb 2024 15:47:06 +0100 Subject: [PATCH 063/108] fix test --- tests/common/test_statistics_aggregator.py | 164 ++++++++++----------- 1 file changed, 82 insertions(+), 82 deletions(-) diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index 432c7122bc3..78877e27a8b 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -13,7 +13,7 @@ from dataclasses import dataclass from enum import Enum from itertools import product -from typing import Any, List, Type, Union +from typing import List, Tuple, Type, Union import numpy as np import pytest @@ -453,13 +453,16 @@ def filter_func(point): assert stat.max_values.shape == ref_shape @dataclass - class BCTestParameters: + class RawBCTestParameters: algo: BiasCorrectionAlgos - collector_type: BCStatsCollectors target_type: TargetType - ref_values: Any = None axis: int = 1 
+ @dataclass + class MeanBCTestParameters(RawBCTestParameters): + ref_values: np.ndarray = None + ref_shape: Tuple[int] = None + MEAN_ACT_AXIS_0_REF = np.array( [ [ @@ -484,112 +487,108 @@ class BCTestParameters: "test_params", [ # TargeType: activations - BCTestParameters( - BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.POST_LAYER_OPERATION, - (MEAN_ACT_AXIS_0_REF, (1, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + target_type=TargetType.POST_LAYER_OPERATION, + ref_values=MEAN_ACT_AXIS_0_REF, + ref_shape=(1, 3, 3, 3), axis=0, ), - BCTestParameters( - BiasCorrectionAlgos.BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.POST_LAYER_OPERATION, - (MEAN_ACT_AXIS_0_REF, (1, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.BIAS_CORRECTION, + target_type=TargetType.POST_LAYER_OPERATION, + ref_values=MEAN_ACT_AXIS_0_REF, + ref_shape=(1, 3, 3, 3), axis=0, ), - BCTestParameters( - BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.POST_LAYER_OPERATION, - (np.array((0.0, 0.45, 0.5)), (1, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + target_type=TargetType.POST_LAYER_OPERATION, + ref_values=(np.array((0.0, 0.45, 0.5))), + ref_shape=(1, 3, 3, 3), axis=1, ), - BCTestParameters( - BiasCorrectionAlgos.BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.POST_LAYER_OPERATION, - (np.array((0.0, 0.45, 0.5)), (1, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.BIAS_CORRECTION, + target_type=TargetType.POST_LAYER_OPERATION, + ref_values=(np.array((0.0, 0.45, 0.5))), + ref_shape=(1, 3, 3, 3), axis=1, ), - BCTestParameters( - BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.POST_LAYER_OPERATION, - (np.array([-0.04999995, 0.5, 0.5]), (1, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + target_type=TargetType.POST_LAYER_OPERATION, + 
ref_values=(np.array([-0.04999995, 0.5, 0.5])), + ref_shape=(1, 3, 3, 3), axis=2, ), - BCTestParameters( - BiasCorrectionAlgos.BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.POST_LAYER_OPERATION, - (np.array([-0.04999995, 0.5, 0.5]), (1, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.BIAS_CORRECTION, + target_type=TargetType.POST_LAYER_OPERATION, + ref_values=(np.array([-0.04999995, 0.5, 0.5])), + ref_shape=(1, 3, 3, 3), axis=2, ), - BCTestParameters( - BiasCorrectionAlgos.BIAS_CORRECTION, BCStatsCollectors.RAW, TargetType.POST_LAYER_OPERATION - ), + RawBCTestParameters(algo=BiasCorrectionAlgos.BIAS_CORRECTION, target_type=TargetType.POST_LAYER_OPERATION), # TargeType: weights - BCTestParameters( - BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.OPERATION_WITH_WEIGHTS, - (MEAN_WEIGHTS_AXIS_0_REF, (3, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + target_type=TargetType.OPERATION_WITH_WEIGHTS, + ref_values=(MEAN_WEIGHTS_AXIS_0_REF), + ref_shape=(3, 3, 3, 3), axis=0, ), - BCTestParameters( - BiasCorrectionAlgos.BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.OPERATION_WITH_WEIGHTS, - (MEAN_WEIGHTS_AXIS_0_REF, (3, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.BIAS_CORRECTION, + target_type=TargetType.OPERATION_WITH_WEIGHTS, + ref_values=(MEAN_WEIGHTS_AXIS_0_REF), + ref_shape=(3, 3, 3, 3), axis=0, ), - BCTestParameters( - BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.OPERATION_WITH_WEIGHTS, - (np.array([-0.36666664, -0.36666664, -0.36666664]), (3, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + target_type=TargetType.OPERATION_WITH_WEIGHTS, + ref_values=(np.array([-0.36666664, -0.36666664, -0.36666664])), + ref_shape=(3, 3, 3, 3), axis=1, ), - BCTestParameters( - BiasCorrectionAlgos.BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.OPERATION_WITH_WEIGHTS, - 
(np.array([-0.36666664, -0.36666664, -0.36666664]), (3, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.BIAS_CORRECTION, + target_type=TargetType.OPERATION_WITH_WEIGHTS, + ref_values=(np.array([-0.36666664, -0.36666664, -0.36666664])), + ref_shape=(3, 3, 3, 3), axis=1, ), - BCTestParameters( - BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.OPERATION_WITH_WEIGHTS, - (np.array([-1.1, 0.0, 0.0]), (3, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + target_type=TargetType.OPERATION_WITH_WEIGHTS, + ref_values=(np.array([-1.1, 0.0, 0.0])), + ref_shape=(3, 3, 3, 3), axis=2, ), - BCTestParameters( - BiasCorrectionAlgos.BIAS_CORRECTION, - BCStatsCollectors.MEAN, - TargetType.OPERATION_WITH_WEIGHTS, - (np.array([-1.1, 0.0, 0.0]), (3, 3, 3, 3)), + MeanBCTestParameters( + algo=BiasCorrectionAlgos.BIAS_CORRECTION, + target_type=TargetType.OPERATION_WITH_WEIGHTS, + ref_values=(np.array([-1.1, 0.0, 0.0])), + ref_shape=(3, 3, 3, 3), axis=2, ), ], ) def test_statistics_aggregator_bias_correction( - self, dataset_samples, test_params: BCTestParameters, inplace_statistics + self, dataset_samples, test_params: RawBCTestParameters, inplace_statistics ): name_to_algo_backend_map = { BiasCorrectionAlgos.BIAS_CORRECTION: self.get_bias_correction_algo_backend_cls, BiasCorrectionAlgos.FAST_BIAS_CORRECTION: self.get_fast_bias_correction_algo_backend_cls, } algo_backend = name_to_algo_backend_map[test_params.algo]() - if test_params.collector_type == BCStatsCollectors.MEAN: + if isinstance(test_params, self.MeanBCTestParameters): tensor_collector = algo_backend.mean_statistic_collector( test_params.axis, inplace_statistics, len(dataset_samples) ) - elif test_params.collector_type == BCStatsCollectors.RAW: + elif isinstance(test_params, self.RawBCTestParameters): tensor_collector = algo_backend.raw_statistic_collector(len(dataset_samples)) - else: - raise nncf.InvalidCollectorTypeError(f"Invalid collector 
type: {test_params.collector_type}") target_point = self.get_target_point(test_params.target_type) @@ -617,18 +616,19 @@ def filter_func(point): for tensor_collector in tensor_collectors: stat = tensor_collector.get_statistics() - if test_params.collector_type == BCStatsCollectors.MEAN: - ret_val = [stat.mean_values, stat.shape] - elif test_params.collector_type == BCStatsCollectors.RAW: - ret_val = stat.values - test_params.ref_values = dataset_samples + if isinstance(test_params, self.MeanBCTestParameters): + self._check_params_mean_collector(stat, test_params.ref_shape, test_params.ref_values) + elif isinstance(test_params, self.RawBCTestParameters): + self._check_params_raw_collector(stat, dataset_samples) else: - raise nncf.InvalidCollectorTypeError(f"Invalid collector type: {test_params.collector_type}") + assert False + + def _check_params_raw_collector(self, stat, ref_values): + assert np.allclose(stat.values, ref_values) - for val, ref in zip(ret_val, test_params.ref_values): - if isinstance(ref, np.ndarray): - assert ref.shape == val.shape - assert np.allclose(val, ref) + def _check_params_mean_collector(self, stat, ref_shape, ref_values): + assert ref_shape == stat.shape + assert np.allclose(stat.mean_values, ref_values) @classmethod def create_statistics_point( From 38d71b844c94ebe75d2d504956538d362cbe8024 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 6 Feb 2024 15:49:13 +0100 Subject: [PATCH 064/108] upd fbs method calculations --- .../common/tensor_statistics/collectors.py | 5 ++- .../statistical_functions.py | 30 ---------------- nncf/experimental/tensor/functions/numeric.py | 7 ++-- .../tensor/functions/numpy_numeric.py | 9 +++-- .../tensor/functions/torch_numeric.py | 6 ++-- nncf/openvino/graph/node_utils.py | 36 ++++++------------- nncf/openvino/statistics/collectors.py | 10 +++--- .../fast_bias_correction/algorithm.py | 19 ++++++++-- 8 files changed, 52 insertions(+), 70 deletions(-) delete mode 100644 
nncf/experimental/common/tensor_statistics/statistical_functions.py diff --git a/nncf/experimental/common/tensor_statistics/collectors.py b/nncf/experimental/common/tensor_statistics/collectors.py index 1017877bf24..3db7bd7c5c9 100644 --- a/nncf/experimental/common/tensor_statistics/collectors.py +++ b/nncf/experimental/common/tensor_statistics/collectors.py @@ -660,8 +660,11 @@ def _aggregate_impl(self) -> NNCFTensor: # Case when all registered tensors have identical shape if all(self._container[0].shape == x.shape for x in self._container): stacked_value = self._tensor_processor.stack(self._container) + if self._keepdims: + aggregated = self._aggregation_fn(stacked_value, axis=self._aggregation_axes, keepdims=self._keepdims) + return self._tensor_processor.squeeze(aggregated, 0).tensor aggregated = self._aggregation_fn(stacked_value, axis=self._aggregation_axes, keepdims=self._keepdims) - return self._tensor_processor.squeeze(aggregated, 0).tensor + return aggregated.tensor online_axes = tuple(x - 1 for x in self._aggregation_axes if x > 0) # Case when some registered tensors have different shapes and diff --git a/nncf/experimental/common/tensor_statistics/statistical_functions.py b/nncf/experimental/common/tensor_statistics/statistical_functions.py deleted file mode 100644 index ea49c58ecd1..00000000000 --- a/nncf/experimental/common/tensor_statistics/statistical_functions.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from nncf.experimental.tensor import Tensor -from nncf.experimental.tensor.functions import numeric as fns - - -def mean_per_channel(x: Tensor, axis: int) -> Tensor: - """ - Computes the mean of elements across given channel dimension of Tensor. - - :param x: Tensor to reduce. - :param axis: The channel dimensions to reduce. - :return: Reduced Tensor. - """ - if len(x.shape) < 3: - return fns.mean(x, axis=0) - pos_axis = axis + x.ndim if axis < 0 else axis - if pos_axis < 0 or pos_axis >= x.ndim: - raise ValueError(f"axis {axis} is out of bounds for array of dimension {x.ndim}") - axis = tuple(i for i in range(x.ndim) if i != pos_axis) - return fns.mean(x, axis=axis) diff --git a/nncf/experimental/tensor/functions/numeric.py b/nncf/experimental/tensor/functions/numeric.py index 3dd2c0d8815..95547fa6bea 100644 --- a/nncf/experimental/tensor/functions/numeric.py +++ b/nncf/experimental/tensor/functions/numeric.py @@ -355,16 +355,19 @@ def moveaxis(a: Tensor, source: Union[int, Tuple[int, ...]], destination: Union[ @functools.singledispatch @tensor_guard -def mean(a: Tensor, axis: Optional[Union[int, Tuple[int, ...]]] = None, keepdims: bool = False) -> Tensor: +def mean( + a: Tensor, axis: Optional[Union[int, Tuple[int, ...]]] = None, keepdims: bool = False, dtype: TensorDataType = None +) -> Tensor: """ Compute the arithmetic mean along the specified axis. :param a: Array containing numbers whose mean is desired. :param axis: Axis or axes along which the means are computed. :param keepdims: Destination positions for each of the original axes. These must also be unique. + :param dtype: Type to use in computing the mean. :return: Array with moved axes. 
""" - return Tensor(mean(a.data, axis, keepdims)) + return Tensor(mean(a.data, axis, keepdims, dtype)) @functools.singledispatch diff --git a/nncf/experimental/tensor/functions/numpy_numeric.py b/nncf/experimental/tensor/functions/numpy_numeric.py index 3aef3df1e5f..8dd7e640046 100644 --- a/nncf/experimental/tensor/functions/numpy_numeric.py +++ b/nncf/experimental/tensor/functions/numpy_numeric.py @@ -170,8 +170,13 @@ def _(a: np.ndarray, source: Union[int, Tuple[int, ...]], destination: Union[int @register_numpy_types(numeric.mean) -def _(a: Union[np.ndarray, np.generic], axis: Union[int, Tuple[int, ...]] = None, keepdims: bool = False) -> np.ndarray: - return np.array(np.mean(a, axis=axis, keepdims=keepdims)) +def _( + a: Union[np.ndarray, np.generic], + axis: Union[int, Tuple[int, ...]] = None, + keepdims: bool = False, + dtype: Optional[TensorDataType] = None, +) -> np.ndarray: + return np.array(np.mean(a, axis=axis, keepdims=keepdims, dtype=DTYPE_MAP[dtype])) @register_numpy_types(numeric.round) diff --git a/nncf/experimental/tensor/functions/torch_numeric.py b/nncf/experimental/tensor/functions/torch_numeric.py index 781e1ce49e8..797a4579ee4 100644 --- a/nncf/experimental/tensor/functions/torch_numeric.py +++ b/nncf/experimental/tensor/functions/torch_numeric.py @@ -183,8 +183,10 @@ def _(a: torch.Tensor, source: Union[int, Tuple[int, ...]], destination: Union[i @numeric.mean.register(torch.Tensor) -def _(a: torch.Tensor, axis: Union[int, Tuple[int, ...]] = None, keepdims: bool = False) -> torch.Tensor: - return torch.mean(a, dim=axis, keepdim=keepdims) +def _( + a: torch.Tensor, axis: Union[int, Tuple[int, ...]] = None, keepdims: bool = False, dtype: TensorDataType = None +) -> torch.Tensor: + return torch.mean(a, dim=axis, keepdim=keepdims, dtype=DTYPE_MAP[dtype]) @numeric.round.register(torch.Tensor) diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 88fafc899b5..4013c365796 100644 --- 
a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -22,6 +22,7 @@ from nncf.common.graph.layer_attributes import GenericWeightedLayerAttributes from nncf.common.graph.layer_attributes import LinearLayerAttributes from nncf.common.graph.layer_attributes import WeightedLayerAttributes +from nncf.common.graph.utils import get_reduction_axes from nncf.common.tensor_statistics.collectors import ReductionAxes from nncf.openvino.graph.layout import OVLayoutElem from nncf.openvino.graph.layout import get_conv_weights_layout @@ -276,38 +277,21 @@ def get_inplace_mean_per_ch(axis: int) -> InplaceInsertionFnType: def get_reduce_op(node: ov.Node, output_port_id: int, output_node_name: str) -> ov.Node: input_shape = get_partial_shape_safe(node, output_port_id) input_shape = [dim.get_length() if dim.is_static else -1 for dim in input_shape] + reduction_axes = get_reduction_axes( + ( + 0, + axis, + ), + input_shape, + ) if len(input_shape) < 3: return opset.reduce_mean( node.output(output_port_id), - reduction_axes=0, - keep_dims=False, name=output_node_name, ) - - ch_dim = 1 - if axis != ch_dim: - transpose_dims = list(range(len(input_shape))) - transpose_dims[axis], transpose_dims[ch_dim] = transpose_dims[ch_dim], transpose_dims[axis] - transposed_shape = [input_shape[dim] for dim in transpose_dims] - - reshape_input_node = opset.transpose(node.output(output_port_id), transpose_dims) - output_port_id = 0 - else: - reshape_input_node = node - transposed_shape = input_shape - - keeped_dims = transposed_shape[:2] - keeped_dims = [0 if dim < 0 else dim for dim in keeped_dims] - squized_dims = -1 if -1 in transposed_shape[2:] else np.prod(transposed_shape[2:]) - reshape_op = opset.reshape( - reshape_input_node.output(output_port_id), - output_shape=np.array((keeped_dims[0], keeped_dims[1], squized_dims)), - special_zero=True, - ) return opset.reduce_mean( - reshape_op, - reduction_axes=np.array((0, 2)), - keep_dims=False, + node.output(output_port_id), + 
reduction_axes=reduction_axes, name=output_node_name, ) diff --git a/nncf/openvino/statistics/collectors.py b/nncf/openvino/statistics/collectors.py index beaac8d2cba..6940957e270 100644 --- a/nncf/openvino/statistics/collectors.py +++ b/nncf/openvino/statistics/collectors.py @@ -13,6 +13,7 @@ import numpy as np +from nncf.common.graph.utils import get_reduction_axes from nncf.common.tensor import NNCFTensor from nncf.common.tensor import TensorElementsType from nncf.common.tensor_statistics.collectors import NNCFCollectorTensorProcessor @@ -110,10 +111,9 @@ def masked_median( @staticmethod def mean_per_channel(x: NNCFTensor, axis: int) -> NNCFTensor: if len(x.shape) < 3: - return OVNNCFTensor(np.mean(x.tensor, axis=0)) - x = np.moveaxis(x.tensor, axis, 1) - t = x.reshape(x.shape[0], x.shape[1], -1) - return OVNNCFTensor(np.mean(t, axis=(0, 2))) + return OVNNCFTensor(x) + red_axes = get_reduction_axes((0, axis), x.shape) + return OVNNCFTensor(np.mean(x.tensor, axis=red_axes)) @staticmethod def transpose(x: NNCFTensor, axes: Tuple[int, ...]) -> NNCFTensor: @@ -276,8 +276,10 @@ def get_mean_statistic_collector( "tensor_processor": OVNNCFCollectorTensorProcessor, "num_samples": num_samples, "window_size": window_size, + "aggregation_axes": (0, 1), } aggregate_mean = MeanAggregator(**kwargs) + aggregate_mean._keepdims = False aggregate_shape = ShapeAggregator() collector = TensorCollector(OVMeanTensorStatistic) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 0ba1e2b1524..ca277753f0c 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -21,14 +21,15 @@ from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout +from 
nncf.common.graph.utils import get_reduction_axes from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend -from nncf.experimental.common.tensor_statistics.statistical_functions import mean_per_channel from nncf.experimental.tensor import Tensor +from nncf.experimental.tensor import TensorDataType from nncf.experimental.tensor import functions as fns from nncf.quantization.algorithms.algorithm import Algorithm @@ -305,7 +306,10 @@ def _get_bias_shift( output_name: str, ) -> TTensor: """ - Calculates updated bias. + Calculates updated bias: + 1) Infers the quantized submodel using as input created blob. + 2) Calculates the mean of quantized output by channel axis. + 3) Calculates the difference between float output and the tensor from step 2. :param engine: Backend-specific engine instance for the model execution. :param model: Backend-specific sub-model for the execution. @@ -318,7 +322,16 @@ def _get_bias_shift( engine = EngineFactory.create(model) raw_output = engine.infer(input_blob) q_outputs = self._backend_entity.process_model_output(raw_output, output_name) - q_outputs = mean_per_channel(q_outputs, channel_axis) + if len(q_outputs.shape) < 3: + return fns.mean(q_outputs, axis=0) + + reduction_axes = get_reduction_axes( + (channel_axis,), + q_outputs.shape, + ) + q_outputs = fns.mean( + q_outputs, axis=reduction_axes, dtype=TensorDataType.float64 + ) # Use float64 to vanish issues with computing sum for float32. 
bias_shift = fns.stack(output_fp) - q_outputs return bias_shift From c49036225c77dd000fc4befdfb9eb9bd8b88bd32 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 7 Feb 2024 17:43:18 +0100 Subject: [PATCH 065/108] revert changes with statistics collection --- .../statistical_functions.py | 32 +++++++++++++++++ .../tensor/functions/numpy_numeric.py | 3 +- .../tensor/functions/torch_numeric.py | 8 +++-- nncf/onnx/statistics/collectors.py | 1 - nncf/openvino/graph/node_utils.py | 36 +++++++++++++------ nncf/openvino/statistics/collectors.py | 12 +++---- .../fast_bias_correction/algorithm.py | 14 +++----- 7 files changed, 76 insertions(+), 30 deletions(-) create mode 100644 nncf/experimental/common/tensor_statistics/statistical_functions.py diff --git a/nncf/experimental/common/tensor_statistics/statistical_functions.py b/nncf/experimental/common/tensor_statistics/statistical_functions.py new file mode 100644 index 00000000000..3dcf978259f --- /dev/null +++ b/nncf/experimental/common/tensor_statistics/statistical_functions.py @@ -0,0 +1,32 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nncf.experimental.tensor import Tensor +from nncf.experimental.tensor import TensorDataType +from nncf.experimental.tensor.functions import numeric as fns + + +def mean_per_channel(x: Tensor, axis: int, dtype: TensorDataType) -> Tensor: + """ + Computes the mean of elements across given channel dimension of Tensor. 
+ + :param x: Tensor to reduce. + :param axis: The channel dimensions to reduce. + :param dtype: Type to use in computing the mean. + :return: Reduced Tensor. + """ + if len(x.shape) < 3: + return fns.mean(x, axis=0) + pos_axis = axis + x.ndim if axis < 0 else axis + if pos_axis < 0 or pos_axis >= x.ndim: + raise ValueError(f"axis {axis} is out of bounds for array of dimension {x.ndim}") + axis = tuple(i for i in range(x.ndim) if i != pos_axis) + return fns.mean(x, axis=axis, dtype=dtype) diff --git a/nncf/experimental/tensor/functions/numpy_numeric.py b/nncf/experimental/tensor/functions/numpy_numeric.py index 8dd7e640046..a975c58072f 100644 --- a/nncf/experimental/tensor/functions/numpy_numeric.py +++ b/nncf/experimental/tensor/functions/numpy_numeric.py @@ -176,7 +176,8 @@ def _( keepdims: bool = False, dtype: Optional[TensorDataType] = None, ) -> np.ndarray: - return np.array(np.mean(a, axis=axis, keepdims=keepdims, dtype=DTYPE_MAP[dtype])) + dtype = DTYPE_MAP[dtype] if dtype else None + return np.array(np.mean(a, axis=axis, keepdims=keepdims, dtype=dtype)) @register_numpy_types(numeric.round) diff --git a/nncf/experimental/tensor/functions/torch_numeric.py b/nncf/experimental/tensor/functions/torch_numeric.py index 797a4579ee4..5b04a5a5aa2 100644 --- a/nncf/experimental/tensor/functions/torch_numeric.py +++ b/nncf/experimental/tensor/functions/torch_numeric.py @@ -184,9 +184,13 @@ def _(a: torch.Tensor, source: Union[int, Tuple[int, ...]], destination: Union[i @numeric.mean.register(torch.Tensor) def _( - a: torch.Tensor, axis: Union[int, Tuple[int, ...]] = None, keepdims: bool = False, dtype: TensorDataType = None + a: torch.Tensor, + axis: Union[int, Tuple[int, ...]] = None, + keepdims: bool = False, + dtype: Optional[TensorDataType] = None, ) -> torch.Tensor: - return torch.mean(a, dim=axis, keepdim=keepdims, dtype=DTYPE_MAP[dtype]) + dtype = DTYPE_MAP[dtype] if dtype else None + return torch.mean(a, dim=axis, keepdim=keepdims, dtype=dtype) 
@numeric.round.register(torch.Tensor) diff --git a/nncf/onnx/statistics/collectors.py b/nncf/onnx/statistics/collectors.py index c7ea4fbf471..4976594dc4b 100644 --- a/nncf/onnx/statistics/collectors.py +++ b/nncf/onnx/statistics/collectors.py @@ -242,7 +242,6 @@ def get_mean_statistic_collector( :param inplace: Whether the mean reducer should be calculated inplace or out of place. :return: Mean statistic collector. """ - inplace = False if channel_axis == 0: reducer = ONNXBatchMeanReducer(inplace) else: diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 4013c365796..88fafc899b5 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -22,7 +22,6 @@ from nncf.common.graph.layer_attributes import GenericWeightedLayerAttributes from nncf.common.graph.layer_attributes import LinearLayerAttributes from nncf.common.graph.layer_attributes import WeightedLayerAttributes -from nncf.common.graph.utils import get_reduction_axes from nncf.common.tensor_statistics.collectors import ReductionAxes from nncf.openvino.graph.layout import OVLayoutElem from nncf.openvino.graph.layout import get_conv_weights_layout @@ -277,21 +276,38 @@ def get_inplace_mean_per_ch(axis: int) -> InplaceInsertionFnType: def get_reduce_op(node: ov.Node, output_port_id: int, output_node_name: str) -> ov.Node: input_shape = get_partial_shape_safe(node, output_port_id) input_shape = [dim.get_length() if dim.is_static else -1 for dim in input_shape] - reduction_axes = get_reduction_axes( - ( - 0, - axis, - ), - input_shape, - ) if len(input_shape) < 3: return opset.reduce_mean( node.output(output_port_id), + reduction_axes=0, + keep_dims=False, name=output_node_name, ) + + ch_dim = 1 + if axis != ch_dim: + transpose_dims = list(range(len(input_shape))) + transpose_dims[axis], transpose_dims[ch_dim] = transpose_dims[ch_dim], transpose_dims[axis] + transposed_shape = [input_shape[dim] for dim in transpose_dims] + + reshape_input_node = 
opset.transpose(node.output(output_port_id), transpose_dims) + output_port_id = 0 + else: + reshape_input_node = node + transposed_shape = input_shape + + keeped_dims = transposed_shape[:2] + keeped_dims = [0 if dim < 0 else dim for dim in keeped_dims] + squized_dims = -1 if -1 in transposed_shape[2:] else np.prod(transposed_shape[2:]) + reshape_op = opset.reshape( + reshape_input_node.output(output_port_id), + output_shape=np.array((keeped_dims[0], keeped_dims[1], squized_dims)), + special_zero=True, + ) return opset.reduce_mean( - node.output(output_port_id), - reduction_axes=reduction_axes, + reshape_op, + reduction_axes=np.array((0, 2)), + keep_dims=False, name=output_node_name, ) diff --git a/nncf/openvino/statistics/collectors.py b/nncf/openvino/statistics/collectors.py index 6940957e270..f546d82f4a5 100644 --- a/nncf/openvino/statistics/collectors.py +++ b/nncf/openvino/statistics/collectors.py @@ -13,7 +13,6 @@ import numpy as np -from nncf.common.graph.utils import get_reduction_axes from nncf.common.tensor import NNCFTensor from nncf.common.tensor import TensorElementsType from nncf.common.tensor_statistics.collectors import NNCFCollectorTensorProcessor @@ -111,9 +110,10 @@ def masked_median( @staticmethod def mean_per_channel(x: NNCFTensor, axis: int) -> NNCFTensor: if len(x.shape) < 3: - return OVNNCFTensor(x) - red_axes = get_reduction_axes((0, axis), x.shape) - return OVNNCFTensor(np.mean(x.tensor, axis=red_axes)) + return OVNNCFTensor(np.mean(x.tensor, axis=0)) + x = np.moveaxis(x.tensor, axis, 1) + t = x.reshape(x.shape[0], x.shape[1], -1) + return OVNNCFTensor(np.mean(t, axis=(0, 2))) @staticmethod def transpose(x: NNCFTensor, axes: Tuple[int, ...]) -> NNCFTensor: @@ -276,10 +276,10 @@ def get_mean_statistic_collector( "tensor_processor": OVNNCFCollectorTensorProcessor, "num_samples": num_samples, "window_size": window_size, - "aggregation_axes": (0, 1), + # "aggregation_axes": (0,), } aggregate_mean = MeanAggregator(**kwargs) - 
aggregate_mean._keepdims = False + # aggregate_mean._keepdims = True aggregate_shape = ShapeAggregator() collector = TensorCollector(OVMeanTensorStatistic) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index ca277753f0c..ebfe5ef660f 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -21,13 +21,13 @@ from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout -from nncf.common.graph.utils import get_reduction_axes from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend +from nncf.experimental.common.tensor_statistics.statistical_functions import mean_per_channel from nncf.experimental.tensor import Tensor from nncf.experimental.tensor import TensorDataType from nncf.experimental.tensor import functions as fns @@ -322,16 +322,10 @@ def _get_bias_shift( engine = EngineFactory.create(model) raw_output = engine.infer(input_blob) q_outputs = self._backend_entity.process_model_output(raw_output, output_name) - if len(q_outputs.shape) < 3: - return fns.mean(q_outputs, axis=0) - - reduction_axes = get_reduction_axes( - (channel_axis,), - q_outputs.shape, - ) - q_outputs = fns.mean( - q_outputs, axis=reduction_axes, dtype=TensorDataType.float64 + q_outputs = mean_per_channel( + q_outputs, channel_axis, TensorDataType.float64 ) # Use float64 to vanish issues with computing sum for float32. 
+ bias_shift = fns.stack(output_fp) - q_outputs return bias_shift From b778c0c1dece06dcbef5ee1ecf3531107cc2311a Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 13 Feb 2024 15:52:36 +0100 Subject: [PATCH 066/108] updates aggregators, reducers for BC and FBC --- .../common/tensor_statistics/collectors.py | 8 ----- .../statistical_functions.py | 7 +++-- nncf/onnx/statistics/collectors.py | 17 ++++------- nncf/openvino/graph/node_utils.py | 11 +------ nncf/openvino/statistics/collectors.py | 24 ++++----------- .../algorithms/bias_correction/algorithm.py | 25 +++++++++++++--- .../bias_correction/onnx_backend.py | 1 + .../fast_bias_correction/algorithm.py | 18 ++++------- .../fast_bias_correction/onnx_backend.py | 16 ++++++---- .../fast_bias_correction/openvino_backend.py | 15 ++++++---- .../fast_bias_correction/torch_backend.py | 12 +++++--- nncf/torch/tensor_statistics/collectors.py | 16 ++++------ .../test_reducers_and_aggregators.py | 30 ++++++++----------- tests/common/test_statistics_aggregator.py | 9 ++++-- .../test_reducers_and_aggregators.py | 2 -- .../test_reducers_and_aggregators.py | 2 -- .../openvino/native/test_model_transformer.py | 4 +-- .../native/test_statistics_aggregator.py | 3 +- .../ptq/test_reducers_and_aggregators.py | 2 -- 19 files changed, 100 insertions(+), 122 deletions(-) diff --git a/nncf/experimental/common/tensor_statistics/collectors.py b/nncf/experimental/common/tensor_statistics/collectors.py index 3db7bd7c5c9..7e7ededfc62 100644 --- a/nncf/experimental/common/tensor_statistics/collectors.py +++ b/nncf/experimental/common/tensor_statistics/collectors.py @@ -555,14 +555,6 @@ def _reduce_out_of_place(self, x: List[NNCFTensor]) -> List[NNCFTensor]: return self._tensor_processor.quantile(x, self._quantile, reduction_axes, keepdims=self._keepdims) -class BatchMeanReducer(TensorReducerBase): - def __init__(self, inplace: bool = False): - super().__init__(None, inplace) - - def _reduce_out_of_place(self, x: List[NNCFTensor]) -> 
List[NNCFTensor]: - return [self._tensor_processor.batch_mean(x[0])] - - class MeanPerChReducer(TensorReducerBase): def __init__(self, channel_axis: int = 1, inplace: bool = False): super().__init__(inplace=inplace) diff --git a/nncf/experimental/common/tensor_statistics/statistical_functions.py b/nncf/experimental/common/tensor_statistics/statistical_functions.py index 3dcf978259f..1253dfb8279 100644 --- a/nncf/experimental/common/tensor_statistics/statistical_functions.py +++ b/nncf/experimental/common/tensor_statistics/statistical_functions.py @@ -9,12 +9,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional + from nncf.experimental.tensor import Tensor from nncf.experimental.tensor import TensorDataType from nncf.experimental.tensor.functions import numeric as fns -def mean_per_channel(x: Tensor, axis: int, dtype: TensorDataType) -> Tensor: +def mean_per_channel(x: Tensor, axis: int, dtype: Optional[TensorDataType] = None) -> Tensor: """ Computes the mean of elements across given channel dimension of Tensor. @@ -24,7 +26,8 @@ def mean_per_channel(x: Tensor, axis: int, dtype: TensorDataType) -> Tensor: :return: Reduced Tensor. 
""" if len(x.shape) < 3: - return fns.mean(x, axis=0) + return fns.mean(x, axis=0, dtype=dtype) + pos_axis = axis + x.ndim if axis < 0 else axis if pos_axis < 0 or pos_axis >= x.ndim: raise ValueError(f"axis {axis} is out of bounds for array of dimension {x.ndim}") diff --git a/nncf/onnx/statistics/collectors.py b/nncf/onnx/statistics/collectors.py index 4976594dc4b..3b91f46aeab 100644 --- a/nncf/onnx/statistics/collectors.py +++ b/nncf/onnx/statistics/collectors.py @@ -17,7 +17,6 @@ from nncf.common.tensor_statistics.collectors import NNCFCollectorTensorProcessor from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer -from nncf.experimental.common.tensor_statistics.collectors import BatchMeanReducer from nncf.experimental.common.tensor_statistics.collectors import MaxReducer from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator from nncf.experimental.common.tensor_statistics.collectors import MeanPerChReducer @@ -159,7 +158,7 @@ def mean_per_channel(x: ONNXNNCFTensor, axis: int) -> ONNXNNCFTensor: return ONNXNNCFTensor(np.mean(x.tensor, axis=0)) x = np.moveaxis(x.tensor, axis, 1) t = x.reshape(x.shape[0], x.shape[1], -1) - return ONNXNNCFTensor(np.mean(t, axis=(0, 2))) + return ONNXNNCFTensor(np.mean(t, axis=(2,))) @staticmethod def transpose(x: ONNXNNCFTensor, axes: Tuple[int, ...]) -> ONNXNNCFTensor: @@ -221,10 +220,6 @@ class ONNXAbsQuantileReducer(ONNXBasicReducer, AbsQuantileReducer): pass -class ONNXBatchMeanReducer(ONNXBasicReducer, BatchMeanReducer): - pass - - class ONNXMeanPerChanelReducer(ONNXBasicReducer, MeanPerChReducer): pass @@ -242,16 +237,16 @@ def get_mean_statistic_collector( :param inplace: Whether the mean reducer should be calculated inplace or out of place. :return: Mean statistic collector. 
""" - if channel_axis == 0: - reducer = ONNXBatchMeanReducer(inplace) - else: - reducer = ONNXMeanPerChanelReducer(channel_axis=channel_axis, inplace=inplace) + reducer = ONNXMeanPerChanelReducer(channel_axis=channel_axis, inplace=inplace) noop_reducer = NoopReducer() - + aggregation_axes = ( + (0,) if channel_axis == -1 else (0, 1) + ) # Assume that batch is on 0-axis for Convolutions and No batch axis for MatMul kwargs = { "tensor_processor": ONNXNNCFCollectorTensorProcessor, "num_samples": num_samples, "window_size": window_size, + "aggregation_axes": aggregation_axes, } aggregate_mean = MeanAggregator(**kwargs) diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 88fafc899b5..bad3c3dae06 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -255,15 +255,6 @@ def get_inplace_mean_op(reduction_axes: Optional[ReductionAxes]) -> InplaceInser return get_inplace_reduce_op(opset.reduce_mean, reduction_axes, False) -def get_inplace_batch_mean_op() -> InplaceInsertionFnType: - """ - Returns inplace batch mean function that adds reduce batch mean node to a passed node. - - :returns: Inplace insertion function to use in ModelTransformer. 
- """ - return get_inplace_reduce_op(opset.reduce_mean, np.array(0), False) - - def get_inplace_mean_per_ch(axis: int) -> InplaceInsertionFnType: """ Returns inplace mean per channel function that adds reduce mean per channel node @@ -306,7 +297,7 @@ def get_reduce_op(node: ov.Node, output_port_id: int, output_node_name: str) -> ) return opset.reduce_mean( reshape_op, - reduction_axes=np.array((0, 2)), + reduction_axes=np.array((2)), keep_dims=False, name=output_node_name, ) diff --git a/nncf/openvino/statistics/collectors.py b/nncf/openvino/statistics/collectors.py index f546d82f4a5..8ac7a94ca77 100644 --- a/nncf/openvino/statistics/collectors.py +++ b/nncf/openvino/statistics/collectors.py @@ -18,7 +18,6 @@ from nncf.common.tensor_statistics.collectors import NNCFCollectorTensorProcessor from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer -from nncf.experimental.common.tensor_statistics.collectors import BatchMeanReducer from nncf.experimental.common.tensor_statistics.collectors import InplaceInsertionFNType from nncf.experimental.common.tensor_statistics.collectors import MaxReducer from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator @@ -31,7 +30,6 @@ from nncf.experimental.common.tensor_statistics.collectors import RawReducer from nncf.experimental.common.tensor_statistics.collectors import ShapeAggregator from nncf.experimental.common.tensor_statistics.collectors import TensorCollector -from nncf.openvino.graph.node_utils import get_inplace_batch_mean_op from nncf.openvino.graph.node_utils import get_inplace_max_op from nncf.openvino.graph.node_utils import get_inplace_mean_op from nncf.openvino.graph.node_utils import get_inplace_mean_per_ch @@ -113,7 +111,7 @@ def mean_per_channel(x: NNCFTensor, axis: int) -> NNCFTensor: return OVNNCFTensor(np.mean(x.tensor, axis=0)) x = np.moveaxis(x.tensor, axis, 1) t = 
x.reshape(x.shape[0], x.shape[1], -1) - return OVNNCFTensor(np.mean(t, axis=(0, 2))) + return OVNNCFTensor(np.mean(t, axis=(2,))) @staticmethod def transpose(x: NNCFTensor, axes: Tuple[int, ...]) -> NNCFTensor: @@ -221,14 +219,6 @@ def get_inplace_fn(self): return get_inplace_mean_op(self._reduction_axes) -class OVBatchMeanReducer(BatchMeanReducer): - def _get_processor(self): - return OVNNCFCollectorTensorProcessor - - def get_inplace_fn(self): - return get_inplace_batch_mean_op() - - class OVMeanPerChanelReducer(MeanPerChReducer): def _get_processor(self): return OVNNCFCollectorTensorProcessor @@ -266,20 +256,18 @@ def get_mean_statistic_collector( :param inplace: Whether the mean reducer should be calculated inplace or out of place. :return: Mean statistic collector. """ - if channel_axis == 0: - reducer = OVBatchMeanReducer(inplace) - else: - reducer = OVMeanPerChanelReducer(channel_axis=channel_axis, inplace=inplace) + reducer = OVMeanPerChanelReducer(channel_axis=channel_axis, inplace=inplace) noop_reducer = NoopReducer() - + aggregation_axes = ( + (0,) if channel_axis == -1 else (0, 1) + ) # Assume that batch is on 0-axis for Convolutions and No batch axis for MatMul kwargs = { "tensor_processor": OVNNCFCollectorTensorProcessor, "num_samples": num_samples, "window_size": window_size, - # "aggregation_axes": (0,), + "aggregation_axes": aggregation_axes, } aggregate_mean = MeanAggregator(**kwargs) - # aggregate_mean._keepdims = True aggregate_shape = ShapeAggregator() collector = TensorCollector(OVMeanTensorStatistic) diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index a45e807ec35..f4d4bffdd36 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -42,7 +42,6 @@ class BiasCorrection(Algorithm): - """ Post-training BiasCorrection algorithm implementation @@ -180,6 +179,7 @@ def apply( axes = [i 
for i in range(current_bias.ndim) if i != channel_axis] bias_shift = np.expand_dims(bias_shift, axes) + bias_shift = self._reshape_bias_shift(bias_shift, current_bias, channel_axis) updated_bias = current_bias + bias_shift magnitude = self._get_bias_shift_magnitude(current_bias, updated_bias) @@ -204,6 +204,24 @@ def apply( return main_model_transformer.transform(main_transformations_layout) + @staticmethod + def _reshape_bias_shift(bias_shift, bias_value, channel_axis: int): + """ + Reshape bias_shift tensor in case of dimensions of bias_value is more then 1. + + :param bias_shift: Bias shift tensor. + :param bias_value: Bias value tensor. + :param channel_axis: Axis to update bias. + + :return TTensor: Updated bias_shift. + """ + bias_shift = bias_shift.squeeze() + if bias_value.ndim > 1: + new_shape = [1] * bias_value.ndim + new_shape[channel_axis] = bias_shift.shape[0] + bias_shift = bias_shift.reshape(new_shape) + return bias_shift + def _is_node_correctable(self, node: NNCFNode, nncf_graph: NNCFGraph) -> bool: """ Verify if node bias can be corrected or not. 
@@ -476,11 +494,10 @@ def input_filter_func(point): if input_id in self._fp_inputs: return self._fp_inputs[input_id] - input_fp = [] for tensor_collector in statistic_points.get_algo_statistics_for_node( node_name, input_filter_func, self._algorithm_key ): - input_fp.extend(tensor_collector.get_statistics().values) + input_fp = tensor_collector.get_statistics().values self._fp_inputs[input_id] = input_fp return self._fp_inputs[input_id] @@ -503,7 +520,7 @@ def output_filter_func(point): for tensor_collector in statistic_points.get_algo_statistics_for_node( node_name, output_filter_func, self._algorithm_key ): - output_fp.extend(tensor_collector.get_statistics().mean_values) + output_fp = tensor_collector.get_statistics().mean_values return np.array(output_fp) def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: diff --git a/nncf/quantization/algorithms/bias_correction/onnx_backend.py b/nncf/quantization/algorithms/bias_correction/onnx_backend.py index 3e1b7f0b906..7b33eda21ac 100644 --- a/nncf/quantization/algorithms/bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/bias_correction/onnx_backend.py @@ -71,6 +71,7 @@ def mean_statistic_collector( num_samples: Optional[int] = None, window_size: Optional[int] = None, ) -> TensorCollector: + inplace = False return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) @staticmethod diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index ebfe5ef660f..7d5233cf320 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -29,7 +29,6 @@ from nncf.common.utils.backend import get_backend from nncf.experimental.common.tensor_statistics.statistical_functions import mean_per_channel from nncf.experimental.tensor import Tensor -from 
nncf.experimental.tensor import TensorDataType from nncf.experimental.tensor import functions as fns from nncf.quantization.algorithms.algorithm import Algorithm @@ -215,6 +214,7 @@ def _reshape_bias_shift(bias_shift: Tensor, bias_value: Tensor, channel_axis: in :return TTensor: Updated bias_shift. """ + bias_shift = bias_shift.squeeze() if bias_value.ndim > 1: new_shape = [1] * bias_value.ndim new_shape[channel_axis] = bias_shift.shape[0] @@ -236,14 +236,12 @@ def input_filter_func(point): TargetType.OPERATOR_PRE_HOOK, ] - input_fp = [] - input_shape = [] for tensor_collector in statistic_points.get_algo_statistics_for_node( node_name, input_filter_func, self._algorithm_key ): statistics = tensor_collector.get_statistics() - input_fp.extend(Tensor(statistics.mean_values)) - input_shape.extend(statistics.shape) + input_fp = Tensor(statistics.mean_values) + input_shape = statistics.shape return input_fp, input_shape def _get_fp_outputs(self, statistic_points: StatisticPointsContainer, node_name: str) -> List[TTensor]: @@ -261,11 +259,10 @@ def output_filter_func(point): TargetType.OPERATOR_POST_HOOK, ] - output_fp = [] for tensor_collector in statistic_points.get_algo_statistics_for_node( node_name, output_filter_func, self._algorithm_key ): - output_fp.extend(Tensor(tensor_collector.get_statistics().mean_values)) + output_fp = Tensor(tensor_collector.get_statistics().mean_values) return output_fp def _extract_submodel(self, model_transformer: ModelTransformer, node_name: str) -> TModel: @@ -322,11 +319,8 @@ def _get_bias_shift( engine = EngineFactory.create(model) raw_output = engine.infer(input_blob) q_outputs = self._backend_entity.process_model_output(raw_output, output_name) - q_outputs = mean_per_channel( - q_outputs, channel_axis, TensorDataType.float64 - ) # Use float64 to vanish issues with computing sum for float32. 
- - bias_shift = fns.stack(output_fp) - q_outputs + q_outputs = mean_per_channel(q_outputs, channel_axis) + bias_shift = output_fp - q_outputs return bias_shift def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: diff --git a/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py b/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py index f78fac3eb9a..d954949432b 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py @@ -54,6 +54,7 @@ def mean_statistic_collector( num_samples: Optional[int] = None, window_size: Optional[int] = None, ) -> TensorCollector: + inplace = False return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) @staticmethod @@ -61,13 +62,16 @@ def get_sub_input_output_names(subgraph: onnx.ModelProto) -> Tuple[str, str]: return subgraph.graph.input[0].name, subgraph.graph.output[0].name @staticmethod - def create_input_data( - shape: Tuple[int], data: List[Tensor], input_name: str, channel_axis: int - ) -> Dict[str, np.array]: + def create_input_data(shape: Tuple[int], data: Tensor, input_name: str, channel_axis: int) -> Dict[str, np.ndarray]: blob = np.zeros(shape, dtype=data[0].data.dtype) - for j, idx in enumerate(np.ndindex(blob.shape[channel_axis])): - index = tuple(slice(None) if i != channel_axis else idx for i in range(blob.ndim)) - blob[index] = data[j].data + blob = np.moveaxis(blob, channel_axis, 0) + data = data.squeeze() + if data.size == 1: + blob[0] = data.item() + else: + for i, tensor in enumerate(data): + blob[i] = tensor.data + blob = np.moveaxis(blob, 0, channel_axis) input_data = {input_name: blob} return input_data diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index 58cc3f04ff2..2bf01b3b6f6 100644 --- 
a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -61,13 +61,16 @@ def get_sub_input_output_names(subgraph: ov.Model) -> Tuple[str, str]: return subgraph.inputs[0].get_any_name(), subgraph.outputs[0].get_any_name() @staticmethod - def create_input_data( - shape: Tuple[int], data: List[Tensor], input_name: str, channel_axis: int - ) -> Dict[str, np.ndarray]: + def create_input_data(shape: Tuple[int], data: Tensor, input_name: str, channel_axis: int) -> Dict[str, np.ndarray]: blob = np.zeros(shape, dtype=data[0].data.dtype) - for j, idx in enumerate(np.ndindex(blob.shape[channel_axis])): - index = tuple(slice(None) if i != channel_axis else idx for i in range(blob.ndim)) - blob[index] = data[j].data + blob = np.moveaxis(blob, channel_axis, 0) + data = data.squeeze() + if data.size == 1: + blob[0] = data.item() + else: + for i, tensor in enumerate(data): + blob[i] = tensor.data + blob = np.moveaxis(blob, 0, channel_axis) input_data = {input_name: blob} return input_data diff --git a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py index 7623fdb0012..372541d3286 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py @@ -11,7 +11,6 @@ from typing import Dict, List, Optional, Tuple -import numpy as np import torch from nncf.common.graph import NNCFGraph @@ -76,9 +75,14 @@ def get_sub_input_output_names(subgraph: NNCFNetwork) -> Tuple[str, str]: @staticmethod def create_input_data(shape: Tuple[int], data: List[Tensor], input_name: str, channel_axis: int) -> torch.Tensor: blob = torch.zeros(shape, dtype=data[0].data.dtype, device=data[0].data.device) - for j, idx in enumerate(np.ndindex(blob.shape[channel_axis])): - index = tuple(slice(None) if i != channel_axis else idx for i in 
range(blob.ndim)) - blob[index] = data[j].data + blob = torch.moveaxis(blob, channel_axis, 0) + data = data.squeeze() + if data.size == 1: + blob[0] = data.item() + else: + for i, tensor in enumerate(data): + blob[i] = tensor.data + blob = torch.moveaxis(blob, 0, channel_axis) return blob @staticmethod diff --git a/nncf/torch/tensor_statistics/collectors.py b/nncf/torch/tensor_statistics/collectors.py index 968a6afbd69..e0974989446 100644 --- a/nncf/torch/tensor_statistics/collectors.py +++ b/nncf/torch/tensor_statistics/collectors.py @@ -21,7 +21,6 @@ from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer from nncf.experimental.common.tensor_statistics.collectors import AggregatorBase -from nncf.experimental.common.tensor_statistics.collectors import BatchMeanReducer from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator from nncf.experimental.common.tensor_statistics.collectors import MaxReducer from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator @@ -248,10 +247,6 @@ class PTAbsQuantileReducer(PTReducerMixIn, AbsQuantileReducer): pass -class PTBatchMeanReducer(PTReducerMixIn, BatchMeanReducer): - pass - - class PTMeanPerChanelReducer(PTReducerMixIn, MeanPerChReducer): pass @@ -525,16 +520,17 @@ def get_mean_statistic_collector( Aggregates all available collected statistics in case parameter is None. :return: Mean statistic collector. 
""" - if channel_axis == 0: - reducer = PTBatchMeanReducer() - else: - reducer = PTMeanPerChanelReducer(channel_axis=channel_axis) - noop_reducer = NoopReducer() + reducer = PTMeanPerChanelReducer(channel_axis=channel_axis) + noop_reducer = NoopReducer() + aggregation_axes = ( + (0,) if channel_axis == -1 else (0, 1) + ) # Assume that batch is on 0-axis for Convolutions and No batch axis for MatMul kwargs = { "tensor_processor": PTNNCFCollectorTensorProcessor, "num_samples": num_samples, "window_size": window_size, + "aggregation_axes": aggregation_axes, } aggregate_mean = MeanAggregator(**kwargs) aggregate_shape = ShapeAggregator() diff --git a/tests/common/experimental/test_reducers_and_aggregators.py b/tests/common/experimental/test_reducers_and_aggregators.py index ffc36d134e4..0d579397234 100644 --- a/tests/common/experimental/test_reducers_and_aggregators.py +++ b/tests/common/experimental/test_reducers_and_aggregators.py @@ -225,20 +225,6 @@ def test_quantile_reducers(self, reducer_name, ref, reducers): for i, ref_ in enumerate(ref): assert self.all_close(val[i].tensor, self.cast_tensor(ref_, Dtype.FLOAT)) - @pytest.mark.parametrize( - "reducer_name,ref,kwargs", - [ - ("batch_mean", [[[[-12.5, -11.5, -10.5], [-9.5, -8.5, -7.5], [-6.5, -5.5, -4.5]]]], {}), - ("mean_per_ch", [-22.0, -13.0, -4.0, 5.0], {"channel_axis": 0}), - ], - ) - def test_batch_mean_mean_per_ch_reducers(self, reducer_name, ref, reducers, kwargs): - input_ = np.arange(-26, 10).reshape((4, 1, 3, 3)) - reducer = reducers[reducer_name](inplace=False, **kwargs) - val = reducer([self.get_nncf_tensor(input_, Dtype.FLOAT)]) - assert len(val) == 1 - assert self.all_close(val[0].tensor, self.cast_tensor(ref, Dtype.FLOAT)) - def test_noop_aggregator(self): aggregator = NoopAggregator(None) @@ -343,7 +329,17 @@ def _get_inputs_for_mean_median_aggregators( ) -> Iterator[NNCFTensor]: input_ = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]) input_with_outliers = np.array( - [100_000, -100_000, 200_000, -200_000, 
300_000, -300_000, 400_000, -400_000, 500_000] + [ + 100_000, + -100_000, + 200_000, + -200_000, + 300_000, + -300_000, + 400_000, + -400_000, + 500_000, + ] ) if dims == 2: input_ = input_.reshape((3, 3)) @@ -514,7 +510,7 @@ def test_mad_percentile_aggregators_not_implemented_aggregation_axes( @pytest.mark.parametrize( "reducer_name", - ["min", "max", "abs_max", "mean", "quantile", "abs_quantile", "batch_mean", "mean_per_ch"], + ["min", "max", "abs_max", "mean", "quantile", "abs_quantile", "mean_per_ch"], ) def test_reducers_name_hash_equal(self, reducer_name, reducers): params = {} @@ -524,8 +520,6 @@ def test_reducers_name_hash_equal(self, reducer_name, reducers): elif reducer_name in ["quantile", "abs_quantile"]: params["reduction_axes"] = [None, (0, 1, 3), (1, 2, 3)] params["quantile"] = [[0.01, 0.99], [0.001, 0.999]] - elif reducer_name == "batch_mean": - params["inplace"] = [False, True] elif reducer_name == "mean_per_ch": params["inplace"] = [False, True] params["channel_axis"] = [1, 2] diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index 78877e27a8b..3a27eb7c20b 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -463,7 +463,7 @@ class MeanBCTestParameters(RawBCTestParameters): ref_values: np.ndarray = None ref_shape: Tuple[int] = None - MEAN_ACT_AXIS_0_REF = np.array( + MEAN_ACT_AXIS_0_REF = np.mean( [ [ [[1.0, -4.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], @@ -473,16 +473,19 @@ class MeanBCTestParameters(RawBCTestParameters): ] ) - MEAN_WEIGHTS_AXIS_0_REF = np.array( + MEAN_WEIGHTS_AXIS_0_REF = np.mean( [ [ [[43.033337, -46.333332, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[43.033337, -46.333332, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[43.033337, -46.333332, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], ] - ] + ], + axis=(0), ) + MEAN_WEIGHTS_AXIS_0_REF = np.array([[-1, -0.09999999, 0]]) + @pytest.mark.parametrize( "test_params", [ diff --git 
a/tests/onnx/quantization/test_reducers_and_aggregators.py b/tests/onnx/quantization/test_reducers_and_aggregators.py index 3e1c3ac988a..ed9016067c4 100644 --- a/tests/onnx/quantization/test_reducers_and_aggregators.py +++ b/tests/onnx/quantization/test_reducers_and_aggregators.py @@ -17,7 +17,6 @@ from nncf.common.graph.layer_attributes import Dtype from nncf.onnx.statistics.collectors import ONNXAbsMaxReducer from nncf.onnx.statistics.collectors import ONNXAbsQuantileReducer -from nncf.onnx.statistics.collectors import ONNXBatchMeanReducer from nncf.onnx.statistics.collectors import ONNXMaxReducer from nncf.onnx.statistics.collectors import ONNXMeanPerChanelReducer from nncf.onnx.statistics.collectors import ONNXMeanReducer @@ -49,7 +48,6 @@ def reducers(self): "mean": ONNXMeanReducer, "quantile": ONNXQuantileReducer, "abs_quantile": ONNXAbsQuantileReducer, - "batch_mean": ONNXBatchMeanReducer, "mean_per_ch": ONNXMeanPerChanelReducer, } diff --git a/tests/openvino/native/quantization/test_reducers_and_aggregators.py b/tests/openvino/native/quantization/test_reducers_and_aggregators.py index c29218550ac..44b4d190409 100644 --- a/tests/openvino/native/quantization/test_reducers_and_aggregators.py +++ b/tests/openvino/native/quantization/test_reducers_and_aggregators.py @@ -17,7 +17,6 @@ from nncf.common.graph.layer_attributes import Dtype from nncf.openvino.statistics.collectors import OVAbsMaxReducer from nncf.openvino.statistics.collectors import OVAbsQuantileReducer -from nncf.openvino.statistics.collectors import OVBatchMeanReducer from nncf.openvino.statistics.collectors import OVMaxReducer from nncf.openvino.statistics.collectors import OVMeanPerChanelReducer from nncf.openvino.statistics.collectors import OVMeanReducer @@ -49,7 +48,6 @@ def reducers(self): "mean": OVMeanReducer, "quantile": OVQuantileReducer, "abs_quantile": OVAbsQuantileReducer, - "batch_mean": OVBatchMeanReducer, "mean_per_ch": OVMeanPerChanelReducer, } diff --git 
a/tests/openvino/native/test_model_transformer.py b/tests/openvino/native/test_model_transformer.py index 84980635e8a..675b9be22cb 100644 --- a/tests/openvino/native/test_model_transformer.py +++ b/tests/openvino/native/test_model_transformer.py @@ -149,13 +149,13 @@ def __str__(self) -> str: ), # Batch mean and mean per ch operations InplaceOpTestCase("batch_mean", None, lambda r: get_inplace_batch_mean_op(), ["ReduceMean"], [0]), - InplaceOpTestCase("mean_per_ch", 1, get_inplace_mean_per_ch, ["Reshape", "ReduceMean"], [(1, 3, 16), (0, 2)]), + InplaceOpTestCase("mean_per_ch", 1, get_inplace_mean_per_ch, ["Reshape", "ReduceMean"], [(1, 3, 16), (2)]), InplaceOpTestCase( "mean_per_ch", 2, get_inplace_mean_per_ch, ["Transpose", "Reshape", "ReduceMean"], - [(0, 2, 1, 3), (1, 4, 12), (0, 2)], + [(0, 2, 1, 3), (1, 4, 12), (2)], ), InplaceOpTestCase( "mean_per_ch", diff --git a/tests/openvino/native/test_statistics_aggregator.py b/tests/openvino/native/test_statistics_aggregator.py index ff573310080..7f3612cfbc8 100644 --- a/tests/openvino/native/test_statistics_aggregator.py +++ b/tests/openvino/native/test_statistics_aggregator.py @@ -23,7 +23,6 @@ from nncf.openvino.graph.transformations.commands import OVTargetPoint from nncf.openvino.statistics.aggregator import OVStatisticsAggregator from nncf.openvino.statistics.collectors import OV_REDUCERS_MAP -from nncf.openvino.statistics.collectors import OVBatchMeanReducer from nncf.openvino.statistics.collectors import OVMeanPerChanelReducer from nncf.quantization.algorithms.bias_correction.openvino_backend import OVBiasCorrectionAlgoBackend from nncf.quantization.algorithms.fast_bias_correction.openvino_backend import OVFastBiasCorrectionAlgoBackend @@ -122,5 +121,5 @@ def _get_shared_conv_model(self, dataset_samples): def reducers_map(self) -> List[TensorReducerBase]: map_ = OV_REDUCERS_MAP.copy() - map_.update({"batch_mean": OVBatchMeanReducer, "mean_per_ch": OVMeanPerChanelReducer}) + map_.update({"mean_per_ch": 
OVMeanPerChanelReducer}) return map_ diff --git a/tests/torch/ptq/test_reducers_and_aggregators.py b/tests/torch/ptq/test_reducers_and_aggregators.py index 1af7b4e4683..a195b2a2398 100644 --- a/tests/torch/ptq/test_reducers_and_aggregators.py +++ b/tests/torch/ptq/test_reducers_and_aggregators.py @@ -23,7 +23,6 @@ from nncf.torch.tensor_statistics.algo import create_register_input_hook from nncf.torch.tensor_statistics.collectors import PTAbsMaxReducer from nncf.torch.tensor_statistics.collectors import PTAbsQuantileReducer -from nncf.torch.tensor_statistics.collectors import PTBatchMeanReducer from nncf.torch.tensor_statistics.collectors import PTMaxReducer from nncf.torch.tensor_statistics.collectors import PTMeanPerChanelReducer from nncf.torch.tensor_statistics.collectors import PTMeanReducer @@ -55,7 +54,6 @@ def reducers(self): "mean": PTMeanReducer, "quantile": PTQuantileReducer, "abs_quantile": PTAbsQuantileReducer, - "batch_mean": PTBatchMeanReducer, "mean_per_ch": PTMeanPerChanelReducer, } From 1a96012495a709a69b93f25e4e06c46e31c1cc64 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 14 Feb 2024 10:34:51 +0100 Subject: [PATCH 067/108] upd torch mean_per_channel --- nncf/torch/tensor_statistics/collectors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nncf/torch/tensor_statistics/collectors.py b/nncf/torch/tensor_statistics/collectors.py index e0974989446..d10b1c8f4ff 100644 --- a/nncf/torch/tensor_statistics/collectors.py +++ b/nncf/torch/tensor_statistics/collectors.py @@ -118,7 +118,7 @@ def mean_per_channel(x: NNCFTensor, axis: int) -> NNCFTensor: return PTNNCFTensor(torch.mean(x.tensor, axis=0)) x = torch.moveaxis(x.tensor, axis, 1) t = x.reshape(x.shape[0], x.shape[1], -1) - return PTNNCFTensor(torch.mean(t, axis=(0, 2))) + return PTNNCFTensor(torch.mean(t, axis=2)) @staticmethod def batch_mean(x: NNCFTensor) -> NNCFTensor: @@ -520,7 +520,6 @@ def get_mean_statistic_collector( Aggregates all available collected 
statistics in case parameter is None. :return: Mean statistic collector. """ - reducer = PTMeanPerChanelReducer(channel_axis=channel_axis) noop_reducer = NoopReducer() aggregation_axes = ( From 2f89913618d056fe34a373ad481c9537323b49f7 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 14 Feb 2024 12:00:11 +0100 Subject: [PATCH 068/108] fix BC --- nncf/quantization/algorithms/bias_correction/algorithm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index f4d4bffdd36..2a34f4678e2 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -397,7 +397,7 @@ def _compute_bias_shift( q_output = self._backend_entity.process_model_output(q_output, output_tensor_name) q_outputs.append(self._backend_entity.tensor_processor.mean_per_channel(q_output, channel_axis).tensor) # Here we get the per-sample average, so the axis is 0. 
- q_output = np.mean(q_outputs, axis=0) + q_output = np.mean(q_outputs, axis=(0, 1)) return output_fp - q_output @staticmethod @@ -487,7 +487,6 @@ def input_filter_func(point): return ( self._algorithm_key in point.algorithm_to_tensor_collectors and point.target_point.type == TargetType.POST_LAYER_OPERATION - and point.target_point.port_id == port_id ) input_id = (node_name, port_id) From 74594c7f17642ee70f9d709cbb3c69f67edaab43 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 14 Feb 2024 14:05:40 +0100 Subject: [PATCH 069/108] fixes after merge --- nncf/experimental/common/tensor_statistics/collectors.py | 5 +---- nncf/openvino/engine.py | 6 ------ tests/common/test_statistics_aggregator.py | 1 - tests/openvino/native/test_model_transformer.py | 2 -- 4 files changed, 1 insertion(+), 13 deletions(-) diff --git a/nncf/experimental/common/tensor_statistics/collectors.py b/nncf/experimental/common/tensor_statistics/collectors.py index 7e7ededfc62..44cda0854c5 100644 --- a/nncf/experimental/common/tensor_statistics/collectors.py +++ b/nncf/experimental/common/tensor_statistics/collectors.py @@ -652,11 +652,8 @@ def _aggregate_impl(self) -> NNCFTensor: # Case when all registered tensors have identical shape if all(self._container[0].shape == x.shape for x in self._container): stacked_value = self._tensor_processor.stack(self._container) - if self._keepdims: - aggregated = self._aggregation_fn(stacked_value, axis=self._aggregation_axes, keepdims=self._keepdims) - return self._tensor_processor.squeeze(aggregated, 0).tensor aggregated = self._aggregation_fn(stacked_value, axis=self._aggregation_axes, keepdims=self._keepdims) - return aggregated.tensor + return self._tensor_processor.squeeze(aggregated, 0).tensor online_axes = tuple(x - 1 for x in self._aggregation_axes if x > 0) # Case when some registered tensors have different shapes and diff --git a/nncf/openvino/engine.py b/nncf/openvino/engine.py index 84aca770d8a..d5db5fb6c94 100644 --- 
a/nncf/openvino/engine.py +++ b/nncf/openvino/engine.py @@ -31,10 +31,6 @@ class OVCompiledModelEngine(Engine): def __init__(self, compiled_model: ov.CompiledModel, stateful: bool): self.infer_request = compiled_model.create_infer_request() self.reset_state = stateful and hasattr(self.infer_request, "reset_state") - self.input_tensor_names = set() - self.number_of_inputs = len(compiled_model.inputs) - for model_input in compiled_model.inputs: - self.input_tensor_names.update(model_input.get_names()) def infer( self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]] @@ -46,8 +42,6 @@ def infer( :param input_data: Inputs for the model. :return output_data: Model's output. """ - model_outputs = self.compiled_model(input_data) - if self.reset_state: self.infer_request.reset_state() diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index 3a27eb7c20b..98b1054ac1d 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -817,7 +817,6 @@ def test_statistic_merging(self, test_params, key, dataset_samples, inplace_stat StatisticsType.MEAN, StatisticsType.QUANTILE, StatisticsType.ABS_QUANTILE, - "batch_mean", "mean_per_ch", ], ) diff --git a/tests/openvino/native/test_model_transformer.py b/tests/openvino/native/test_model_transformer.py index 675b9be22cb..c0a24819262 100644 --- a/tests/openvino/native/test_model_transformer.py +++ b/tests/openvino/native/test_model_transformer.py @@ -23,7 +23,6 @@ from nncf.common.graph.transformations.layout import TransformationLayout from nncf.experimental.tensor import Tensor from nncf.openvino.graph.model_transformer import OVModelTransformer -from nncf.openvino.graph.node_utils import get_inplace_batch_mean_op from nncf.openvino.graph.node_utils import get_inplace_max_op from nncf.openvino.graph.node_utils import get_inplace_mean_op from nncf.openvino.graph.node_utils import 
get_inplace_mean_per_ch @@ -148,7 +147,6 @@ def __str__(self) -> str: "abs_max", None, lambda r: get_inplace_max_op(r, True), ["Abs", "ReduceMax"], [None, (0, 1, 2, 3)] ), # Batch mean and mean per ch operations - InplaceOpTestCase("batch_mean", None, lambda r: get_inplace_batch_mean_op(), ["ReduceMean"], [0]), InplaceOpTestCase("mean_per_ch", 1, get_inplace_mean_per_ch, ["Reshape", "ReduceMean"], [(1, 3, 16), (2)]), InplaceOpTestCase( "mean_per_ch", From d760caf6ede95a9e78a62c2fdfc3a09bbf727d9b Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 15 Feb 2024 11:32:13 +0100 Subject: [PATCH 070/108] Fix BC calculations --- .../algorithms/bias_correction/algorithm.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index 2a34f4678e2..5b46a2f1e30 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -27,6 +27,7 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.graph.transformations.layout import TransformationLayout +from nncf.common.graph.utils import get_reduction_axes from nncf.common.logging.track_progress import track from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer @@ -173,13 +174,7 @@ def apply( current_bias = self._backend_entity.get_bias_value(node, model_copy, nncf_graph) - channel_axis = node.metatype.output_channel_axis - if current_bias.ndim > 1: - channel_axis = range(current_bias.ndim)[channel_axis] - axes = [i for i in range(current_bias.ndim) if i != channel_axis] - bias_shift = np.expand_dims(bias_shift, axes) - - bias_shift = self._reshape_bias_shift(bias_shift, current_bias, channel_axis) + 
bias_shift = self._reshape_bias_shift(bias_shift, current_bias, node.metatype.output_channel_axis) updated_bias = current_bias + bias_shift magnitude = self._get_bias_shift_magnitude(current_bias, updated_bias) @@ -205,7 +200,7 @@ def apply( return main_model_transformer.transform(main_transformations_layout) @staticmethod - def _reshape_bias_shift(bias_shift, bias_value, channel_axis: int): + def _reshape_bias_shift(bias_shift: np.ndarray, bias_value: np.ndarray, channel_axis: int) -> np.ndarray: """ Reshape bias_shift tensor in case of dimensions of bias_value is more then 1. @@ -392,12 +387,17 @@ def _compute_bias_shift( engine = EngineFactory.create(model) channel_axis = node.metatype.output_channel_axis q_outputs = [] + axis = None for feed_dict in feed_dicts: q_output = engine.infer(feed_dict) q_output = self._backend_entity.process_model_output(q_output, output_tensor_name) - q_outputs.append(self._backend_entity.tensor_processor.mean_per_channel(q_output, channel_axis).tensor) + if len(q_output.shape) < 3: + axis = 0 + else: + axis = get_reduction_axes((channel_axis,), q_output.shape) if axis is None else axis + q_outputs.append((np.mean(q_output.tensor, axis=axis))) # Here we get the per-sample average, so the axis is 0. 
- q_output = np.mean(q_outputs, axis=(0, 1)) + q_output = np.mean(q_outputs, axis=(0)) return output_fp - q_output @staticmethod From 50ac6b4ead66038590141504bf93359939f5cc23 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 20 Feb 2024 10:47:22 +0100 Subject: [PATCH 071/108] revert FBC and BC changes --- .../common/tensor_statistics/collectors.py | 8 + nncf/onnx/statistics/collectors.py | 18 +- nncf/openvino/graph/node_utils.py | 11 +- nncf/openvino/statistics/collectors.py | 22 ++- .../algorithms/bias_correction/algorithm.py | 44 ++--- .../fast_bias_correction/algorithm.py | 17 +- .../fast_bias_correction/onnx_backend.py | 15 +- .../fast_bias_correction/openvino_backend.py | 15 +- .../fast_bias_correction/torch_backend.py | 12 +- nncf/torch/tensor_statistics/collectors.py | 17 +- .../test_reducers_and_aggregators.py | 30 +-- tests/common/test_statistics_aggregator.py | 173 +++++++++--------- .../test_reducers_and_aggregators.py | 2 + .../test_reducers_and_aggregators.py | 2 + .../openvino/native/test_model_transformer.py | 6 +- .../native/test_statistics_aggregator.py | 3 +- .../ptq/test_reducers_and_aggregators.py | 2 + 17 files changed, 210 insertions(+), 187 deletions(-) diff --git a/nncf/experimental/common/tensor_statistics/collectors.py b/nncf/experimental/common/tensor_statistics/collectors.py index 44cda0854c5..1017877bf24 100644 --- a/nncf/experimental/common/tensor_statistics/collectors.py +++ b/nncf/experimental/common/tensor_statistics/collectors.py @@ -555,6 +555,14 @@ def _reduce_out_of_place(self, x: List[NNCFTensor]) -> List[NNCFTensor]: return self._tensor_processor.quantile(x, self._quantile, reduction_axes, keepdims=self._keepdims) +class BatchMeanReducer(TensorReducerBase): + def __init__(self, inplace: bool = False): + super().__init__(None, inplace) + + def _reduce_out_of_place(self, x: List[NNCFTensor]) -> List[NNCFTensor]: + return [self._tensor_processor.batch_mean(x[0])] + + class MeanPerChReducer(TensorReducerBase): def 
__init__(self, channel_axis: int = 1, inplace: bool = False): super().__init__(inplace=inplace) diff --git a/nncf/onnx/statistics/collectors.py b/nncf/onnx/statistics/collectors.py index 3b91f46aeab..c7ea4fbf471 100644 --- a/nncf/onnx/statistics/collectors.py +++ b/nncf/onnx/statistics/collectors.py @@ -17,6 +17,7 @@ from nncf.common.tensor_statistics.collectors import NNCFCollectorTensorProcessor from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer +from nncf.experimental.common.tensor_statistics.collectors import BatchMeanReducer from nncf.experimental.common.tensor_statistics.collectors import MaxReducer from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator from nncf.experimental.common.tensor_statistics.collectors import MeanPerChReducer @@ -158,7 +159,7 @@ def mean_per_channel(x: ONNXNNCFTensor, axis: int) -> ONNXNNCFTensor: return ONNXNNCFTensor(np.mean(x.tensor, axis=0)) x = np.moveaxis(x.tensor, axis, 1) t = x.reshape(x.shape[0], x.shape[1], -1) - return ONNXNNCFTensor(np.mean(t, axis=(2,))) + return ONNXNNCFTensor(np.mean(t, axis=(0, 2))) @staticmethod def transpose(x: ONNXNNCFTensor, axes: Tuple[int, ...]) -> ONNXNNCFTensor: @@ -220,6 +221,10 @@ class ONNXAbsQuantileReducer(ONNXBasicReducer, AbsQuantileReducer): pass +class ONNXBatchMeanReducer(ONNXBasicReducer, BatchMeanReducer): + pass + + class ONNXMeanPerChanelReducer(ONNXBasicReducer, MeanPerChReducer): pass @@ -237,16 +242,17 @@ def get_mean_statistic_collector( :param inplace: Whether the mean reducer should be calculated inplace or out of place. :return: Mean statistic collector. 
""" - reducer = ONNXMeanPerChanelReducer(channel_axis=channel_axis, inplace=inplace) + inplace = False + if channel_axis == 0: + reducer = ONNXBatchMeanReducer(inplace) + else: + reducer = ONNXMeanPerChanelReducer(channel_axis=channel_axis, inplace=inplace) noop_reducer = NoopReducer() - aggregation_axes = ( - (0,) if channel_axis == -1 else (0, 1) - ) # Assume that batch is on 0-axis for Convolutions and No batch axis for MatMul + kwargs = { "tensor_processor": ONNXNNCFCollectorTensorProcessor, "num_samples": num_samples, "window_size": window_size, - "aggregation_axes": aggregation_axes, } aggregate_mean = MeanAggregator(**kwargs) diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index bad3c3dae06..88fafc899b5 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -255,6 +255,15 @@ def get_inplace_mean_op(reduction_axes: Optional[ReductionAxes]) -> InplaceInser return get_inplace_reduce_op(opset.reduce_mean, reduction_axes, False) +def get_inplace_batch_mean_op() -> InplaceInsertionFnType: + """ + Returns inplace batch mean function that adds reduce batch mean node to a passed node. + + :returns: Inplace insertion function to use in ModelTransformer. 
+ """ + return get_inplace_reduce_op(opset.reduce_mean, np.array(0), False) + + def get_inplace_mean_per_ch(axis: int) -> InplaceInsertionFnType: """ Returns inplace mean per channel function that adds reduce mean per channel node @@ -297,7 +306,7 @@ def get_reduce_op(node: ov.Node, output_port_id: int, output_node_name: str) -> ) return opset.reduce_mean( reshape_op, - reduction_axes=np.array((2)), + reduction_axes=np.array((0, 2)), keep_dims=False, name=output_node_name, ) diff --git a/nncf/openvino/statistics/collectors.py b/nncf/openvino/statistics/collectors.py index 8ac7a94ca77..beaac8d2cba 100644 --- a/nncf/openvino/statistics/collectors.py +++ b/nncf/openvino/statistics/collectors.py @@ -18,6 +18,7 @@ from nncf.common.tensor_statistics.collectors import NNCFCollectorTensorProcessor from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer +from nncf.experimental.common.tensor_statistics.collectors import BatchMeanReducer from nncf.experimental.common.tensor_statistics.collectors import InplaceInsertionFNType from nncf.experimental.common.tensor_statistics.collectors import MaxReducer from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator @@ -30,6 +31,7 @@ from nncf.experimental.common.tensor_statistics.collectors import RawReducer from nncf.experimental.common.tensor_statistics.collectors import ShapeAggregator from nncf.experimental.common.tensor_statistics.collectors import TensorCollector +from nncf.openvino.graph.node_utils import get_inplace_batch_mean_op from nncf.openvino.graph.node_utils import get_inplace_max_op from nncf.openvino.graph.node_utils import get_inplace_mean_op from nncf.openvino.graph.node_utils import get_inplace_mean_per_ch @@ -111,7 +113,7 @@ def mean_per_channel(x: NNCFTensor, axis: int) -> NNCFTensor: return OVNNCFTensor(np.mean(x.tensor, axis=0)) x = np.moveaxis(x.tensor, axis, 1) t = 
x.reshape(x.shape[0], x.shape[1], -1) - return OVNNCFTensor(np.mean(t, axis=(2,))) + return OVNNCFTensor(np.mean(t, axis=(0, 2))) @staticmethod def transpose(x: NNCFTensor, axes: Tuple[int, ...]) -> NNCFTensor: @@ -219,6 +221,14 @@ def get_inplace_fn(self): return get_inplace_mean_op(self._reduction_axes) +class OVBatchMeanReducer(BatchMeanReducer): + def _get_processor(self): + return OVNNCFCollectorTensorProcessor + + def get_inplace_fn(self): + return get_inplace_batch_mean_op() + + class OVMeanPerChanelReducer(MeanPerChReducer): def _get_processor(self): return OVNNCFCollectorTensorProcessor @@ -256,16 +266,16 @@ def get_mean_statistic_collector( :param inplace: Whether the mean reducer should be calculated inplace or out of place. :return: Mean statistic collector. """ - reducer = OVMeanPerChanelReducer(channel_axis=channel_axis, inplace=inplace) + if channel_axis == 0: + reducer = OVBatchMeanReducer(inplace) + else: + reducer = OVMeanPerChanelReducer(channel_axis=channel_axis, inplace=inplace) noop_reducer = NoopReducer() - aggregation_axes = ( - (0,) if channel_axis == -1 else (0, 1) - ) # Assume that batch is on 0-axis for Convolutions and No batch axis for MatMul + kwargs = { "tensor_processor": OVNNCFCollectorTensorProcessor, "num_samples": num_samples, "window_size": window_size, - "aggregation_axes": aggregation_axes, } aggregate_mean = MeanAggregator(**kwargs) aggregate_shape = ShapeAggregator() diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index 5b46a2f1e30..4dc605c85e6 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -27,7 +27,6 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.graph.transformations.layout import TransformationLayout -from nncf.common.graph.utils 
import get_reduction_axes from nncf.common.logging.track_progress import track from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer @@ -43,6 +42,7 @@ class BiasCorrection(Algorithm): + """ Post-training BiasCorrection algorithm implementation @@ -174,7 +174,12 @@ def apply( current_bias = self._backend_entity.get_bias_value(node, model_copy, nncf_graph) - bias_shift = self._reshape_bias_shift(bias_shift, current_bias, node.metatype.output_channel_axis) + channel_axis = node.metatype.output_channel_axis + if current_bias.ndim > 1: + channel_axis = range(current_bias.ndim)[channel_axis] + axes = [i for i in range(current_bias.ndim) if i != channel_axis] + bias_shift = np.expand_dims(bias_shift, axes) + updated_bias = current_bias + bias_shift magnitude = self._get_bias_shift_magnitude(current_bias, updated_bias) @@ -199,24 +204,6 @@ def apply( return main_model_transformer.transform(main_transformations_layout) - @staticmethod - def _reshape_bias_shift(bias_shift: np.ndarray, bias_value: np.ndarray, channel_axis: int) -> np.ndarray: - """ - Reshape bias_shift tensor in case of dimensions of bias_value is more then 1. - - :param bias_shift: Bias shift tensor. - :param bias_value: Bias value tensor. - :param channel_axis: Axis to update bias. - - :return TTensor: Updated bias_shift. - """ - bias_shift = bias_shift.squeeze() - if bias_value.ndim > 1: - new_shape = [1] * bias_value.ndim - new_shape[channel_axis] = bias_shift.shape[0] - bias_shift = bias_shift.reshape(new_shape) - return bias_shift - def _is_node_correctable(self, node: NNCFNode, nncf_graph: NNCFGraph) -> bool: """ Verify if node bias can be corrected or not. 
@@ -387,17 +374,12 @@ def _compute_bias_shift( engine = EngineFactory.create(model) channel_axis = node.metatype.output_channel_axis q_outputs = [] - axis = None for feed_dict in feed_dicts: q_output = engine.infer(feed_dict) q_output = self._backend_entity.process_model_output(q_output, output_tensor_name) - if len(q_output.shape) < 3: - axis = 0 - else: - axis = get_reduction_axes((channel_axis,), q_output.shape) if axis is None else axis - q_outputs.append((np.mean(q_output.tensor, axis=axis))) + q_outputs.append(self._backend_entity.tensor_processor.mean_per_channel(q_output, channel_axis).tensor) # Here we get the per-sample average, so the axis is 0. - q_output = np.mean(q_outputs, axis=(0)) + q_output = np.mean(q_outputs, axis=0) return output_fp - q_output @staticmethod @@ -487,16 +469,18 @@ def input_filter_func(point): return ( self._algorithm_key in point.algorithm_to_tensor_collectors and point.target_point.type == TargetType.POST_LAYER_OPERATION + and point.target_point.port_id == port_id ) input_id = (node_name, port_id) if input_id in self._fp_inputs: return self._fp_inputs[input_id] + input_fp = [] for tensor_collector in statistic_points.get_algo_statistics_for_node( node_name, input_filter_func, self._algorithm_key ): - input_fp = tensor_collector.get_statistics().values + input_fp.extend(tensor_collector.get_statistics().values) self._fp_inputs[input_id] = input_fp return self._fp_inputs[input_id] @@ -519,10 +503,10 @@ def output_filter_func(point): for tensor_collector in statistic_points.get_algo_statistics_for_node( node_name, output_filter_func, self._algorithm_key ): - output_fp = tensor_collector.get_statistics().mean_values + output_fp.extend(tensor_collector.get_statistics().mean_values) return np.array(output_fp) - def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: 
self._set_backend_entity(model) statistic_container = StatisticPointsContainer() diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 7d5233cf320..0ba1e2b1524 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -214,7 +214,6 @@ def _reshape_bias_shift(bias_shift: Tensor, bias_value: Tensor, channel_axis: in :return TTensor: Updated bias_shift. """ - bias_shift = bias_shift.squeeze() if bias_value.ndim > 1: new_shape = [1] * bias_value.ndim new_shape[channel_axis] = bias_shift.shape[0] @@ -236,12 +235,14 @@ def input_filter_func(point): TargetType.OPERATOR_PRE_HOOK, ] + input_fp = [] + input_shape = [] for tensor_collector in statistic_points.get_algo_statistics_for_node( node_name, input_filter_func, self._algorithm_key ): statistics = tensor_collector.get_statistics() - input_fp = Tensor(statistics.mean_values) - input_shape = statistics.shape + input_fp.extend(Tensor(statistics.mean_values)) + input_shape.extend(statistics.shape) return input_fp, input_shape def _get_fp_outputs(self, statistic_points: StatisticPointsContainer, node_name: str) -> List[TTensor]: @@ -259,10 +260,11 @@ def output_filter_func(point): TargetType.OPERATOR_POST_HOOK, ] + output_fp = [] for tensor_collector in statistic_points.get_algo_statistics_for_node( node_name, output_filter_func, self._algorithm_key ): - output_fp = Tensor(tensor_collector.get_statistics().mean_values) + output_fp.extend(Tensor(tensor_collector.get_statistics().mean_values)) return output_fp def _extract_submodel(self, model_transformer: ModelTransformer, node_name: str) -> TModel: @@ -303,10 +305,7 @@ def _get_bias_shift( output_name: str, ) -> TTensor: """ - Calculates updated bias: - 1) Infers the quantized submodel using as input created blob. - 2) Calculates the mean of quantized output by channel axis. 
- 3) Calculates the difference between float output and the tensor from step 2. + Calculates updated bias. :param engine: Backend-specific engine instance for the model execution. :param model: Backend-specific sub-model for the execution. @@ -320,7 +319,7 @@ def _get_bias_shift( raw_output = engine.infer(input_blob) q_outputs = self._backend_entity.process_model_output(raw_output, output_name) q_outputs = mean_per_channel(q_outputs, channel_axis) - bias_shift = output_fp - q_outputs + bias_shift = fns.stack(output_fp) - q_outputs return bias_shift def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: diff --git a/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py b/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py index d59f560267c..fcbd1e3ee18 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py @@ -62,16 +62,13 @@ def get_sub_input_output_names(subgraph: onnx.ModelProto) -> Tuple[str, str]: return subgraph.graph.input[0].name, subgraph.graph.output[0].name @staticmethod - def create_input_data(shape: Tuple[int], data: Tensor, input_name: str, channel_axis: int) -> Dict[str, np.ndarray]: + def create_input_data( + shape: Tuple[int], data: List[Tensor], input_name: str, channel_axis: int + ) -> Dict[str, np.array]: blob = np.zeros(shape, dtype=data[0].data.dtype) - blob = np.moveaxis(blob, channel_axis, 0) - data = data.squeeze() - if data.size == 1: - blob[0] = data.item() - else: - for i, tensor in enumerate(data): - blob[i] = tensor.data - blob = np.moveaxis(blob, 0, channel_axis) + for j, idx in enumerate(np.ndindex(blob.shape[channel_axis])): + index = tuple(slice(None) if i != channel_axis else idx for i in range(blob.ndim)) + blob[index] = data[j].data input_data = {input_name: blob} return input_data diff --git 
a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index 2bf01b3b6f6..58cc3f04ff2 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -61,16 +61,13 @@ def get_sub_input_output_names(subgraph: ov.Model) -> Tuple[str, str]: return subgraph.inputs[0].get_any_name(), subgraph.outputs[0].get_any_name() @staticmethod - def create_input_data(shape: Tuple[int], data: Tensor, input_name: str, channel_axis: int) -> Dict[str, np.ndarray]: + def create_input_data( + shape: Tuple[int], data: List[Tensor], input_name: str, channel_axis: int + ) -> Dict[str, np.ndarray]: blob = np.zeros(shape, dtype=data[0].data.dtype) - blob = np.moveaxis(blob, channel_axis, 0) - data = data.squeeze() - if data.size == 1: - blob[0] = data.item() - else: - for i, tensor in enumerate(data): - blob[i] = tensor.data - blob = np.moveaxis(blob, 0, channel_axis) + for j, idx in enumerate(np.ndindex(blob.shape[channel_axis])): + index = tuple(slice(None) if i != channel_axis else idx for i in range(blob.ndim)) + blob[index] = data[j].data input_data = {input_name: blob} return input_data diff --git a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py index 372541d3286..7623fdb0012 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py @@ -11,6 +11,7 @@ from typing import Dict, List, Optional, Tuple +import numpy as np import torch from nncf.common.graph import NNCFGraph @@ -75,14 +76,9 @@ def get_sub_input_output_names(subgraph: NNCFNetwork) -> Tuple[str, str]: @staticmethod def create_input_data(shape: Tuple[int], data: List[Tensor], input_name: str, channel_axis: int) -> torch.Tensor: blob = torch.zeros(shape, 
dtype=data[0].data.dtype, device=data[0].data.device) - blob = torch.moveaxis(blob, channel_axis, 0) - data = data.squeeze() - if data.size == 1: - blob[0] = data.item() - else: - for i, tensor in enumerate(data): - blob[i] = tensor.data - blob = torch.moveaxis(blob, 0, channel_axis) + for j, idx in enumerate(np.ndindex(blob.shape[channel_axis])): + index = tuple(slice(None) if i != channel_axis else idx for i in range(blob.ndim)) + blob[index] = data[j].data return blob @staticmethod diff --git a/nncf/torch/tensor_statistics/collectors.py b/nncf/torch/tensor_statistics/collectors.py index d10b1c8f4ff..968a6afbd69 100644 --- a/nncf/torch/tensor_statistics/collectors.py +++ b/nncf/torch/tensor_statistics/collectors.py @@ -21,6 +21,7 @@ from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer from nncf.experimental.common.tensor_statistics.collectors import AggregatorBase +from nncf.experimental.common.tensor_statistics.collectors import BatchMeanReducer from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator from nncf.experimental.common.tensor_statistics.collectors import MaxReducer from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator @@ -118,7 +119,7 @@ def mean_per_channel(x: NNCFTensor, axis: int) -> NNCFTensor: return PTNNCFTensor(torch.mean(x.tensor, axis=0)) x = torch.moveaxis(x.tensor, axis, 1) t = x.reshape(x.shape[0], x.shape[1], -1) - return PTNNCFTensor(torch.mean(t, axis=2)) + return PTNNCFTensor(torch.mean(t, axis=(0, 2))) @staticmethod def batch_mean(x: NNCFTensor) -> NNCFTensor: @@ -247,6 +248,10 @@ class PTAbsQuantileReducer(PTReducerMixIn, AbsQuantileReducer): pass +class PTBatchMeanReducer(PTReducerMixIn, BatchMeanReducer): + pass + + class PTMeanPerChanelReducer(PTReducerMixIn, MeanPerChReducer): pass @@ -520,16 +525,16 @@ def get_mean_statistic_collector( Aggregates all available 
collected statistics in case parameter is None. :return: Mean statistic collector. """ - reducer = PTMeanPerChanelReducer(channel_axis=channel_axis) + if channel_axis == 0: + reducer = PTBatchMeanReducer() + else: + reducer = PTMeanPerChanelReducer(channel_axis=channel_axis) noop_reducer = NoopReducer() - aggregation_axes = ( - (0,) if channel_axis == -1 else (0, 1) - ) # Assume that batch is on 0-axis for Convolutions and No batch axis for MatMul + kwargs = { "tensor_processor": PTNNCFCollectorTensorProcessor, "num_samples": num_samples, "window_size": window_size, - "aggregation_axes": aggregation_axes, } aggregate_mean = MeanAggregator(**kwargs) aggregate_shape = ShapeAggregator() diff --git a/tests/common/experimental/test_reducers_and_aggregators.py b/tests/common/experimental/test_reducers_and_aggregators.py index 0d579397234..ffc36d134e4 100644 --- a/tests/common/experimental/test_reducers_and_aggregators.py +++ b/tests/common/experimental/test_reducers_and_aggregators.py @@ -225,6 +225,20 @@ def test_quantile_reducers(self, reducer_name, ref, reducers): for i, ref_ in enumerate(ref): assert self.all_close(val[i].tensor, self.cast_tensor(ref_, Dtype.FLOAT)) + @pytest.mark.parametrize( + "reducer_name,ref,kwargs", + [ + ("batch_mean", [[[[-12.5, -11.5, -10.5], [-9.5, -8.5, -7.5], [-6.5, -5.5, -4.5]]]], {}), + ("mean_per_ch", [-22.0, -13.0, -4.0, 5.0], {"channel_axis": 0}), + ], + ) + def test_batch_mean_mean_per_ch_reducers(self, reducer_name, ref, reducers, kwargs): + input_ = np.arange(-26, 10).reshape((4, 1, 3, 3)) + reducer = reducers[reducer_name](inplace=False, **kwargs) + val = reducer([self.get_nncf_tensor(input_, Dtype.FLOAT)]) + assert len(val) == 1 + assert self.all_close(val[0].tensor, self.cast_tensor(ref, Dtype.FLOAT)) + def test_noop_aggregator(self): aggregator = NoopAggregator(None) @@ -329,17 +343,7 @@ def _get_inputs_for_mean_median_aggregators( ) -> Iterator[NNCFTensor]: input_ = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]) input_with_outliers = 
np.array( - [ - 100_000, - -100_000, - 200_000, - -200_000, - 300_000, - -300_000, - 400_000, - -400_000, - 500_000, - ] + [100_000, -100_000, 200_000, -200_000, 300_000, -300_000, 400_000, -400_000, 500_000] ) if dims == 2: input_ = input_.reshape((3, 3)) @@ -510,7 +514,7 @@ def test_mad_percentile_aggregators_not_implemented_aggregation_axes( @pytest.mark.parametrize( "reducer_name", - ["min", "max", "abs_max", "mean", "quantile", "abs_quantile", "mean_per_ch"], + ["min", "max", "abs_max", "mean", "quantile", "abs_quantile", "batch_mean", "mean_per_ch"], ) def test_reducers_name_hash_equal(self, reducer_name, reducers): params = {} @@ -520,6 +524,8 @@ def test_reducers_name_hash_equal(self, reducer_name, reducers): elif reducer_name in ["quantile", "abs_quantile"]: params["reduction_axes"] = [None, (0, 1, 3), (1, 2, 3)] params["quantile"] = [[0.01, 0.99], [0.001, 0.999]] + elif reducer_name == "batch_mean": + params["inplace"] = [False, True] elif reducer_name == "mean_per_ch": params["inplace"] = [False, True] params["channel_axis"] = [1, 2] diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index 98b1054ac1d..a7b88a6dfc5 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -13,7 +13,7 @@ from dataclasses import dataclass from enum import Enum from itertools import product -from typing import List, Tuple, Type, Union +from typing import Any, List, Type, Union import numpy as np import pytest @@ -453,17 +453,14 @@ def filter_func(point): assert stat.max_values.shape == ref_shape @dataclass - class RawBCTestParameters: + class BCTestParameters: algo: BiasCorrectionAlgos + collector_type: BCStatsCollectors target_type: TargetType + ref_values: Any = None axis: int = 1 - @dataclass - class MeanBCTestParameters(RawBCTestParameters): - ref_values: np.ndarray = None - ref_shape: Tuple[int] = None - - MEAN_ACT_AXIS_0_REF = np.mean( + MEAN_ACT_AXIS_0_REF = np.array( [ 
[ [[1.0, -4.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], @@ -473,125 +470,126 @@ class MeanBCTestParameters(RawBCTestParameters): ] ) - MEAN_WEIGHTS_AXIS_0_REF = np.mean( + MEAN_WEIGHTS_AXIS_0_REF = np.array( [ [ [[43.033337, -46.333332, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[43.033337, -46.333332, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[43.033337, -46.333332, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], ] - ], - axis=(0), + ] ) - MEAN_WEIGHTS_AXIS_0_REF = np.array([[-1, -0.09999999, 0]]) - @pytest.mark.parametrize( "test_params", [ # TargeType: activations - MeanBCTestParameters( - algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - target_type=TargetType.POST_LAYER_OPERATION, - ref_values=MEAN_ACT_AXIS_0_REF, - ref_shape=(1, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.POST_LAYER_OPERATION, + (MEAN_ACT_AXIS_0_REF, (1, 3, 3, 3)), axis=0, ), - MeanBCTestParameters( - algo=BiasCorrectionAlgos.BIAS_CORRECTION, - target_type=TargetType.POST_LAYER_OPERATION, - ref_values=MEAN_ACT_AXIS_0_REF, - ref_shape=(1, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.POST_LAYER_OPERATION, + (MEAN_ACT_AXIS_0_REF, (1, 3, 3, 3)), axis=0, ), - MeanBCTestParameters( - algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - target_type=TargetType.POST_LAYER_OPERATION, - ref_values=(np.array((0.0, 0.45, 0.5))), - ref_shape=(1, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.POST_LAYER_OPERATION, + (np.array((0.0, 0.45, 0.5)), (1, 3, 3, 3)), axis=1, ), - MeanBCTestParameters( - algo=BiasCorrectionAlgos.BIAS_CORRECTION, - target_type=TargetType.POST_LAYER_OPERATION, - ref_values=(np.array((0.0, 0.45, 0.5))), - ref_shape=(1, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.POST_LAYER_OPERATION, + (np.array((0.0, 0.45, 0.5)), (1, 3, 3, 3)), axis=1, ), - 
MeanBCTestParameters( - algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - target_type=TargetType.POST_LAYER_OPERATION, - ref_values=(np.array([-0.04999995, 0.5, 0.5])), - ref_shape=(1, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.POST_LAYER_OPERATION, + (np.array([-0.04999995, 0.5, 0.5]), (1, 3, 3, 3)), axis=2, ), - MeanBCTestParameters( - algo=BiasCorrectionAlgos.BIAS_CORRECTION, - target_type=TargetType.POST_LAYER_OPERATION, - ref_values=(np.array([-0.04999995, 0.5, 0.5])), - ref_shape=(1, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.POST_LAYER_OPERATION, + (np.array([-0.04999995, 0.5, 0.5]), (1, 3, 3, 3)), axis=2, ), - RawBCTestParameters(algo=BiasCorrectionAlgos.BIAS_CORRECTION, target_type=TargetType.POST_LAYER_OPERATION), + BCTestParameters( + BiasCorrectionAlgos.BIAS_CORRECTION, BCStatsCollectors.RAW, TargetType.POST_LAYER_OPERATION + ), # TargeType: weights - MeanBCTestParameters( - algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - target_type=TargetType.OPERATION_WITH_WEIGHTS, - ref_values=(MEAN_WEIGHTS_AXIS_0_REF), - ref_shape=(3, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.OPERATION_WITH_WEIGHTS, + (MEAN_WEIGHTS_AXIS_0_REF, (3, 3, 3, 3)), axis=0, ), - MeanBCTestParameters( - algo=BiasCorrectionAlgos.BIAS_CORRECTION, - target_type=TargetType.OPERATION_WITH_WEIGHTS, - ref_values=(MEAN_WEIGHTS_AXIS_0_REF), - ref_shape=(3, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.OPERATION_WITH_WEIGHTS, + (MEAN_WEIGHTS_AXIS_0_REF, (3, 3, 3, 3)), axis=0, ), - MeanBCTestParameters( - algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - target_type=TargetType.OPERATION_WITH_WEIGHTS, - ref_values=(np.array([-0.36666664, -0.36666664, -0.36666664])), - ref_shape=(3, 3, 3, 3), + BCTestParameters( + 
BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.OPERATION_WITH_WEIGHTS, + (np.array([-0.36666664, -0.36666664, -0.36666664]), (3, 3, 3, 3)), axis=1, ), - MeanBCTestParameters( - algo=BiasCorrectionAlgos.BIAS_CORRECTION, - target_type=TargetType.OPERATION_WITH_WEIGHTS, - ref_values=(np.array([-0.36666664, -0.36666664, -0.36666664])), - ref_shape=(3, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.OPERATION_WITH_WEIGHTS, + (np.array([-0.36666664, -0.36666664, -0.36666664]), (3, 3, 3, 3)), axis=1, ), - MeanBCTestParameters( - algo=BiasCorrectionAlgos.FAST_BIAS_CORRECTION, - target_type=TargetType.OPERATION_WITH_WEIGHTS, - ref_values=(np.array([-1.1, 0.0, 0.0])), - ref_shape=(3, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.FAST_BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.OPERATION_WITH_WEIGHTS, + (np.array([-1.1, 0.0, 0.0]), (3, 3, 3, 3)), axis=2, ), - MeanBCTestParameters( - algo=BiasCorrectionAlgos.BIAS_CORRECTION, - target_type=TargetType.OPERATION_WITH_WEIGHTS, - ref_values=(np.array([-1.1, 0.0, 0.0])), - ref_shape=(3, 3, 3, 3), + BCTestParameters( + BiasCorrectionAlgos.BIAS_CORRECTION, + BCStatsCollectors.MEAN, + TargetType.OPERATION_WITH_WEIGHTS, + (np.array([-1.1, 0.0, 0.0]), (3, 3, 3, 3)), axis=2, ), ], ) def test_statistics_aggregator_bias_correction( - self, dataset_samples, test_params: RawBCTestParameters, inplace_statistics + self, dataset_samples, test_params: BCTestParameters, inplace_statistics ): name_to_algo_backend_map = { BiasCorrectionAlgos.BIAS_CORRECTION: self.get_bias_correction_algo_backend_cls, BiasCorrectionAlgos.FAST_BIAS_CORRECTION: self.get_fast_bias_correction_algo_backend_cls, } algo_backend = name_to_algo_backend_map[test_params.algo]() - if isinstance(test_params, self.MeanBCTestParameters): + if test_params.collector_type == BCStatsCollectors.MEAN: tensor_collector = algo_backend.mean_statistic_collector( test_params.axis, 
inplace_statistics, len(dataset_samples) ) - elif isinstance(test_params, self.RawBCTestParameters): + elif test_params.collector_type == BCStatsCollectors.RAW: tensor_collector = algo_backend.raw_statistic_collector(len(dataset_samples)) + else: + raise nncf.InvalidCollectorTypeError(f"Invalid collector type: {test_params.collector_type}") target_point = self.get_target_point(test_params.target_type) @@ -619,19 +617,18 @@ def filter_func(point): for tensor_collector in tensor_collectors: stat = tensor_collector.get_statistics() - if isinstance(test_params, self.MeanBCTestParameters): - self._check_params_mean_collector(stat, test_params.ref_shape, test_params.ref_values) - elif isinstance(test_params, self.RawBCTestParameters): - self._check_params_raw_collector(stat, dataset_samples) + if test_params.collector_type == BCStatsCollectors.MEAN: + ret_val = [stat.mean_values, stat.shape] + elif test_params.collector_type == BCStatsCollectors.RAW: + ret_val = stat.values + test_params.ref_values = dataset_samples else: - assert False - - def _check_params_raw_collector(self, stat, ref_values): - assert np.allclose(stat.values, ref_values) + raise nncf.InvalidCollectorTypeError(f"Invalid collector type: {test_params.collector_type}") - def _check_params_mean_collector(self, stat, ref_shape, ref_values): - assert ref_shape == stat.shape - assert np.allclose(stat.mean_values, ref_values) + for val, ref in zip(ret_val, test_params.ref_values): + if isinstance(ref, np.ndarray): + assert ref.shape == val.shape + assert np.allclose(val, ref) @classmethod def create_statistics_point( diff --git a/tests/onnx/quantization/test_reducers_and_aggregators.py b/tests/onnx/quantization/test_reducers_and_aggregators.py index ed9016067c4..3e1c3ac988a 100644 --- a/tests/onnx/quantization/test_reducers_and_aggregators.py +++ b/tests/onnx/quantization/test_reducers_and_aggregators.py @@ -17,6 +17,7 @@ from nncf.common.graph.layer_attributes import Dtype from 
nncf.onnx.statistics.collectors import ONNXAbsMaxReducer from nncf.onnx.statistics.collectors import ONNXAbsQuantileReducer +from nncf.onnx.statistics.collectors import ONNXBatchMeanReducer from nncf.onnx.statistics.collectors import ONNXMaxReducer from nncf.onnx.statistics.collectors import ONNXMeanPerChanelReducer from nncf.onnx.statistics.collectors import ONNXMeanReducer @@ -48,6 +49,7 @@ def reducers(self): "mean": ONNXMeanReducer, "quantile": ONNXQuantileReducer, "abs_quantile": ONNXAbsQuantileReducer, + "batch_mean": ONNXBatchMeanReducer, "mean_per_ch": ONNXMeanPerChanelReducer, } diff --git a/tests/openvino/native/quantization/test_reducers_and_aggregators.py b/tests/openvino/native/quantization/test_reducers_and_aggregators.py index 44b4d190409..c29218550ac 100644 --- a/tests/openvino/native/quantization/test_reducers_and_aggregators.py +++ b/tests/openvino/native/quantization/test_reducers_and_aggregators.py @@ -17,6 +17,7 @@ from nncf.common.graph.layer_attributes import Dtype from nncf.openvino.statistics.collectors import OVAbsMaxReducer from nncf.openvino.statistics.collectors import OVAbsQuantileReducer +from nncf.openvino.statistics.collectors import OVBatchMeanReducer from nncf.openvino.statistics.collectors import OVMaxReducer from nncf.openvino.statistics.collectors import OVMeanPerChanelReducer from nncf.openvino.statistics.collectors import OVMeanReducer @@ -48,6 +49,7 @@ def reducers(self): "mean": OVMeanReducer, "quantile": OVQuantileReducer, "abs_quantile": OVAbsQuantileReducer, + "batch_mean": OVBatchMeanReducer, "mean_per_ch": OVMeanPerChanelReducer, } diff --git a/tests/openvino/native/test_model_transformer.py b/tests/openvino/native/test_model_transformer.py index c0a24819262..84980635e8a 100644 --- a/tests/openvino/native/test_model_transformer.py +++ b/tests/openvino/native/test_model_transformer.py @@ -23,6 +23,7 @@ from nncf.common.graph.transformations.layout import TransformationLayout from nncf.experimental.tensor import Tensor 
from nncf.openvino.graph.model_transformer import OVModelTransformer +from nncf.openvino.graph.node_utils import get_inplace_batch_mean_op from nncf.openvino.graph.node_utils import get_inplace_max_op from nncf.openvino.graph.node_utils import get_inplace_mean_op from nncf.openvino.graph.node_utils import get_inplace_mean_per_ch @@ -147,13 +148,14 @@ def __str__(self) -> str: "abs_max", None, lambda r: get_inplace_max_op(r, True), ["Abs", "ReduceMax"], [None, (0, 1, 2, 3)] ), # Batch mean and mean per ch operations - InplaceOpTestCase("mean_per_ch", 1, get_inplace_mean_per_ch, ["Reshape", "ReduceMean"], [(1, 3, 16), (2)]), + InplaceOpTestCase("batch_mean", None, lambda r: get_inplace_batch_mean_op(), ["ReduceMean"], [0]), + InplaceOpTestCase("mean_per_ch", 1, get_inplace_mean_per_ch, ["Reshape", "ReduceMean"], [(1, 3, 16), (0, 2)]), InplaceOpTestCase( "mean_per_ch", 2, get_inplace_mean_per_ch, ["Transpose", "Reshape", "ReduceMean"], - [(0, 2, 1, 3), (1, 4, 12), (2)], + [(0, 2, 1, 3), (1, 4, 12), (0, 2)], ), InplaceOpTestCase( "mean_per_ch", diff --git a/tests/openvino/native/test_statistics_aggregator.py b/tests/openvino/native/test_statistics_aggregator.py index 7f3612cfbc8..ff573310080 100644 --- a/tests/openvino/native/test_statistics_aggregator.py +++ b/tests/openvino/native/test_statistics_aggregator.py @@ -23,6 +23,7 @@ from nncf.openvino.graph.transformations.commands import OVTargetPoint from nncf.openvino.statistics.aggregator import OVStatisticsAggregator from nncf.openvino.statistics.collectors import OV_REDUCERS_MAP +from nncf.openvino.statistics.collectors import OVBatchMeanReducer from nncf.openvino.statistics.collectors import OVMeanPerChanelReducer from nncf.quantization.algorithms.bias_correction.openvino_backend import OVBiasCorrectionAlgoBackend from nncf.quantization.algorithms.fast_bias_correction.openvino_backend import OVFastBiasCorrectionAlgoBackend @@ -121,5 +122,5 @@ def _get_shared_conv_model(self, dataset_samples): def reducers_map(self) 
-> List[TensorReducerBase]: map_ = OV_REDUCERS_MAP.copy() - map_.update({"mean_per_ch": OVMeanPerChanelReducer}) + map_.update({"batch_mean": OVBatchMeanReducer, "mean_per_ch": OVMeanPerChanelReducer}) return map_ diff --git a/tests/torch/ptq/test_reducers_and_aggregators.py b/tests/torch/ptq/test_reducers_and_aggregators.py index a195b2a2398..1af7b4e4683 100644 --- a/tests/torch/ptq/test_reducers_and_aggregators.py +++ b/tests/torch/ptq/test_reducers_and_aggregators.py @@ -23,6 +23,7 @@ from nncf.torch.tensor_statistics.algo import create_register_input_hook from nncf.torch.tensor_statistics.collectors import PTAbsMaxReducer from nncf.torch.tensor_statistics.collectors import PTAbsQuantileReducer +from nncf.torch.tensor_statistics.collectors import PTBatchMeanReducer from nncf.torch.tensor_statistics.collectors import PTMaxReducer from nncf.torch.tensor_statistics.collectors import PTMeanPerChanelReducer from nncf.torch.tensor_statistics.collectors import PTMeanReducer @@ -54,6 +55,7 @@ def reducers(self): "mean": PTMeanReducer, "quantile": PTQuantileReducer, "abs_quantile": PTAbsQuantileReducer, + "batch_mean": PTBatchMeanReducer, "mean_per_ch": PTMeanPerChanelReducer, } From 532ca55374de1ff6f1a1830b248f0918d885ef80 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 20 Feb 2024 15:50:46 +0100 Subject: [PATCH 072/108] fix merge --- nncf/torch/graph/operator_metatypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nncf/torch/graph/operator_metatypes.py b/nncf/torch/graph/operator_metatypes.py index 342c82b256c..beddbe5f3d7 100644 --- a/nncf/torch/graph/operator_metatypes.py +++ b/nncf/torch/graph/operator_metatypes.py @@ -1050,7 +1050,7 @@ def get_operator_metatypes() -> List[Type[OperatorMetatype]]: PTModuleBatchNormMetatype, ] -OP_NAMES_QUANTIZE_NODE = ["symmetric_quantize", "asymmetric_quantize"] +QUANTIZE_NODE_TYPES = ["symmetric_quantize", "asymmetric_quantize"] # These metatypes mix outputs for different samples into one axis. 
# If reducers and aggregators collect statistics at the output of the following operations, From 54f8ca3d9439bc59c0d1a87907be3ee8d2502f32 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 20 Feb 2024 17:05:14 +0100 Subject: [PATCH 073/108] fix revert typo --- nncf/quantization/algorithms/bias_correction/algorithm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index 4dc605c85e6..a45e807ec35 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -506,7 +506,7 @@ def output_filter_func(point): output_fp.extend(tensor_collector.get_statistics().mean_values) return np.array(output_fp) - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: self._set_backend_entity(model) statistic_container = StatisticPointsContainer() From 7637d4b2c474f5a20afe4a8de4f0489d5f1346dc Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 21 Feb 2024 10:26:04 +0100 Subject: [PATCH 074/108] fix export of torch model --- .../pipelines/image_classification_timm.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index 1749cbfe46a..5438b4656b5 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -45,33 +45,20 @@ def prepare_model(self) -> None: timm_model.eval() timm_model = replace_timm_custom_modules_with_torch_native(timm_model) self.model_cfg = timm_model.default_cfg - self.input_size = [1] + list(timm_model.default_cfg["input_size"]) - self.dynamic_input_size = [-1] + 
list(timm_model.default_cfg["input_size"]) + self.input_size = [self.batch_size] + list(timm_model.default_cfg["input_size"]) self.dummy_tensor = torch.rand(self.input_size) if self.backend in PT_BACKENDS: self.model = timm_model if self.backend == BackendType.ONNX: - onnx_path = self.output_model_dir / "model_fp32.onnx" - torch.onnx.export( - timm_model, - self.dummy_tensor, - onnx_path, - export_params=True, - opset_version=13, - input_names=["image"], - dynamic_axes={ - "image": {0: "batch"}, - }, - ) onnx_path = self.fp32_model_dir / "model_fp32.onnx" torch.onnx.export(timm_model, self.dummy_tensor, onnx_path, export_params=True, opset_version=13) self.model = onnx.load(onnx_path) self.input_name = self.model.graph.input[0].name if self.backend in OV_BACKENDS + [BackendType.FP32]: - self.model = ov.convert_model(timm_model, example_input=self.dummy_tensor, input=self.dynamic_input_size) + self.model = ov.convert_model(timm_model, example_input=self.dummy_tensor, input=self.input_size) self.input_name = list(inp.get_any_name() for inp in self.model.inputs)[0] self._dump_model_fp32() From 976255fe965334c874cbb671cf5e728b160160db Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 23 Feb 2024 16:36:35 +0100 Subject: [PATCH 075/108] comments --- .../quantization/initialization/range.py | 3 ++- nncf/common/tensor_statistics/aggregator.py | 2 +- .../onnx/quantization/quantizer_parameters.py | 9 +++++-- nncf/quantization/advanced_parameters.py | 4 +++ nncf/quantization/algorithms/algorithm.py | 3 +-- .../algorithms/bias_correction/algorithm.py | 2 +- .../algorithms/channel_alignment/algorithm.py | 2 +- .../fast_bias_correction/algorithm.py | 2 +- .../hyperparameter_tuner/algorithm.py | 4 +-- .../algorithms/min_max/algorithm.py | 26 +++++++++++-------- .../algorithms/min_max/backend.py | 3 +-- .../algorithms/min_max/onnx_backend.py | 11 +++----- .../algorithms/min_max/openvino_backend.py | 4 +-- .../algorithms/min_max/torch_backend.py | 4 +-- 
nncf/quantization/algorithms/pipeline.py | 7 +++-- .../algorithms/post_training/algorithm.py | 4 +-- .../algorithms/post_training/pipeline.py | 1 + .../algorithms/smooth_quant/algorithm.py | 2 +- .../weight_compression/algorithm.py | 2 +- nncf/quantization/quantize_model.py | 16 ++++++++++++ tests/common/test_statistics_aggregator.py | 2 +- .../test_fq_params_calculation.py | 2 +- .../native/quantization/test_graphs.py | 2 +- tests/openvino/tools/calibrate.py | 3 +-- .../pipelines/image_classification_timm.py | 15 ++++++----- .../test_quantize_conformance.py | 1 + tests/post_training/test_templates/helpers.py | 2 +- .../test_templates/test_bias_correction.py | 2 +- .../test_templates/test_channel_alignment.py | 2 +- .../test_templates/test_ptq_params.py | 4 +-- .../test_templates/test_quantizer_config.py | 4 +-- .../test_templates/test_smooth_quant.py | 2 +- .../ptq/test_calculation_quantizer_params.py | 2 +- 33 files changed, 87 insertions(+), 67 deletions(-) diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index 7f232bad99e..35bd178f36c 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -215,7 +215,8 @@ def _get_reduction_axes( aggregation_axes: Union[Tuple[int], List[int]], ): """ - Returns axes for a reducer. + Returns axes for a reducer regarding aggregation axes. As aggregator takes axes counting from stacked tensors, + from these axes only tensor related axes should be used for reducer. :param shape_to_reduce: Shape of a reduced tensor. :param quantization_axes: Axes of quantization. 
diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index d312fcd7ae1..e6ebeef7aad 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -77,7 +77,7 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: if self.batch_size > 1 and self.is_model_has_no_batch_axis(graph): nncf_logger.warning( "For the particular model the batch size > 1 can lead to inaccurate collected statistics . \ - The recomendation is to use batch_size = 1." + The recomendation is to provide dataloader instance with the batch_size = 1." ) model_transformer = factory.ModelTransformerFactory.create(model) merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph) diff --git a/nncf/onnx/quantization/quantizer_parameters.py b/nncf/onnx/quantization/quantizer_parameters.py index 1a9570d758d..8d224318d27 100644 --- a/nncf/onnx/quantization/quantizer_parameters.py +++ b/nncf/onnx/quantization/quantizer_parameters.py @@ -37,7 +37,7 @@ class ONNXQuantizerLayerParameters: def convert_fq_params_to_onnx_params( - parameters: FakeQuantizeParameters, num_bits: int, tensor_type: np.dtype, axis: Optional[int] = None + parameters: FakeQuantizeParameters, num_bits: int, tensor_type: np.dtype, axis: Tuple[int] ) -> ONNXQuantizerLayerParameters: """ Converts common FakeQuantizeParameters to ONNXQuantizerLayerParameters. @@ -45,7 +45,7 @@ def convert_fq_params_to_onnx_params( :param parameters: FakeQuantizeParameters representation. :param num_bits: Number of quantizer bits. :param tensor_type: Value type of the tensor. Could be INT8 or UINT8. - :param axis: Axis for per-channel quantization. Should be none in case of per-tensor. + :param axis: Axis for per-channel quantization. :return: Quantizer layer attributes. """ if num_bits != 8: @@ -68,6 +68,11 @@ def convert_fq_params_to_onnx_params( # ONNX demands parameters to be a scalar or 1-D Tensor. 
scale = np.squeeze(scale) zero_point = np.squeeze(zero_point) + # ONNX axis parameter format specification. + if not axis: + axis = None + else: + axis = axis[0] return ONNXQuantizerLayerParameters(scale.data, zero_point.data, tensor_type, axis) diff --git a/nncf/quantization/advanced_parameters.py b/nncf/quantization/advanced_parameters.py index b28edce49e0..751b2c76006 100644 --- a/nncf/quantization/advanced_parameters.py +++ b/nncf/quantization/advanced_parameters.py @@ -190,6 +190,8 @@ class AdvancedQuantizationParameters: :type disable_channel_alignment: bool :param disable_bias_correction: Whether to disable the bias correction. :type disable_bias_correction: bool + :param statistics_per_sample: Whether calculate statistics regarding batch axis. + :type statistics_per_sample: bool :param activations_quantization_params: Quantization parameters for activations. :type activations_quantization_params: nncf.quantization.advanced_parameters.QuantizationParameters :param weights_quantization_params: Quantization parameters for weights. @@ -207,6 +209,7 @@ class AdvancedQuantizationParameters: :type smooth_quant_alpha: AdvancedSmoothQuantParameters :param smooth_quant_alpha: Deprecated SmoothQuant-related parameter. :type smooth_quant_alpha: float + :param backend_params: Backend-specific parameters. 
:type backend_params: Dict[str, Any] """ @@ -217,6 +220,7 @@ class AdvancedQuantizationParameters: inplace_statistics: bool = True disable_channel_alignment: bool = True disable_bias_correction: bool = False + statistics_per_sample: bool = None # Advanced Quantization parameters activations_quantization_params: Union[QuantizationParameters, FP8QuantizationParameters] = None diff --git a/nncf/quantization/algorithms/algorithm.py b/nncf/quantization/algorithms/algorithm.py index 1efc6c21133..befe0a82f9d 100644 --- a/nncf/quantization/algorithms/algorithm.py +++ b/nncf/quantization/algorithms/algorithm.py @@ -54,12 +54,11 @@ def apply( """ @abstractmethod - def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: """ Returns statistic points, for which StatisticsCollector should collect statistics. :param model: Model for statistics collection. :param graph: Model graph. - :param dataset: A representative dataset for the calibration process. :return: Statistic points, for which StatisticsCollector should collect statistics. 
""" diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index a45e807ec35..4dc605c85e6 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -506,7 +506,7 @@ def output_filter_func(point): output_fp.extend(tensor_collector.get_statistics().mean_values) return np.array(output_fp) - def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) statistic_container = StatisticPointsContainer() diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py index e4b315f7925..542cb8801f3 100644 --- a/nncf/quantization/algorithms/channel_alignment/algorithm.py +++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py @@ -381,7 +381,7 @@ def _get_target_point_and_node_in(self, conv_in, add_in) -> Tuple[TargetPoint, N node_in, ) - def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) statistic_container = StatisticPointsContainer() diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 0ba1e2b1524..15f9a2f05b1 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -322,7 +322,7 @@ def _get_bias_shift( bias_shift = fns.stack(output_fp) - q_outputs return bias_shift - def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: + def 
get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) nodes_with_bias = [ node for node in graph.get_all_nodes() if self._backend_entity.is_node_with_bias(node, graph) diff --git a/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py b/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py index 8c043a4c34c..4f12fb76f51 100644 --- a/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py +++ b/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py @@ -292,9 +292,7 @@ def apply(self, model: TModel, validation_dataset: Dataset) -> TModel: # TODO(andrey-churkin): Think about how it can be avoided. params = apply_combination(self._init_params, best_settings) pipeline = self._pipeline_fn(**params) - container = pipeline.get_statistic_points_for_step( - step_index, step_model, step_graph, self._calibration_dataset - ) + container = pipeline.get_statistic_points_for_step(step_index, step_model, step_graph) step_statistics = collect_statistics(container, step_model, step_graph, self._calibration_dataset) step_model = pipeline.run_step(step_index, step_statistics, step_model, step_graph) continue diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 7f0546b9494..31d42d56eb2 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -141,6 +141,7 @@ def __init__( overflow_fix: Optional[OverflowFix] = None, quantize_outputs: bool = False, inplace_statistics: bool = True, + statistics_per_sample: bool = False, activations_quantization_params: Union[QuantizationParameters, FP8QuantizationParameters] = None, weights_quantization_params: Union[QuantizationParameters, FP8QuantizationParameters] = None, activations_range_estimator_params: Optional[RangeEstimatorParameters] = None, @@ -171,6 +172,7 @@ def __init__( :param inplace_statistics: Defines 
wheather to calculate quantizers statistics by backend graph operations or by default Python implementation, defaults to True. + :param statistics_per_sample: Whether calculate statistics regarding batch axis. :param activations_quantization_params: Quantization parameters for model activations. :param weights_quantization_params: Quantization parameters for model weights. @@ -187,6 +189,7 @@ def __init__( self._overflow_fix = overflow_fix self._quantize_outputs = quantize_outputs self._inplace_statistics = inplace_statistics + self._statistics_per_sample = statistics_per_sample self._backend_params = backend_params self._activations_quantization_params = activations_quantization_params self._weights_quantization_params = weights_quantization_params @@ -398,7 +401,7 @@ def _get_stat_collector( graph: NNCFGraph, target_point: TargetPoint, qconfig: QuantizerConfig, - is_many_samples: bool, + is_per_sample: bool, ) -> TensorStatisticCollectorBase: """ Creates and returns a statistic collector based on the quantizer's configuration. @@ -407,22 +410,22 @@ def _get_stat_collector( :param target_point: Target point indicates where statistics should be collected. :param qconfig: Configuration of a quantizer layer, defining the configuration of created statistic collector. - :param is_many_samples: True meaning that one data tensor consists of some samples. - False - data tnesor has onle one sample. + :param is_per_sample: True meaning that statistics is collected per sample. False - per tensor. :return: Statistic Collector. 
""" is_weight = target_point.is_weight_target_point() node = graph.get_node_by_name(target_point.target_node_name) - shape = self._backend_entity.get_target_point_shape(graph, node, target_point) - channel_axes = self._backend_entity.get_channel_axes(node, target_point, qconfig.per_channel) - range_estimator_params = self._get_range_estimator_parameters(target_point, qconfig) + channel_axes = () + if qconfig.per_channel: + channel_axes = self._backend_entity.get_channel_axes(node, target_point) + # Weight statistics is constant, so only one collection is enough. num_samples = self._subset_size if not is_weight else 1 - is_per_sample = is_many_samples and not is_weight + is_per_sample = is_per_sample and not is_weight collector_params = RangeInitCollectorParams( is_weights=is_weight, scheme=qconfig.mode, per_channel=qconfig.per_channel @@ -920,19 +923,20 @@ def filter_func(point: StatisticPoint) -> bool: quantized_model = model_transformer.transform(transformation_layout) return quantized_model - def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) self._reset_cache() quantization_target_points, _ = self._get_quantization_target_points(model, graph) output = StatisticPointsContainer() - is_many_samples = dataset.get_batch_size() is not None and dataset.get_batch_size() > 1 - if self._model_type == ModelType.TRANSFORMER and is_many_samples: + if self._model_type == ModelType.TRANSFORMER and self._statistics_per_sample: nncf_logger.warning( "For transfomer-like models batch_size > 1 could result in inaccurate statistics. \ The recomendation is to use batch_size = 1." 
) for quantization_target_point, qconfig in quantization_target_points.items(): - stat_collector = self._get_stat_collector(graph, quantization_target_point, qconfig, is_many_samples) + stat_collector = self._get_stat_collector( + graph, quantization_target_point, qconfig, self._statistics_per_sample + ) output.add_statistic_point( StatisticPoint( target_point=quantization_target_point, diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index 64f022d4aba..0ee27f12218 100644 --- a/nncf/quantization/algorithms/min_max/backend.py +++ b/nncf/quantization/algorithms/min_max/backend.py @@ -196,13 +196,12 @@ def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: @staticmethod @abstractmethod - def get_channel_axes(node: NNCFNode, target_point: TargetPoint, is_per_channel: bool) -> Tuple[int]: + def get_channel_axes(node: NNCFNode, target_point: TargetPoint) -> Tuple[int]: """ Returns axes for per-channel quantization. :param node: Quantized node. :param target_point: Corresponding target point. - :param is_per_channel: Is per-channel quantization or not. :return: Axes for per-channel quantization. 
""" diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 2a91c0242c8..1b48c4f40d2 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -125,11 +125,10 @@ def create_quantizer_insertion_command( tensor_type = np.int8 # The weight is restricted to have only signed range nncf_input_node_next_nodes = ONNXMinMaxAlgoBackend._get_input_edges_mapping(nncf_graph) node = nncf_graph.get_node_by_name(target_point.target_node_name) - axis = ONNXMinMaxAlgoBackend.get_channel_axes(node, target_point, quantizer_config.per_channel) - if not axis: - axis = None + if quantizer_config.per_channel: + axis = ONNXMinMaxAlgoBackend.get_channel_axes(node, target_point) else: - axis = axis[0] + axis = () onnx_parameters = convert_fq_params_to_onnx_params(parameters, quantizer_config.num_bits, tensor_type, axis) return ONNXQuantizerInsertionCommand(target_point, nncf_input_node_next_nodes, onnx_parameters) @@ -161,9 +160,7 @@ def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: return get_quantized_tensor_shape(nncf_graph, node, target_point) @staticmethod - def get_channel_axes(node: NNCFNode, target_point: ONNXTargetPoint, is_per_channel: bool) -> Tuple[int]: - if not is_per_channel: - return () + def get_channel_axes(node: NNCFNode, target_point: ONNXTargetPoint) -> Tuple[int]: if target_point.is_weight_target_point(): return (get_weight_quantization_axis(node, target_point.port_id),) return (1,) diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 947659127ca..293f5f0c90f 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -148,9 +148,7 @@ def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: raise 
NotImplementedError(f"Unsupported target point type {target_point.type}.") @staticmethod - def get_channel_axes(node: NNCFNode, target_point: OVTargetPoint, is_per_channel: bool) -> Tuple[int]: - if not is_per_channel: - return () + def get_channel_axes(node: NNCFNode, target_point: OVTargetPoint) -> Tuple[int]: if target_point.is_weight_target_point(): return get_weight_channel_axes(node) return (1,) diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index b1c9ffc9199..dfc351976cb 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -163,9 +163,7 @@ def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: return nncf_graph.get_input_shape_for_insertion_point(target_point) @staticmethod - def get_channel_axes(node: NNCFNode, target_point: PTTargetPoint, is_per_channel: bool) -> Tuple[int]: - if not is_per_channel: - return () + def get_channel_axes(node: NNCFNode, target_point: PTTargetPoint) -> Tuple[int]: if target_point.is_weight_target_point(): return (node.layer_attributes.get_target_dim_for_compression(),) return (1,) diff --git a/nncf/quantization/algorithms/pipeline.py b/nncf/quantization/algorithms/pipeline.py index 2f6d99312e6..cd615258553 100644 --- a/nncf/quantization/algorithms/pipeline.py +++ b/nncf/quantization/algorithms/pipeline.py @@ -157,7 +157,7 @@ def run_from_step( # Collect statistics required to run current pipeline step step_statistics = step_index_to_statistics.get(step_index) if step_statistics is None: - statistic_points = self.get_statistic_points_for_step(step_index, step_model, step_graph, dataset) + statistic_points = self.get_statistic_points_for_step(step_index, step_model, step_graph) step_statistics = collect_statistics(statistic_points, step_model, step_graph, dataset) # Run current pipeline step @@ -168,7 +168,7 @@ def run_from_step( return step_model def 
get_statistic_points_for_step( - self, step_index: int, model: TModel, graph: NNCFGraph, dataset: Dataset + self, step_index: int, model: TModel, graph: NNCFGraph ) -> StatisticPointsContainer: """ Returns statistics that should be collected to execute `step_index`-th pipeline step. @@ -176,14 +176,13 @@ def get_statistic_points_for_step( :param step_index: Zero-based index of the pipeline step. :param model: A model. :param graph: A graph assosiated with a model. - :param dataset: A dataset that holds the data items for pipeline steps. :return: Statistics that should be collected to execute `step_index`-th pipeline step. """ container = StatisticPointsContainer() pipeline_steps = self._remove_unsupported_algorithms(get_backend(model)) pipeline_step = pipeline_steps[step_index] for algorithm in pipeline_step: - for statistic_points in algorithm.get_statistic_points(model, graph, dataset).values(): + for statistic_points in algorithm.get_statistic_points(model, graph).values(): for statistic_point in statistic_points: container.add_statistic_point(statistic_point) diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index 9cdda93b34a..304848a99fa 100644 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ b/nncf/quantization/algorithms/post_training/algorithm.py @@ -89,8 +89,8 @@ def available_backends(self) -> List[BackendType]: backends = backends.intersection(algorithm.available_backends) return list(backends) - def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: - return self._pipeline.get_statistic_points_for_step(0, model, graph, dataset) + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + return self._pipeline.get_statistic_points_for_step(0, model, graph) def apply( self, diff --git a/nncf/quantization/algorithms/post_training/pipeline.py 
b/nncf/quantization/algorithms/post_training/pipeline.py index c394655491d..f02b2fa209d 100644 --- a/nncf/quantization/algorithms/post_training/pipeline.py +++ b/nncf/quantization/algorithms/post_training/pipeline.py @@ -118,6 +118,7 @@ def create_ptq_pipeline( overflow_fix=advanced_parameters.overflow_fix, quantize_outputs=advanced_parameters.quantize_outputs, inplace_statistics=advanced_parameters.inplace_statistics, + statistics_per_sample=advanced_parameters.statistics_per_sample, activations_quantization_params=advanced_parameters.activations_quantization_params, weights_quantization_params=advanced_parameters.weights_quantization_params, activations_range_estimator_params=advanced_parameters.activations_range_estimator_params, diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index da3efac86ed..d31dd09c451 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -241,7 +241,7 @@ def _get_statistics_for_node( statistics_for_node.append(Tensor(statistic)) return statistics_for_node - def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: statistic_container = StatisticPointsContainer() self._set_backend_entity(model) diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py index af5b6673938..6d354303aa1 100644 --- a/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -364,7 +364,7 @@ def do_compression( ) return transformed_model - def get_statistic_points(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> 
StatisticPointsContainer: pass def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> Tuple[NNCFNode, int]: diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index 6982b347600..d302d4868af 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -36,6 +36,16 @@ TTensor = TypeVar("TTensor") +def _update_advanced_quantization_parameters(advanced_parameters, calibration_dataset): + batch_size = calibration_dataset.get_batch_size() + if batch_size is not None and batch_size > 1: + if advanced_parameters is None: + advanced_parameters = AdvancedQuantizationParameters(statistics_per_sample=True) + elif advanced_parameters.statistics_per_sample is None: + advanced_parameters.statistics_per_sample = True + return advanced_parameters + + @api(canonical_alias="nncf.quantize") def quantize( model: TModel, @@ -90,6 +100,8 @@ def quantize( if subset_size < 1: raise ValueError("Subset size must be positive.") + advanced_parameters = _update_advanced_quantization_parameters(advanced_parameters, calibration_dataset) + backend = get_backend(model) if backend == BackendType.OPENVINO: from nncf.openvino.quantization.quantize_model import quantize_impl @@ -223,6 +235,10 @@ def quantize_with_accuracy_control( :return: The quantized model. 
:rtype: TModel """ + advanced_quantization_parameters = _update_advanced_quantization_parameters( + advanced_quantization_parameters, calibration_dataset + ) + backend = get_backend(model) if backend == BackendType.OPENVINO: from nncf.openvino.quantization.quantize_model import quantize_with_accuracy_control_impl diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index a7b88a6dfc5..ad733371fe4 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -645,7 +645,7 @@ def create_statistics_point( algo._set_backend_entity(model) nncf_graph = NNCFGraphFactory.create(model) algo._subset_size = subset_size - tensor_collector = algo._get_stat_collector(nncf_graph, target_point, q_config, is_many_samples=False) + tensor_collector = algo._get_stat_collector(nncf_graph, target_point, q_config, is_per_sample=False) return StatisticPoint(target_point=target_point, tensor_collector=tensor_collector, algorithm=algorithm_name) @pytest.mark.parametrize( diff --git a/tests/openvino/native/quantization/test_fq_params_calculation.py b/tests/openvino/native/quantization/test_fq_params_calculation.py index 936d012af96..965dd23c406 100644 --- a/tests/openvino/native/quantization/test_fq_params_calculation.py +++ b/tests/openvino/native/quantization/test_fq_params_calculation.py @@ -60,7 +60,7 @@ def quantize_model(ov_model, q_params): min_max_algo = MinMaxQuantization(subset_size=1, **q_params) statistics_aggregator = OVStatisticsAggregator(dataset) - statistic_points = min_max_algo.get_statistic_points(ov_model, graph, dataset) + statistic_points = min_max_algo.get_statistic_points(ov_model, graph) statistics_aggregator.register_statistic_points(statistic_points) statistics_aggregator.collect_statistics(ov_model, graph) quantized_model = min_max_algo.apply(ov_model, graph, statistics_aggregator.statistic_points) diff --git a/tests/openvino/native/quantization/test_graphs.py 
b/tests/openvino/native/quantization/test_graphs.py index 610a25e446b..a701cb8f65e 100644 --- a/tests/openvino/native/quantization/test_graphs.py +++ b/tests/openvino/native/quantization/test_graphs.py @@ -141,7 +141,7 @@ def smooth_quant_model(ov_model: ov.Model, q_params: Dict, quantize=True): smooth_quant_algo = SmoothQuant(subset_size=1) statistics_aggregator = OVStatisticsAggregator(dataset) - statistic_points = smooth_quant_algo.get_statistic_points(ov_model, graph, dataset) + statistic_points = smooth_quant_algo.get_statistic_points(ov_model, graph) statistics_aggregator.register_statistic_points(statistic_points) statistics_aggregator.collect_statistics(ov_model, graph) modified_model = smooth_quant_algo.apply(ov_model, graph, statistics_aggregator.statistic_points) diff --git a/tests/openvino/tools/calibrate.py b/tests/openvino/tools/calibrate.py index 86745652306..dedd0b998b0 100644 --- a/tests/openvino/tools/calibrate.py +++ b/tests/openvino/tools/calibrate.py @@ -1052,8 +1052,7 @@ def main(): xml_path, bin_path = get_model_paths(config.model) accuracy_checker_config = get_accuracy_checker_config(config.engine) nncf_algorithms_config = get_nncf_algorithms_config(config.compression, args.output_dir) - if args.batch_size > 1: - update_config(accuracy_checker_config, args.batch_size) + update_config(accuracy_checker_config, args.batch_size) set_log_file(f"{args.output_dir}/log.txt") output_dir = os.path.join(args.output_dir, "optimized") diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index 5438b4656b5..cf776a23003 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -118,12 +118,13 @@ def prepare_calibration_dataset(self): def _validate(self): val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform) - val_loader = 
torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False) + val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=2, shuffle=False) dataset_size = len(val_loader) - predictions = [0] * dataset_size - references = [-1] * dataset_size + # Initialize result tensors for async inference support. + predictions = np.zeros((dataset_size * self.batch_size)) + references = -1 * np.ones((dataset_size * self.batch_size)) core = ov.Core() @@ -143,7 +144,8 @@ def _validate(self): def process_result(request, userdata): output_data = request.get_output_tensor().data predicted_label = np.argmax(output_data, axis=1) - predictions[userdata] = [predicted_label] + for j in range(self.batch_size): + predictions[userdata * self.batch_size + j] = predicted_label[j] pbar.progress.update(pbar.task, advance=1) infer_queue.set_callback(process_result) @@ -152,12 +154,11 @@ def process_result(request, userdata): # W/A for memory leaks when using torch DataLoader and OpenVINO image_copies = copy.deepcopy(images.numpy()) infer_queue.start_async(image_copies, userdata=i) - references[i] = target + for j in range(self.batch_size): + references[i * self.batch_size + j] = target[j] infer_queue.wait_all() - predictions = np.concatenate(predictions, axis=0) - references = np.concatenate(references, axis=0) acc_top1 = accuracy_score(predictions, references) self.run_info.metric_name = "Acc@1" diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index 25ccda73a62..35edff0bd12 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -226,6 +226,7 @@ def test_ptq_quantization( "data_dir": data_dir, "no_eval": no_eval, "run_benchmark_app": run_benchmark_app, + "batch_size": batch_size, } ) pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs) diff --git a/tests/post_training/test_templates/helpers.py 
b/tests/post_training/test_templates/helpers.py index bb8de0d0c92..86765552473 100644 --- a/tests/post_training/test_templates/helpers.py +++ b/tests/post_training/test_templates/helpers.py @@ -58,7 +58,7 @@ def get_static_dataset(input_size: Tuple, transform_fn: Callable, fn_to_type: Ca :param input_size: Size of generated tensors, :param transform_fn: Function to transformation dataset. :param fn_to_type: Function, defaults to None. - :param lenght: The lenght of dataset. + :param length: The length of the dataset. :return: Instance of nncf.Dataset for StaticDatasetMock. """ return Dataset( diff --git a/tests/post_training/test_templates/test_bias_correction.py b/tests/post_training/test_templates/test_bias_correction.py index 915d7e8938b..e27a8ad0868 100644 --- a/tests/post_training/test_templates/test_bias_correction.py +++ b/tests/post_training/test_templates/test_bias_correction.py @@ -184,7 +184,7 @@ def test_verify_collected_stat_inputs_map(self, model_cls, ref_stat_inputs_map, graph = NNCFGraphFactory.create(model) bc_algo = self.get_bias_correction_algorithm() - bc_algo.get_statistic_points(model, graph, None) + bc_algo.get_statistic_points(model, graph) collected_stat_inputs_map = getattr(bc_algo, "_collected_stat_inputs_map") assert collected_stat_inputs_map == ref_stat_inputs_map diff --git a/tests/post_training/test_templates/test_channel_alignment.py b/tests/post_training/test_templates/test_channel_alignment.py index 4316d6c087c..49cb8ffe0c3 100644 --- a/tests/post_training/test_templates/test_channel_alignment.py +++ b/tests/post_training/test_templates/test_channel_alignment.py @@ -511,7 +511,7 @@ class MockBackend(backend_cls): MockBackend.get_statistic_collector = mocker.MagicMock(return_value=ref_stat_collector) algorithm._backend_entity = MockBackend - statistic_container = algorithm.get_statistic_points(None, nncf_graph, None) + statistic_container = algorithm.get_statistic_points(None, nncf_graph) backend_cls = self.get_backend_cls() 
target_node_name = "/Add_1_0" if num_biases else "/Conv_1_0" diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 8b174767e5d..3e276a889bd 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -47,7 +47,7 @@ from tests.common.quantization.mock_graphs import get_nncf_graph_from_mock_nx_graph -class MockdedDataset: +class MockedDataset: def get_batch_size(self): return 1 @@ -146,7 +146,7 @@ def test_range_estimator_per_tensor(self, test_params, range_estimator_params): params = test_params["test_range_estimator_per_tensor"] - stat_points = min_max_algo.get_statistic_points(params["model"], params["nncf_graph"], MockdedDataset()) + stat_points = min_max_algo.get_statistic_points(params["model"], params["nncf_graph"]) assert len(stat_points) == params["stat_points_num"] for _, stat_point in stat_points.items(): diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 10f0a04773f..84b4bc37830 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -82,7 +82,7 @@ def conv_sum_aggregation_nncf_graph(self) -> NNCFGraphToTestSumAggregation: class TestGetStatisticsCollectorParameters: target_type: TargetType target_node_name: str - is_many_samples: bool + is_per_sample: bool ref_per_ch_reduction_axes: List[int] ref_per_tensor_reduction_axes: List[int] @@ -284,7 +284,7 @@ def test_get_stat_collector( target_point = list(min_max_algo._quantization_target_points_to_qconfig.keys())[0] tensor_collector = min_max_algo._get_stat_collector( - conv_sum_aggregation_nncf_graph.nncf_graph, target_point, q_config, params.is_many_samples + conv_sum_aggregation_nncf_graph.nncf_graph, target_point, q_config, params.is_per_sample ) is_weight_tp = 
target_point.is_weight_target_point() diff --git a/tests/post_training/test_templates/test_smooth_quant.py b/tests/post_training/test_templates/test_smooth_quant.py index abbcdab3aaa..c3827cc81dd 100644 --- a/tests/post_training/test_templates/test_smooth_quant.py +++ b/tests/post_training/test_templates/test_smooth_quant.py @@ -239,7 +239,7 @@ def test_empty_stats(self, mocker, tmpdir): graph = NNCFGraphFactory.create(model) algo = SmoothQuant(subset_size=1, inplace_statistics=False) - algo_statistic_points = algo.get_statistic_points(model, graph, dataset) + algo_statistic_points = algo.get_statistic_points(model, graph) statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) statistics_aggregator.register_statistic_points(algo_statistic_points) statistics_aggregator.collect_statistics(model, graph) diff --git a/tests/torch/ptq/test_calculation_quantizer_params.py b/tests/torch/ptq/test_calculation_quantizer_params.py index 4ae870863cb..5c08d137815 100644 --- a/tests/torch/ptq/test_calculation_quantizer_params.py +++ b/tests/torch/ptq/test_calculation_quantizer_params.py @@ -314,7 +314,7 @@ def test_quantizer_parameters_export(tmp_path: Path): nncf_config = NNCFConfig({"input_info": {"sample_size": [1, 3, 32, 32]}}) nncf_network = create_nncf_network(model, nncf_config) - statistic_points = min_max_algo.get_statistic_points(nncf_network, nncf_network.nncf.get_graph(), dataset) + statistic_points = min_max_algo.get_statistic_points(nncf_network, nncf_network.nncf.get_graph()) statistics_aggregator.register_statistic_points(statistic_points) statistics_aggregator.collect_statistics(model, nncf_network.nncf.get_graph()) torch_quantized_model = min_max_algo.apply( From 8951e3c82e782881fae87adc31cd8f624c67f7fb Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 26 Feb 2024 11:23:43 +0100 Subject: [PATCH 076/108] more comments --- nncf/common/tensor_statistics/aggregator.py | 4 ++-- nncf/onnx/statistics/aggregator.py | 2 +- 
nncf/openvino/statistics/aggregator.py | 2 +- nncf/torch/statistics/aggregator.py | 2 +- tests/onnx/quantization/test_batch_size.py | 2 +- tests/torch/ptq/test_batch_size.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index e6ebeef7aad..66a3837fb7e 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -126,11 +126,11 @@ def is_model_has_no_batch_axis(self, graph: NNCFGraph) -> bool: :return: True if NNCFGraph contains metatypes with no batch axis in output tensor. """ unique_graph_metatypes = set(node.metatype for node in graph.get_all_nodes()) - return any(metatype in self.metatypes_output_has_no_batch_axis for metatype in unique_graph_metatypes) + return any(metatype in self.metatypes_no_batch_support for metatype in unique_graph_metatypes) @property @abstractmethod - def metatypes_output_has_no_batch_axis(self) -> List[OperatorMetatype]: + def metatypes_no_batch_support(self) -> List[OperatorMetatype]: """ These metatypes mix outputs for different samples into one axis. 
If reducers and aggregators collect statistics at the output of the following operations, diff --git a/nncf/onnx/statistics/aggregator.py b/nncf/onnx/statistics/aggregator.py index 9e19d935df6..ed0417bcee9 100644 --- a/nncf/onnx/statistics/aggregator.py +++ b/nncf/onnx/statistics/aggregator.py @@ -32,7 +32,7 @@ class ONNXStatisticsAggregator(StatisticsAggregator): @property - def metatypes_output_has_no_batch_axis(self) -> List[ONNXOpMetatype]: + def metatypes_no_batch_support(self) -> List[ONNXOpMetatype]: return OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS def collect_statistics(self, model: onnx.ModelProto, graph: NNCFGraph) -> None: diff --git a/nncf/openvino/statistics/aggregator.py b/nncf/openvino/statistics/aggregator.py index 69cf8165392..27011d45631 100644 --- a/nncf/openvino/statistics/aggregator.py +++ b/nncf/openvino/statistics/aggregator.py @@ -34,7 +34,7 @@ class OVStatisticsAggregator(StatisticsAggregator): @property - def metatypes_output_has_no_batch_axis(self) -> List[OVOpMetatype]: + def metatypes_no_batch_support(self) -> List[OVOpMetatype]: return OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS def collect_statistics(self, model: ov.Model, graph: NNCFGraph) -> None: diff --git a/nncf/torch/statistics/aggregator.py b/nncf/torch/statistics/aggregator.py index 3123037221c..713e1de2288 100644 --- a/nncf/torch/statistics/aggregator.py +++ b/nncf/torch/statistics/aggregator.py @@ -32,7 +32,7 @@ class PTStatisticsAggregator(StatisticsAggregator): HOOKS_GROUP_NAME = "statistics_hooks" @property - def metatypes_output_has_no_batch_axis(self) -> List[PTOperatorMetatype]: + def metatypes_no_batch_support(self) -> List[PTOperatorMetatype]: return OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS def collect_statistics(self, model: NNCFNetwork, graph: NNCFGraph) -> None: diff --git a/tests/onnx/quantization/test_batch_size.py b/tests/onnx/quantization/test_batch_size.py index debfd1382d5..fee973c831d 100644 --- a/tests/onnx/quantization/test_batch_size.py +++ 
b/tests/onnx/quantization/test_batch_size.py @@ -12,6 +12,6 @@ from tests.post_training.test_templates.test_batch_size import TemplateTestBatchSize -class TestOVBatchSize(TemplateTestBatchSize): +class TestONNXBatchSize(TemplateTestBatchSize): def create_statistics_aggregator(self, dataset): return ONNXStatisticsAggregator(dataset) diff --git a/tests/torch/ptq/test_batch_size.py b/tests/torch/ptq/test_batch_size.py index d10eae83cde..df8e1ca4640 100644 --- a/tests/torch/ptq/test_batch_size.py +++ b/tests/torch/ptq/test_batch_size.py @@ -12,6 +12,6 @@ from tests.post_training.test_templates.test_batch_size import TemplateTestBatchSize -class TestOVBatchSize(TemplateTestBatchSize): +class TestTorchBatchSize(TemplateTestBatchSize): def create_statistics_aggregator(self, dataset): return PTStatisticsAggregator(dataset) From 0d72557f6cf25b901ffe9bfa6d67592e430cc86b Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 26 Feb 2024 16:59:45 +0100 Subject: [PATCH 077/108] make bs=128 for Torch sample --- .../post_training_quantization/openvino/mobilenet_v2/main.py | 4 ++-- tests/post_training/test_templates/test_ptq_params.py | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/examples/post_training_quantization/openvino/mobilenet_v2/main.py b/examples/post_training_quantization/openvino/mobilenet_v2/main.py index f2a4a136754..ae4b6d77674 100644 --- a/examples/post_training_quantization/openvino/mobilenet_v2/main.py +++ b/examples/post_training_quantization/openvino/mobilenet_v2/main.py @@ -100,11 +100,11 @@ def get_model_size(ir_path: str, m_type: str = "Mb", verbose: bool = True) -> fl ] ), ) -val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False) +val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, shuffle=False) path_to_model = download(MODEL_URL, MODEL_PATH) ov_model = ov.Core().read_model(path_to_model / "mobilenet_v2_fp32.xml") - +ov_model.reshape([-1, 3, 224, 224]) 
############################################################################### # Quantize an OpenVINO model # diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 3e276a889bd..326560d97f5 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -47,11 +47,6 @@ from tests.common.quantization.mock_graphs import get_nncf_graph_from_mock_nx_graph -class MockedDataset: - def get_batch_size(self): - return 1 - - class ModelToTestOverflowFix: # Input_1 Input_2 # | | From 0f8a43820bba25a1e4938aedc70d40eef2b80f33 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 27 Feb 2024 16:16:17 +0100 Subject: [PATCH 078/108] fix channel alighnment + comments --- nncf/quantization/algorithms/channel_alignment/algorithm.py | 2 +- .../algorithms/channel_alignment/openvino_backend.py | 4 +++- nncf/quantization/algorithms/smooth_quant/torch_backend.py | 5 ----- tests/post_training/test_templates/test_channel_alignment.py | 4 ++-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py index 542cb8801f3..b30749b6d2c 100644 --- a/nncf/quantization/algorithms/channel_alignment/algorithm.py +++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py @@ -390,7 +390,7 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin channel_axis = conv_in.metatype.output_channel_axis activation_shape = list(range(len(graph.get_output_edges(node_in)[0].tensor_shape))) - reduction_axes = get_reduction_axes([channel_axis], activation_shape) + reduction_axes = get_reduction_axes([0, channel_axis], activation_shape) statistic_collector = self._backend_entity.get_statistic_collector( reduction_axes, self._quantile, self.subset_size, self.inplace_statistics diff --git 
a/nncf/quantization/algorithms/channel_alignment/openvino_backend.py b/nncf/quantization/algorithms/channel_alignment/openvino_backend.py index 44f0ba95feb..f4596b1b36d 100644 --- a/nncf/quantization/algorithms/channel_alignment/openvino_backend.py +++ b/nncf/quantization/algorithms/channel_alignment/openvino_backend.py @@ -85,7 +85,9 @@ def get_statistic_collector( quantile_reducer = OVQuantileReducer(reduction_axes, (q, 1 - q), inplace) for port_id, container_key in enumerate([OVMinMaxTensorStatistic.MIN_STAT, OVMinMaxTensorStatistic.MAX_STAT]): - aggregator = MedianAggregator(OVNNCFCollectorTensorProcessor, num_samples=num_samples) + aggregator = MedianAggregator( + OVNNCFCollectorTensorProcessor, num_samples=num_samples, aggregation_axes=(0, 1) + ) tensor_collector.register_statistic_branch(container_key, quantile_reducer, aggregator, port_id) return tensor_collector diff --git a/nncf/quantization/algorithms/smooth_quant/torch_backend.py b/nncf/quantization/algorithms/smooth_quant/torch_backend.py index 3b125c196ee..275f9a2523e 100644 --- a/nncf/quantization/algorithms/smooth_quant/torch_backend.py +++ b/nncf/quantization/algorithms/smooth_quant/torch_backend.py @@ -19,7 +19,6 @@ from nncf.common.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType -from nncf.common.graph.utils import get_reduction_axes from nncf.common.quantization.quantizer_propagation.structs import QuantizationTrait from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator @@ -87,10 +86,6 @@ def get_activations_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int: # all nodes with the metatypes have 0 activation port id. 
return 0 - @staticmethod - def get_channel_agnostic_reduction_axes(channel_axis: int, shape: Tuple[int]) -> Tuple[int]: - return get_reduction_axes([channel_axis], shape) - @staticmethod def get_abs_max_channel_collector( num_samples: int, stats_reduction_axes: Tuple[int], inplace: bool, branch_key: str diff --git a/tests/post_training/test_templates/test_channel_alignment.py b/tests/post_training/test_templates/test_channel_alignment.py index 49cb8ffe0c3..f1e0fbf440d 100644 --- a/tests/post_training/test_templates/test_channel_alignment.py +++ b/tests/post_training/test_templates/test_channel_alignment.py @@ -528,7 +528,7 @@ class MockBackend(backend_cls): tensor_collectors = stat_points[0].algorithm_to_tensor_collectors[algorithm._algorithm_key] assert len(tensor_collectors) == 1 assert tensor_collectors[0] == ref_stat_collector - MockBackend.get_statistic_collector.assert_called_once_with((0, 2, 3), 1e-4, ref_subset_size, ref_inplace) + MockBackend.get_statistic_collector.assert_called_once_with((2, 3), 1e-4, ref_subset_size, ref_inplace) target_point = stat_points[0].target_point assert target_point.target_node_name == target_node_name @@ -555,4 +555,4 @@ def test_statistic_collectors(self, inplace_ref, q_ref): assert isinstance(aggr, MedianAggregator) assert aggr.num_samples == num_samples_ref assert aggr._keepdims - assert aggr._aggregation_axes == (0,) + assert aggr._aggregation_axes == (0, 1) From 78d4d6cf68170565f997743f834f29acbfce165a Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 28 Feb 2024 11:21:16 +0100 Subject: [PATCH 079/108] comments --- nncf/common/tensor_statistics/aggregator.py | 28 +++++++++++++------ .../test_templates/test_batch_size.py | 2 +- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 66a3837fb7e..39adf7d286c 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py 
@@ -38,16 +38,18 @@ def __init__(self, dataset: Dataset): self.stat_subset_size = None self.batch_size = self.dataset.get_batch_size() or 1 dataset_len = self.dataset.get_length() - self.dataset_sample_size = dataset_len * self.batch_size if dataset_len is not None else dataset_len + self.dataset_sample_size = ( + dataset_len * self.batch_size if dataset_len is not None else dataset_len + ) # Number of samples in the dataset self.statistic_points = StatisticPointsContainer() - def _get_total_statistics_samples( + def _get_number_samples_for_statistics( self, ) -> Optional[int]: """ - Returns total number of statistics samples used. + Returns number of samples for statistics collection. - :return: Total number of statistics samples used. + :return: Number of samples for statistics collection. """ return ( min(self.dataset_sample_size or self.stat_subset_size, self.stat_subset_size) @@ -85,18 +87,28 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: model_with_outputs = model_transformer.transform(transformation_layout) engine = factory.EngineFactory.create(model_with_outputs) - calibration_samples_num = self._get_total_statistics_samples() + statistics_samples_num = self._get_number_samples_for_statistics() iterations_num = ( - self._get_iterations_num(calibration_samples_num) if calibration_samples_num is not None else None + self._get_iterations_num(statistics_samples_num) if statistics_samples_num is not None else None ) if iterations_num is not None and iterations_num == 0: - raise nncf.ValidationError("Batch size > length of dataset or batch size > stat_subset_size.") - with track(total=calibration_samples_num, description="Statistics collection") as pbar: + raise nncf.ValidationError( + "Provided dataset has a batch size value which is bigger than subset size for statistics collection. \ + Please increase number of samples for statistics collection \ + or decrease batch size value in the dataset." 
+ ) + empty_statistics = True + with track(total=statistics_samples_num, description="Statistics collection") as pbar: for input_data in islice(self.dataset.get_inference_data(), iterations_num): outputs = engine.infer(input_data) processed_outputs = self._process_outputs(outputs) self._register_statistics(processed_outputs, merged_statistics) pbar.progress.update(pbar.task, advance=self.batch_size) + empty_statistics = False + if empty_statistics: + raise nncf.ValidationError( + "Calibration dataset must not be empty. Please provide calibration dataset with at least one sample." + ) def register_statistic_points(self, statistic_points: StatisticPointsContainer) -> None: """ diff --git a/tests/post_training/test_templates/test_batch_size.py b/tests/post_training/test_templates/test_batch_size.py index adbac996e69..e56e1000c20 100644 --- a/tests/post_training/test_templates/test_batch_size.py +++ b/tests/post_training/test_templates/test_batch_size.py @@ -60,7 +60,7 @@ def test_batch_size_subset_size_dataset_len(self, test_data): dataset = self.create_dataset(dataset_length, batch_size) statistics_aggregator = self.create_statistics_aggregator(dataset) statistics_aggregator.stat_subset_size = stat_subset_size - total_calibration_samples = statistics_aggregator._get_total_statistics_samples() + total_calibration_samples = statistics_aggregator._get_number_samples_for_statistics() assert total_calibration_samples == ref_calibration_samples_num iterataions_num = statistics_aggregator._get_iterations_num(total_calibration_samples) assert iterataions_num == ref_iterations_num From 34c9960e251efa817a1c809978d766139303ab40 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 28 Feb 2024 11:53:34 +0100 Subject: [PATCH 080/108] update typehints; revert changes in OV sample and apply to Torch --- .../openvino/mobilenet_v2/main.py | 3 +-- .../torch/mobilenet_v2/main.py | 2 +- nncf/quantization/quantize_model.py | 14 ++++++++++++-- 3 files changed, 14 insertions(+), 5 
deletions(-) diff --git a/examples/post_training_quantization/openvino/mobilenet_v2/main.py b/examples/post_training_quantization/openvino/mobilenet_v2/main.py index ae4b6d77674..360ddccf58e 100644 --- a/examples/post_training_quantization/openvino/mobilenet_v2/main.py +++ b/examples/post_training_quantization/openvino/mobilenet_v2/main.py @@ -100,11 +100,10 @@ def get_model_size(ir_path: str, m_type: str = "Mb", verbose: bool = True) -> fl ] ), ) -val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, shuffle=False) +val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False) path_to_model = download(MODEL_URL, MODEL_PATH) ov_model = ov.Core().read_model(path_to_model / "mobilenet_v2_fp32.xml") -ov_model.reshape([-1, 3, 224, 224]) ############################################################################### # Quantize an OpenVINO model # diff --git a/examples/post_training_quantization/torch/mobilenet_v2/main.py b/examples/post_training_quantization/torch/mobilenet_v2/main.py index b053ad43499..cde7b974ebd 100644 --- a/examples/post_training_quantization/torch/mobilenet_v2/main.py +++ b/examples/post_training_quantization/torch/mobilenet_v2/main.py @@ -107,7 +107,7 @@ def get_model_size(ir_path: str, m_type: str = "Mb", verbose: bool = True) -> fl ] ), ) -val_data_loader = torch.utils.data.DataLoader(val_dataset) +val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128) torch_model = models.mobilenet_v2(num_classes=DATASET_CLASSES) torch_model = load_checkpoint(torch_model) diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index d302d4868af..3ce1061b474 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -36,7 +36,18 @@ TTensor = TypeVar("TTensor") -def _update_advanced_quantization_parameters(advanced_parameters, calibration_dataset): +def _update_advanced_quantization_parameters( + advanced_parameters: 
Optional[AdvancedQuantizationParameters], calibration_dataset: Dataset +) -> AdvancedQuantizationParameters: + """ + Updates AdvancedQuantizationParameters depending on batch_size. + + :param advanced_parameters: Advanced quantization parameters for + fine-tuning the quantization algorithm. + :param calibration_dataset: A representative dataset for the + calibration process. + :return: Updated AdvancedQuantizationParameters. + """ batch_size = calibration_dataset.get_batch_size() if batch_size is not None and batch_size > 1: if advanced_parameters is None: @@ -96,7 +107,6 @@ def quantize( :return: The quantized model. :rtype: TModel """ - if subset_size < 1: raise ValueError("Subset size must be positive.") From 354505a728d671b3d93d8dede7c90acd1b86af09 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 28 Feb 2024 11:55:29 +0100 Subject: [PATCH 081/108] typo --- .../post_training_quantization/openvino/mobilenet_v2/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/post_training_quantization/openvino/mobilenet_v2/main.py b/examples/post_training_quantization/openvino/mobilenet_v2/main.py index 360ddccf58e..f2a4a136754 100644 --- a/examples/post_training_quantization/openvino/mobilenet_v2/main.py +++ b/examples/post_training_quantization/openvino/mobilenet_v2/main.py @@ -104,6 +104,7 @@ def get_model_size(ir_path: str, m_type: str = "Mb", verbose: bool = True) -> fl path_to_model = download(MODEL_URL, MODEL_PATH) ov_model = ov.Core().read_model(path_to_model / "mobilenet_v2_fp32.xml") + ############################################################################### # Quantize an OpenVINO model # From 97cb07f18e9ef08badf1ec923c71d870b3cc16a6 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 28 Feb 2024 13:56:57 +0100 Subject: [PATCH 082/108] some code improvements --- nncf/common/tensor_statistics/aggregator.py | 24 ++++++--- .../onnx/quantization/quantizer_parameters.py | 5 +- .../test_tensor_collector_batch_size.py | 2 - 
tests/common/test_statistics_aggregator.py | 52 +++++++++++-------- .../test_templates/test_batch_size.py | 2 +- 5 files changed, 49 insertions(+), 36 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 39adf7d286c..b5310e73ba5 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -27,6 +27,10 @@ TensorType = TypeVar("TensorType") TModel = TypeVar("TModel") +EMPTY_DATASET_MESSAGE = ( + "Calibration dataset must not be empty. Please provide calibration dataset with at least one sample." +) + class StatisticsAggregator(ABC): """ @@ -41,6 +45,8 @@ def __init__(self, dataset: Dataset): self.dataset_sample_size = ( dataset_len * self.batch_size if dataset_len is not None else dataset_len ) # Number of samples in the dataset + if self.dataset_sample_size == 0: + raise nncf.ValidationError(EMPTY_DATASET_MESSAGE) self.statistic_points = StatisticPointsContainer() def _get_number_samples_for_statistics( @@ -78,8 +84,10 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: return if self.batch_size > 1 and self.is_model_has_no_batch_axis(graph): nncf_logger.warning( - "For the particular model the batch size > 1 can lead to inaccurate collected statistics . \ - The recomendation is to provide dataloader instance with the batch_size = 1." + ( + "For the particular model the batch size > 1 can lead to inaccurate collected statistics. " + "The recomendation is to provide dataloader instance with the batch_size = 1." 
+ ) ) model_transformer = factory.ModelTransformerFactory.create(model) merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph) @@ -93,9 +101,11 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: ) if iterations_num is not None and iterations_num == 0: raise nncf.ValidationError( - "Provided dataset has a batch size value which is bigger than subset size for statistics collection. \ - Please increase number of samples for statistics collection \ - or decrease batch size value in the dataset." + ( + "Provided dataset has a batch size value is bigger than subset size for statistics collection. " + "Please increase the number of samples for a statistics collection " + "or decrease the batch size value in the dataset." + ) ) empty_statistics = True with track(total=statistics_samples_num, description="Statistics collection") as pbar: @@ -106,9 +116,7 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: pbar.progress.update(pbar.task, advance=self.batch_size) empty_statistics = False if empty_statistics: - raise nncf.ValidationError( - "Calibration dataset must not be empty. Please provide calibration dataset with at least one sample." - ) + raise nncf.ValidationError(EMPTY_DATASET_MESSAGE) def register_statistic_points(self, statistic_points: StatisticPointsContainer) -> None: """ diff --git a/nncf/onnx/quantization/quantizer_parameters.py b/nncf/onnx/quantization/quantizer_parameters.py index 8d224318d27..a063e6a7055 100644 --- a/nncf/onnx/quantization/quantizer_parameters.py +++ b/nncf/onnx/quantization/quantizer_parameters.py @@ -69,10 +69,7 @@ def convert_fq_params_to_onnx_params( scale = np.squeeze(scale) zero_point = np.squeeze(zero_point) # ONNX axis parameter format specification. 
- if not axis: - axis = None - else: - axis = axis[0] + axis = None if not axis else axis[0] return ONNXQuantizerLayerParameters(scale.data, zero_point.data, tensor_type, axis) diff --git a/tests/common/experimental/test_tensor_collector_batch_size.py b/tests/common/experimental/test_tensor_collector_batch_size.py index a7418f5910b..8f29ccb1e53 100644 --- a/tests/common/experimental/test_tensor_collector_batch_size.py +++ b/tests/common/experimental/test_tensor_collector_batch_size.py @@ -107,14 +107,12 @@ def test_statistics_batch_size_equal(self, reducers, aggregators, inplace): shape_batch_1 = [1, *tensor_shape] collector, reducer, _ = self._create_tensor_collector(shape_batch_1, inplace, reducers, aggregators) - # output_name = reducer.get_output_names(target_node_name, port_id) dataitems_batch_1 = self.add_batch_dim_to_dataitems(dataitems, batch_size=1) self._register_inputs(collector, dataitems_batch_1, reducer) aggregated_tensor_batch_1 = list(collector._aggregate().values()) shape_batch_10 = [10, *tensor_shape] collector, reducer, _ = self._create_tensor_collector(shape_batch_10, inplace, reducers, aggregators) - # output_name = reducer.get_output_names(target_node_name, port_id) dataitems_batch_10 = self.add_batch_dim_to_dataitems(dataitems, batch_size=10) self._register_inputs(collector, dataitems_batch_10, reducer) aggregated_tensor_batch_10 = list(collector._aggregate().values()) diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index ad733371fe4..f188ec449bb 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -24,6 +24,7 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig +from nncf.common.tensor_statistics.aggregator import EMPTY_DATASET_MESSAGE from 
nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator @@ -40,6 +41,14 @@ from nncf.quantization.range_estimator import StatisticsType +class MockedDataset: + def __iter__(self): + return self + + def __next__(self): + raise StopIteration + + class BiasCorrectionAlgos(Enum): BIAS_CORRECTION = "bias_correction" FAST_BIAS_CORRECTION = "fast_bias_correction" @@ -895,31 +904,32 @@ def test_register_statistics(self, dataset_samples, statistic_point_params, mock ref_subset_size = subset_size assert statistics_aggregator.stat_subset_size == ref_subset_size - def test_collect_with_empty_dataset(self, dataset_samples, mocker): - model = self.get_backend_model(dataset_samples) - dataset_samples = [] - dataset = self.get_dataset(dataset_samples) - graph = NNCFGraphFactory.create(model) + def test_collect_with_empty_dataset(self): + """ + Checks a correct raising of an error when dataset has len==0 + """ + dataset = nncf.Dataset([]) + with pytest.raises(nncf.ValidationError) as e: + _ = self.get_statistics_aggregator(dataset) + assert EMPTY_DATASET_MESSAGE in str(e) - inplace_statistics = False - quantizer_config = QuantizerConfig(mode=QuantizationMode.ASYMMETRIC, per_channel=False) - target_point = self.get_target_point(TargetType.POST_LAYER_OPERATION) - algorithm_name = "TestAlgo" - statistic_point = self.create_statistics_point( - model, - quantizer_config, - target_point, - len(dataset_samples), - algorithm_name, - inplace_statistics, - RangeEstimatorParametersSet.MEAN_MINMAX, - mocker, + def test_collect_with_empty_dataset_no_len(self, dataset_samples): + """ + Checks a correct raising of an error when dataset has no len() method implementation, + but has no elements. 
+ """ + model = self.get_backend_model(dataset_samples) + dummy_statistic_point = StatisticPoint( + target_point=self.get_target_point(TargetType.POST_LAYER_OPERATION), + tensor_collector=TensorCollector(), + algorithm="dummy", ) statistics_points = StatisticPointsContainer() - statistics_points.add_statistic_point(statistic_point) - + statistics_points.add_statistic_point(dummy_statistic_point) + dataset = nncf.Dataset(MockedDataset()) + graph = NNCFGraphFactory.create(model) statistics_aggregator = self.get_statistics_aggregator(dataset) statistics_aggregator.register_statistic_points(statistics_points) with pytest.raises(nncf.ValidationError) as e: statistics_aggregator.collect_statistics(model, graph) - assert "Batch size > length of dataset or batch size > stat_subset_size." in e.info + assert EMPTY_DATASET_MESSAGE in str(e) diff --git a/tests/post_training/test_templates/test_batch_size.py b/tests/post_training/test_templates/test_batch_size.py index e56e1000c20..a79ab11b376 100644 --- a/tests/post_training/test_templates/test_batch_size.py +++ b/tests/post_training/test_templates/test_batch_size.py @@ -40,7 +40,7 @@ def create_dataset(self, lenght, batch_size): @pytest.mark.parametrize( ("test_data"), - ( # BAD TEST + ( [ # batch_size | dataset_len | stat_subset_size | ref_calibration_samples_num | ref_iterations_num # DataForTest(None, None, None, None, None), # None is None DataForTest(1, 1000, 300, 300, 300), From 2cc8b81f9b0c1aefbd13140deb8ec1bc3a13768c Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 28 Feb 2024 14:14:20 +0100 Subject: [PATCH 083/108] logging --- nncf/common/tensor_statistics/aggregator.py | 35 ++++++++++++--------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index b5310e73ba5..ddb815aee02 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -30,6 +30,19 @@ 
EMPTY_DATASET_MESSAGE = ( "Calibration dataset must not be empty. Please provide calibration dataset with at least one sample." ) +BATCH_SIZE_IS_BIGGER_THAN_SUBSET_SIZE_MESSAGE = ( + "Provided dataset has a batch size value is bigger than subset size for statistics collection. " + "Please increase the number of samples for a statistics collection " + "or decrease the batch size value in the dataset." +) +BATCH_SIZE_MODEL_WARNING = ( + "For the particular model the batch size > 1 can lead to inaccurate collected statistics. " + "The recomendation is to provide dataloader instance with the batch_size = 1." +) +DECREASING_SAMPLES_NUMBER_MESSAGE = ( + "The number of samples for statistics collection is decreased " + "to align with the provided batch size value of the dataset." +) class StatisticsAggregator(ABC): @@ -83,12 +96,7 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: if not self.statistic_points: return if self.batch_size > 1 and self.is_model_has_no_batch_axis(graph): - nncf_logger.warning( - ( - "For the particular model the batch size > 1 can lead to inaccurate collected statistics. " - "The recomendation is to provide dataloader instance with the batch_size = 1." - ) - ) + nncf_logger.warning(BATCH_SIZE_MODEL_WARNING) model_transformer = factory.ModelTransformerFactory.create(model) merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph) transformation_layout = self._get_transformation_layout_extra_outputs(merged_statistics) @@ -99,14 +107,13 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: iterations_num = ( self._get_iterations_num(statistics_samples_num) if statistics_samples_num is not None else None ) - if iterations_num is not None and iterations_num == 0: - raise nncf.ValidationError( - ( - "Provided dataset has a batch size value is bigger than subset size for statistics collection. 
" - "Please increase the number of samples for a statistics collection " - "or decrease the batch size value in the dataset." - ) - ) + if iterations_num is not None: + if iterations_num == 0: + raise nncf.ValidationError(BATCH_SIZE_IS_BIGGER_THAN_SUBSET_SIZE_MESSAGE) + samples_num = iterations_num * self.batch_size + if samples_num != statistics_samples_num: + nncf_logger.warning(DECREASING_SAMPLES_NUMBER_MESSAGE) + statistics_samples_num = samples_num empty_statistics = True with track(total=statistics_samples_num, description="Statistics collection") as pbar: for input_data in islice(self.dataset.get_inference_data(), iterations_num): From e3a3291571a6d537d231dad815fa440073981847 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 1 Mar 2024 13:19:06 +0100 Subject: [PATCH 084/108] remove iterations_number calculation in Aggregator --- .../torch/mobilenet_v2/main.py | 7 +- nncf/common/tensor_statistics/aggregator.py | 87 +++++++------------ tests/common/test_statistics_aggregator.py | 2 +- tests/openvino/tools/calibrate.py | 28 +++++- .../test_quantize_conformance.py | 4 + 5 files changed, 64 insertions(+), 64 deletions(-) diff --git a/examples/post_training_quantization/torch/mobilenet_v2/main.py b/examples/post_training_quantization/torch/mobilenet_v2/main.py index cde7b974ebd..486afa098f0 100644 --- a/examples/post_training_quantization/torch/mobilenet_v2/main.py +++ b/examples/post_training_quantization/torch/mobilenet_v2/main.py @@ -107,7 +107,8 @@ def get_model_size(ir_path: str, m_type: str = "Mb", verbose: bool = True) -> fl ] ), ) -val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128) +batch_size = 128 +val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size) torch_model = models.mobilenet_v2(num_classes=DATASET_CLASSES) torch_model = load_checkpoint(torch_model) @@ -140,8 +141,10 @@ def transform_fn(data_item: Tuple[torch.Tensor, int], device: torch.device) -> t # item and prepare model input data. 
The quantize method uses a small subset # (default: 300 samples) of the calibration dataset. +# Recalculation default subset_size parameter based on batch_size. +subset_size = 300 // batch_size calibration_dataset = nncf.Dataset(val_data_loader, partial(transform_fn, device=device)) -torch_quantized_model = nncf.quantize(torch_model, calibration_dataset) +torch_quantized_model = nncf.quantize(torch_model, calibration_dataset, subset_size=subset_size) ############################################################################### # Benchmark performance, calculate compression rate and validate accuracy diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index ddb815aee02..8fadd8043e3 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -27,21 +27,15 @@ TensorType = TypeVar("TensorType") TModel = TypeVar("TModel") -EMPTY_DATASET_MESSAGE = ( +EMPTY_DATASET_ERROR = ( "Calibration dataset must not be empty. Please provide calibration dataset with at least one sample." ) -BATCH_SIZE_IS_BIGGER_THAN_SUBSET_SIZE_MESSAGE = ( - "Provided dataset has a batch size value is bigger than subset size for statistics collection. " - "Please increase the number of samples for a statistics collection " - "or decrease the batch size value in the dataset." -) BATCH_SIZE_MODEL_WARNING = ( "For the particular model the batch size > 1 can lead to inaccurate collected statistics. " "The recomendation is to provide dataloader instance with the batch_size = 1." ) -DECREASING_SAMPLES_NUMBER_MESSAGE = ( - "The number of samples for statistics collection is decreased " - "to align with the provided batch size value of the dataset." +UPDATING_ITERATIONS_NUMBER_WARNING = ( + "The number of iterations for statistics collection is bigger than the length of the dataset." 
) @@ -52,38 +46,24 @@ class StatisticsAggregator(ABC): def __init__(self, dataset: Dataset): self.dataset = dataset - self.stat_subset_size = None - self.batch_size = self.dataset.get_batch_size() or 1 - dataset_len = self.dataset.get_length() - self.dataset_sample_size = ( - dataset_len * self.batch_size if dataset_len is not None else dataset_len - ) # Number of samples in the dataset - if self.dataset_sample_size == 0: - raise nncf.ValidationError(EMPTY_DATASET_MESSAGE) + self.iterations_number = None self.statistic_points = StatisticPointsContainer() - def _get_number_samples_for_statistics( + def _get_iterations_number( self, ) -> Optional[int]: """ - Returns number of samples for statistics collection. - - :return: Number of samples for statistics collection. - """ - return ( - min(self.dataset_sample_size or self.stat_subset_size, self.stat_subset_size) - if self.stat_subset_size is not None - else None - ) - - def _get_iterations_num(self, total_statistics_samples: int) -> int: - """ - Returns number of iterations to collect statistics. + Returns number of iterations which in min(self.iterations_number, dataset_length). - :param total_statistics_samples: Number of statistics samples are used. - :return: Iterations number of statistics collection. + :return: Number of iterations for statistics collection. 
""" - return total_statistics_samples // self.batch_size + dataset_length = self.dataset.get_length() + if dataset_length and self.iterations_number: + if self.iterations_number > dataset_length: + nncf_logger.warning(UPDATING_ITERATIONS_NUMBER_WARNING) + return dataset_length + return self.iterations_number + return dataset_length or self.iterations_number def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ @@ -95,7 +75,8 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ if not self.statistic_points: return - if self.batch_size > 1 and self.is_model_has_no_batch_axis(graph): + batch_size = self.dataset.get_batch_size() or 1 + if batch_size > 1 and self.is_model_has_no_batch_axis(graph): nncf_logger.warning(BATCH_SIZE_MODEL_WARNING) model_transformer = factory.ModelTransformerFactory.create(model) merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph) @@ -103,27 +84,19 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: model_with_outputs = model_transformer.transform(transformation_layout) engine = factory.EngineFactory.create(model_with_outputs) - statistics_samples_num = self._get_number_samples_for_statistics() - iterations_num = ( - self._get_iterations_num(statistics_samples_num) if statistics_samples_num is not None else None - ) - if iterations_num is not None: - if iterations_num == 0: - raise nncf.ValidationError(BATCH_SIZE_IS_BIGGER_THAN_SUBSET_SIZE_MESSAGE) - samples_num = iterations_num * self.batch_size - if samples_num != statistics_samples_num: - nncf_logger.warning(DECREASING_SAMPLES_NUMBER_MESSAGE) - statistics_samples_num = samples_num + iterations_number = self._get_iterations_number() empty_statistics = True - with track(total=statistics_samples_num, description="Statistics collection") as pbar: - for input_data in islice(self.dataset.get_inference_data(), iterations_num): - outputs = engine.infer(input_data) - processed_outputs = 
self._process_outputs(outputs) - self._register_statistics(processed_outputs, merged_statistics) - pbar.progress.update(pbar.task, advance=self.batch_size) - empty_statistics = False + for input_data in track( + islice(self.dataset.get_inference_data(), iterations_number), + total=self.iterations_number, + description="Statistics collection", + ): + outputs = engine.infer(input_data) + processed_outputs = self._process_outputs(outputs) + self._register_statistics(processed_outputs, merged_statistics) + empty_statistics = False if empty_statistics: - raise nncf.ValidationError(EMPTY_DATASET_MESSAGE) + raise nncf.ValidationError(EMPTY_DATASET_ERROR) def register_statistic_points(self, statistic_points: StatisticPointsContainer) -> None: """ @@ -140,10 +113,10 @@ def register_statistic_points(self, statistic_points: StatisticPointsContainer) for _statistic_point in _statistic_points: for _, tensor_collectors in _statistic_point.algorithm_to_tensor_collectors.items(): for tensor_collector in tensor_collectors: - if self.stat_subset_size is None: - self.stat_subset_size = tensor_collector.num_samples + if self.iterations_number is None: + self.iterations_number = tensor_collector.num_samples elif tensor_collector.num_samples is not None: - self.stat_subset_size = max(self.stat_subset_size, tensor_collector.num_samples) + self.iterations_number = max(self.iterations_number, tensor_collector.num_samples) def is_model_has_no_batch_axis(self, graph: NNCFGraph) -> bool: """ diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index f188ec449bb..f8a55ad4e2b 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -902,7 +902,7 @@ def test_register_statistics(self, dataset_samples, statistic_point_params, mock ref_subset_size = max(ref_subset_size, subset_size) else: ref_subset_size = subset_size - assert statistics_aggregator.stat_subset_size == ref_subset_size + assert 
statistics_aggregator.iterations_number == ref_subset_size def test_collect_with_empty_dataset(self): """ diff --git a/tests/openvino/tools/calibrate.py b/tests/openvino/tools/calibrate.py index dedd0b998b0..84670e961b9 100644 --- a/tests/openvino/tools/calibrate.py +++ b/tests/openvino/tools/calibrate.py @@ -22,7 +22,7 @@ from dataclasses import replace from enum import Enum from itertools import islice -from typing import Any, Iterable, List, Optional, TypeVar +from typing import Any, Dict, Iterable, List, Optional, TypeVar import numpy as np import openvino.runtime as ov @@ -1035,11 +1035,29 @@ def filter_configuration(config: Config) -> Config: return config -def update_config(accuracy_checker_config: Config, batch_size: int) -> None: +def update_accuracy_checker_config(accuracy_checker_config: Config, batch_size: int) -> None: + """ + Updates batch section of accuracy checker configuration file by batch_size value. + + :param accuracy_checker_config: Accuracy checker configuration file. + :param batch_size: Batch size value. + """ for model in accuracy_checker_config["models"]: for dataset in model["datasets"]: - print(f"Updated batch size value to {batch_size}") dataset["batch"] = batch_size + print(f"Updated batch size value to {batch_size}") + + +def update_nncf_algorithms_config(nncf_algorithms_config: Dict[str, Dict[str, Any]], batch_size: int) -> None: + """ + Updates subset_size parameter depending on batch_size and subset_size from an algorithm config. + + :param nncf_algorithms_config: Configuration file of an algorithm. + :param batch_size: Batch size value. 
+ """ + subset_size = nncf_algorithms_config.get("subset_size", 300) + nncf_algorithms_config["subset_size"] = subset_size // batch_size + print(f"Updated subset_size value to {nncf_algorithms_config['subset_size']}") def main(): @@ -1052,7 +1070,9 @@ def main(): xml_path, bin_path = get_model_paths(config.model) accuracy_checker_config = get_accuracy_checker_config(config.engine) nncf_algorithms_config = get_nncf_algorithms_config(config.compression, args.output_dir) - update_config(accuracy_checker_config, args.batch_size) + if args.batch_size > 1: + update_accuracy_checker_config(accuracy_checker_config, args.batch_size) + update_nncf_algorithms_config(nncf_algorithms_config, args.batch_size) set_log_file(f"{args.output_dir}/log.txt") output_dir = os.path.join(args.output_dir, "optimized") diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index 35edff0bd12..765cc621a20 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -219,6 +219,10 @@ def test_ptq_quantization( test_model_param = PTQ_TEST_CASES[test_case_name] maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend) pipeline_cls = test_model_param["pipeline_cls"] + # Recalculates subset_size when subset_size is None + if batch_size > 1 and subset_size is None: + subset_size = 300 // batch_size + print(f"Update subset_size value based on provided batch_size to {subset_size}.") pipeline_kwargs = create_pipeline_kwargs(test_model_param, subset_size, test_case_name, ptq_reference_data) pipeline_kwargs.update( { From 8cb7c601785189246c360db7677b16be4db29d7b Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 1 Mar 2024 13:32:47 +0100 Subject: [PATCH 085/108] update tests --- tests/common/test_statistics_aggregator.py | 16 +---- .../test_templates/test_batch_size.py | 66 ------------------- 2 files changed, 3 insertions(+), 79 deletions(-) delete mode 
100644 tests/post_training/test_templates/test_batch_size.py diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index f8a55ad4e2b..0ff1ac227d2 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -24,7 +24,7 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig -from nncf.common.tensor_statistics.aggregator import EMPTY_DATASET_MESSAGE +from nncf.common.tensor_statistics.aggregator import EMPTY_DATASET_ERROR from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator @@ -904,19 +904,9 @@ def test_register_statistics(self, dataset_samples, statistic_point_params, mock ref_subset_size = subset_size assert statistics_aggregator.iterations_number == ref_subset_size - def test_collect_with_empty_dataset(self): - """ - Checks a correct raising of an error when dataset has len==0 - """ - dataset = nncf.Dataset([]) - with pytest.raises(nncf.ValidationError) as e: - _ = self.get_statistics_aggregator(dataset) - assert EMPTY_DATASET_MESSAGE in str(e) - def test_collect_with_empty_dataset_no_len(self, dataset_samples): """ - Checks a correct raising of an error when dataset has no len() method implementation, - but has no elements. + Checks a correct raising of an error when dataset has no elements to iterate. 
""" model = self.get_backend_model(dataset_samples) dummy_statistic_point = StatisticPoint( @@ -932,4 +922,4 @@ def test_collect_with_empty_dataset_no_len(self, dataset_samples): statistics_aggregator.register_statistic_points(statistics_points) with pytest.raises(nncf.ValidationError) as e: statistics_aggregator.collect_statistics(model, graph) - assert EMPTY_DATASET_MESSAGE in str(e) + assert EMPTY_DATASET_ERROR in str(e) diff --git a/tests/post_training/test_templates/test_batch_size.py b/tests/post_training/test_templates/test_batch_size.py deleted file mode 100644 index a79ab11b376..00000000000 --- a/tests/post_training/test_templates/test_batch_size.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from abc import ABC -from abc import abstractmethod -from dataclasses import dataclass -from dataclasses import fields - -import pytest - -from nncf.common.tensor_statistics.aggregator import StatisticsAggregator -from tests.post_training.test_templates.helpers import get_static_dataset - - -@dataclass -class DataForTest: - batch_size: int - dataset_len: int - stat_subset_size: int - ref_calibration_samples_num: int - ref_iterations_num: int - - -class TemplateTestBatchSize(ABC): - @abstractmethod - def create_statistics_aggregator(self, dataset) -> StatisticsAggregator: - pass - - def create_dataset(self, lenght, batch_size): - dataset = get_static_dataset(None, None, None, lenght) - dataset._data_source.batch_size = batch_size - return dataset - - @pytest.mark.parametrize( - ("test_data"), - ( - [ # batch_size | dataset_len | stat_subset_size | ref_calibration_samples_num | ref_iterations_num - # DataForTest(None, None, None, None, None), # None is None - DataForTest(1, 1000, 300, 300, 300), - DataForTest(10, 1000, 300, 300, 30), - DataForTest(300, 1000, 300, 300, 1), - DataForTest(301, 1000, 300, 300, 0), # batch_size > stat_subset_size - DataForTest(10, 10, 300, 100, 10), # len(dataset) * batch_size < subset_size - DataForTest(11, 300, 300, 300, 27), # stat_subset_size % batch_size != 0 - ] - ), - ) - def test_batch_size_subset_size_dataset_len(self, test_data): - # Checks correct iterations number depending on batch_size, dataset length, subset_size - batch_size, dataset_length, stat_subset_size, ref_calibration_samples_num, ref_iterations_num = ( - getattr(test_data, field.name) for field in fields(test_data) - ) - dataset = self.create_dataset(dataset_length, batch_size) - statistics_aggregator = self.create_statistics_aggregator(dataset) - statistics_aggregator.stat_subset_size = stat_subset_size - total_calibration_samples = statistics_aggregator._get_number_samples_for_statistics() - assert total_calibration_samples == ref_calibration_samples_num - 
iterataions_num = statistics_aggregator._get_iterations_num(total_calibration_samples) - assert iterataions_num == ref_iterations_num From ae772aae8b93e837f17f0160713eb1523df44c0f Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 1 Mar 2024 13:42:18 +0100 Subject: [PATCH 086/108] reaname parameter --- nncf/quantization/advanced_parameters.py | 9 ++++++--- .../algorithms/min_max/algorithm.py | 11 ++++++----- .../algorithms/post_training/pipeline.py | 2 +- nncf/quantization/quantize_model.py | 6 +++--- tests/onnx/quantization/test_batch_size.py | 17 ----------------- .../native/quantization/test_batch_size.py | 17 ----------------- .../post_training/test_quantize_conformance.py | 2 ++ tests/torch/ptq/test_batch_size.py | 17 ----------------- 8 files changed, 18 insertions(+), 63 deletions(-) delete mode 100644 tests/onnx/quantization/test_batch_size.py delete mode 100644 tests/openvino/native/quantization/test_batch_size.py delete mode 100644 tests/torch/ptq/test_batch_size.py diff --git a/nncf/quantization/advanced_parameters.py b/nncf/quantization/advanced_parameters.py index 751b2c76006..6154ffe3d17 100644 --- a/nncf/quantization/advanced_parameters.py +++ b/nncf/quantization/advanced_parameters.py @@ -190,8 +190,11 @@ class AdvancedQuantizationParameters: :type disable_channel_alignment: bool :param disable_bias_correction: Whether to disable the bias correction. :type disable_bias_correction: bool - :param statistics_per_sample: Whether calculate statistics regarding batch axis. - :type statistics_per_sample: bool + :param batchwise_statistics: Determines whether quantizer statistics should be calculated + for each item of the batch or for the entire batch, default is None. + "None" means that if torch.DataLoader or tensorflow.Dataset was passed as a data source for the calibration + dataset, then if batch_size > 1 of the data source then batchwise_statistics = True, otherwise False. 
+ :type batchwise_statistics: Optional[bool] :param activations_quantization_params: Quantization parameters for activations. :type activations_quantization_params: nncf.quantization.advanced_parameters.QuantizationParameters :param weights_quantization_params: Quantization parameters for weights. @@ -220,7 +223,7 @@ class AdvancedQuantizationParameters: inplace_statistics: bool = True disable_channel_alignment: bool = True disable_bias_correction: bool = False - statistics_per_sample: bool = None + batchwise_statistics: Optional[bool] = None # Advanced Quantization parameters activations_quantization_params: Union[QuantizationParameters, FP8QuantizationParameters] = None diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 31d42d56eb2..311fcd38056 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -141,7 +141,7 @@ def __init__( overflow_fix: Optional[OverflowFix] = None, quantize_outputs: bool = False, inplace_statistics: bool = True, - statistics_per_sample: bool = False, + batchwise_statistics: bool = False, activations_quantization_params: Union[QuantizationParameters, FP8QuantizationParameters] = None, weights_quantization_params: Union[QuantizationParameters, FP8QuantizationParameters] = None, activations_range_estimator_params: Optional[RangeEstimatorParameters] = None, @@ -172,7 +172,8 @@ def __init__( :param inplace_statistics: Defines wheather to calculate quantizers statistics by backend graph operations or by default Python implementation, defaults to True. - :param statistics_per_sample: Whether calculate statistics regarding batch axis. + :param batchwise_statistics: Determines whether quantizer statistics should be calculated + for each item of the batch or for the entire batch, default is None. :param activations_quantization_params: Quantization parameters for model activations. 
:param weights_quantization_params: Quantization parameters for model weights. @@ -189,7 +190,7 @@ def __init__( self._overflow_fix = overflow_fix self._quantize_outputs = quantize_outputs self._inplace_statistics = inplace_statistics - self._statistics_per_sample = statistics_per_sample + self._batchwise_statistics = batchwise_statistics self._backend_params = backend_params self._activations_quantization_params = activations_quantization_params self._weights_quantization_params = weights_quantization_params @@ -928,14 +929,14 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin self._reset_cache() quantization_target_points, _ = self._get_quantization_target_points(model, graph) output = StatisticPointsContainer() - if self._model_type == ModelType.TRANSFORMER and self._statistics_per_sample: + if self._model_type == ModelType.TRANSFORMER and self._batchwise_statistics: nncf_logger.warning( "For transfomer-like models batch_size > 1 could result in inaccurate statistics. \ The recomendation is to use batch_size = 1." 
) for quantization_target_point, qconfig in quantization_target_points.items(): stat_collector = self._get_stat_collector( - graph, quantization_target_point, qconfig, self._statistics_per_sample + graph, quantization_target_point, qconfig, self._batchwise_statistics ) output.add_statistic_point( StatisticPoint( diff --git a/nncf/quantization/algorithms/post_training/pipeline.py b/nncf/quantization/algorithms/post_training/pipeline.py index f02b2fa209d..08e97bc31b7 100644 --- a/nncf/quantization/algorithms/post_training/pipeline.py +++ b/nncf/quantization/algorithms/post_training/pipeline.py @@ -118,7 +118,7 @@ def create_ptq_pipeline( overflow_fix=advanced_parameters.overflow_fix, quantize_outputs=advanced_parameters.quantize_outputs, inplace_statistics=advanced_parameters.inplace_statistics, - statistics_per_sample=advanced_parameters.statistics_per_sample, + batchwise_statistics=advanced_parameters.batchwise_statistics, activations_quantization_params=advanced_parameters.activations_quantization_params, weights_quantization_params=advanced_parameters.weights_quantization_params, activations_range_estimator_params=advanced_parameters.activations_range_estimator_params, diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index 3ce1061b474..dc161b0a225 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -51,9 +51,9 @@ def _update_advanced_quantization_parameters( batch_size = calibration_dataset.get_batch_size() if batch_size is not None and batch_size > 1: if advanced_parameters is None: - advanced_parameters = AdvancedQuantizationParameters(statistics_per_sample=True) - elif advanced_parameters.statistics_per_sample is None: - advanced_parameters.statistics_per_sample = True + advanced_parameters = AdvancedQuantizationParameters(batchwise_statistics=True) + elif advanced_parameters.batchwise_statistics is None: + advanced_parameters.batchwise_statistics = True return advanced_parameters 
diff --git a/tests/onnx/quantization/test_batch_size.py b/tests/onnx/quantization/test_batch_size.py deleted file mode 100644 index fee973c831d..00000000000 --- a/tests/onnx/quantization/test_batch_size.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from nncf.onnx.statistics.aggregator import ONNXStatisticsAggregator -from tests.post_training.test_templates.test_batch_size import TemplateTestBatchSize - - -class TestONNXBatchSize(TemplateTestBatchSize): - def create_statistics_aggregator(self, dataset): - return ONNXStatisticsAggregator(dataset) diff --git a/tests/openvino/native/quantization/test_batch_size.py b/tests/openvino/native/quantization/test_batch_size.py deleted file mode 100644 index 0f74a9f282a..00000000000 --- a/tests/openvino/native/quantization/test_batch_size.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from nncf.openvino.statistics.aggregator import OVStatisticsAggregator -from tests.post_training.test_templates.test_batch_size import TemplateTestBatchSize - - -class TestOVBatchSize(TemplateTestBatchSize): - def create_statistics_aggregator(self, dataset): - return OVStatisticsAggregator(dataset) diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index 765cc621a20..e2472f76af1 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -268,6 +268,7 @@ def test_weight_compression( output_dir: Path, wc_result_data: Dict[str, RunInfo], no_eval: bool, + batch_size: int, run_fp32_backend: bool, run_torch_cuda_backend: bool, subset_size: Optional[int], @@ -292,6 +293,7 @@ def test_weight_compression( "data_dir": data_dir, "no_eval": no_eval, "run_benchmark_app": run_benchmark_app, + "batch_size": batch_size, } ) pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs) diff --git a/tests/torch/ptq/test_batch_size.py b/tests/torch/ptq/test_batch_size.py deleted file mode 100644 index df8e1ca4640..00000000000 --- a/tests/torch/ptq/test_batch_size.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from nncf.torch.statistics.aggregator import PTStatisticsAggregator -from tests.post_training.test_templates.test_batch_size import TemplateTestBatchSize - - -class TestTorchBatchSize(TemplateTestBatchSize): - def create_statistics_aggregator(self, dataset): - return PTStatisticsAggregator(dataset) From 41c76fe3b501b7c6f6351234d283bd896f9309ca Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 1 Mar 2024 13:57:12 +0100 Subject: [PATCH 087/108] apply comments --- nncf/quantization/algorithms/bias_correction/onnx_backend.py | 1 - .../quantization/algorithms/fast_bias_correction/onnx_backend.py | 1 - 2 files changed, 2 deletions(-) diff --git a/nncf/quantization/algorithms/bias_correction/onnx_backend.py b/nncf/quantization/algorithms/bias_correction/onnx_backend.py index a075f16aab5..6636d6b9648 100644 --- a/nncf/quantization/algorithms/bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/bias_correction/onnx_backend.py @@ -71,7 +71,6 @@ def mean_statistic_collector( num_samples: Optional[int] = None, window_size: Optional[int] = None, ) -> TensorCollector: - inplace = False return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) @staticmethod diff --git a/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py b/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py index fcbd1e3ee18..f9f2e98d125 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/onnx_backend.py @@ -54,7 +54,6 @@ def mean_statistic_collector( num_samples: Optional[int] = None, window_size: Optional[int] = None, ) -> TensorCollector: - inplace = False return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) @staticmethod From 321c65a33352e05b1b1cb3dda488cd4ea48adea4 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 1 Mar 2024 14:30:29 +0100 Subject: [PATCH 088/108] polishing --- 
nncf/common/quantization/initialization/range.py | 14 ++++++++------ nncf/quantization/algorithms/min_max/algorithm.py | 11 ++++++----- .../algorithms/min_max/onnx_backend.py | 8 ++------ tests/common/test_statistics_aggregator.py | 3 ++- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index 35bd178f36c..705d12f0e09 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -227,29 +227,31 @@ def _get_reduction_axes( axes_to_keep.update(quantization_axes) return get_reduction_axes(axes_to_keep, shape_to_reduce) - def _get_aggregation_axes(self, is_per_sample: bool) -> Tuple[int]: + def _get_aggregation_axes(self, batchwise_statistics: bool) -> Tuple[int]: """ Returns axes for aggregator. - :param is_per_sample: Whether to aggreagate tensor statistics per batch axis. + :param batchwise_statistics: Determines whether quantizer statistics should be calculated + for each item of the batch or for the entire batch. :return Tuple[int]: Aggregation axes. """ - return (0, 1) if is_per_sample else (0,) + return (0, 1) if batchwise_statistics else (0,) def get_reduction_aggregation_axes( self, shape_to_reduce: Union[Tuple[int], List[int]], quantization_axes: Union[Tuple[int], List[int]], - is_per_sample: bool, + batchwise_statistics: bool, ) -> Tuple[ReductionAxes, AggregationAxes]: """ Calculates the reduction axes, aggregation axes for the tensor. :param shape_to_reduce: Shape of the tensor. :param quantization_axes: Quantization axes if per-channel quantization. - :param is_per_sample: Whether to calculate statistics per-sample (aggregate batch axis) + :param batchwise_statistics: Determines whether quantizer statistics should be calculated + for each item of the batch or for the entire batch. :return: Reduction axes and aggregation axes. 
""" - aggregation_axes = self._get_aggregation_axes(is_per_sample) + aggregation_axes = self._get_aggregation_axes(batchwise_statistics) reduction_axes = self._get_reduction_axes(shape_to_reduce, quantization_axes, aggregation_axes) return reduction_axes, aggregation_axes diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 311fcd38056..003685b7c73 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -173,7 +173,7 @@ def __init__( by backend graph operations or by default Python implementation, defaults to True. :param batchwise_statistics: Determines whether quantizer statistics should be calculated - for each item of the batch or for the entire batch, default is None. + for each item of the batch or for the entire batch, default is False. :param activations_quantization_params: Quantization parameters for model activations. :param weights_quantization_params: Quantization parameters for model weights. @@ -402,7 +402,7 @@ def _get_stat_collector( graph: NNCFGraph, target_point: TargetPoint, qconfig: QuantizerConfig, - is_per_sample: bool, + batchwise_statistics: bool, ) -> TensorStatisticCollectorBase: """ Creates and returns a statistic collector based on the quantizer's configuration. @@ -411,7 +411,8 @@ def _get_stat_collector( :param target_point: Target point indicates where statistics should be collected. :param qconfig: Configuration of a quantizer layer, defining the configuration of created statistic collector. - :param is_per_sample: True meaning that statistics is collected per sample. False - per tensor. + :param batchwise_statistics: Determines whether quantizer statistics should be calculated + for each item of the batch or for the entire batch. :return: Statistic Collector. 
""" is_weight = target_point.is_weight_target_point() @@ -426,7 +427,7 @@ def _get_stat_collector( # Weight statistics is constant, so only one collection is enough. num_samples = self._subset_size if not is_weight else 1 - is_per_sample = is_per_sample and not is_weight + batchwise_statistics = batchwise_statistics and not is_weight collector_params = RangeInitCollectorParams( is_weights=is_weight, scheme=qconfig.mode, per_channel=qconfig.per_channel @@ -434,7 +435,7 @@ def _get_stat_collector( reduction_axes, aggregation_axes = None, None if shape is not None: reduction_axes, aggregation_axes = collector_params.get_reduction_aggregation_axes( - shape, channel_axes, is_per_sample + shape, channel_axes, batchwise_statistics ) return self._backend_entity.get_statistic_collector( diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 1b48c4f40d2..2e64551d05b 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -120,15 +120,11 @@ def create_quantizer_insertion_command( parameters: FakeQuantizeParameters, ): tensor_type = np.int8 if np.any(parameters.input_low.data < 0) else np.uint8 - is_weight = target_point.is_weight_target_point() - if is_weight: + if target_point.is_weight_target_point(): tensor_type = np.int8 # The weight is restricted to have only signed range nncf_input_node_next_nodes = ONNXMinMaxAlgoBackend._get_input_edges_mapping(nncf_graph) node = nncf_graph.get_node_by_name(target_point.target_node_name) - if quantizer_config.per_channel: - axis = ONNXMinMaxAlgoBackend.get_channel_axes(node, target_point) - else: - axis = () + axis = ONNXMinMaxAlgoBackend.get_channel_axes(node, target_point) if quantizer_config.per_channel else () onnx_parameters = convert_fq_params_to_onnx_params(parameters, quantizer_config.num_bits, tensor_type, axis) return ONNXQuantizerInsertionCommand(target_point, 
nncf_input_node_next_nodes, onnx_parameters) diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index 0ff1ac227d2..519d653c5e4 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -654,7 +654,7 @@ def create_statistics_point( algo._set_backend_entity(model) nncf_graph = NNCFGraphFactory.create(model) algo._subset_size = subset_size - tensor_collector = algo._get_stat_collector(nncf_graph, target_point, q_config, is_per_sample=False) + tensor_collector = algo._get_stat_collector(nncf_graph, target_point, q_config, False) return StatisticPoint(target_point=target_point, tensor_collector=tensor_collector, algorithm=algorithm_name) @pytest.mark.parametrize( @@ -823,6 +823,7 @@ def test_statistic_merging(self, test_params, key, dataset_samples, inplace_stat StatisticsType.MEAN, StatisticsType.QUANTILE, StatisticsType.ABS_QUANTILE, + "batch_mean", "mean_per_ch", ], ) From f19fd71aea6dfc81f1d697c7db85efe27d762dcc Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 4 Mar 2024 23:49:17 +0100 Subject: [PATCH 089/108] add test --- nncf/onnx/graph/metatypes/onnx_metatypes.py | 4 +- nncf/onnx/graph/node_utils.py | 11 +- .../algorithms/min_max/algorithm.py | 2 +- .../algorithms/min_max/backend.py | 10 +- .../algorithms/min_max/onnx_backend.py | 15 +- .../algorithms/min_max/openvino_backend.py | 8 +- .../algorithms/min_max/torch_backend.py | 10 +- tests/common/quantization/mock_graphs.py | 15 +- tests/onnx/quantization/test_min_max.py | 81 +++++++++ .../native/quantization/test_min_max.py | 81 +++++++++ tests/post_training/test_templates/models.py | 43 ++++- .../test_templates/test_min_max.py | 163 ++++++++++++++++++ tests/torch/ptq/test_min_max.py | 117 +++++++++++++ 13 files changed, 525 insertions(+), 35 deletions(-) create mode 100644 tests/onnx/quantization/test_min_max.py create mode 100644 tests/openvino/native/quantization/test_min_max.py create mode 100644 
tests/post_training/test_templates/test_min_max.py create mode 100644 tests/torch/ptq/test_min_max.py diff --git a/nncf/onnx/graph/metatypes/onnx_metatypes.py b/nncf/onnx/graph/metatypes/onnx_metatypes.py index 4902fbf8e59..f16572b28b7 100644 --- a/nncf/onnx/graph/metatypes/onnx_metatypes.py +++ b/nncf/onnx/graph/metatypes/onnx_metatypes.py @@ -113,7 +113,7 @@ class ONNXGemmMetatype(ONNXOpWithWeightsMetatype): name = "GemmOp" op_names = ["Gemm"] hw_config_names = [HWConfigOpName.MATMUL] - weight_channel_axis = -1 + weight_channel_axis = -1 # For port_id=1 weight_port_ids = None bias_port_id = 2 possible_weight_ports = [0, 1] @@ -125,7 +125,7 @@ class ONNXMatMulMetatype(ONNXOpMetatype): name = "MatMulOp" op_names = ["MatMul"] hw_config_names = [HWConfigOpName.MATMUL] - weight_channel_axis = -1 + weight_channel_axis = -1 # For port_id=1 weight_port_ids = None bias_port_id = 2 possible_weight_ports = [0, 1] diff --git a/nncf/onnx/graph/node_utils.py b/nncf/onnx/graph/node_utils.py index e2cd2402c4d..d255a8a94c3 100644 --- a/nncf/onnx/graph/node_utils.py +++ b/nncf/onnx/graph/node_utils.py @@ -127,10 +127,10 @@ def transpose_axis(shape: List[int], axis: int) -> int: Returns transpose axis. :param shape: Tensor shape. - :param axis: Axis before transpose. + :param axis: Axis before transpose (only positive). :return: Axis after transpose. 
""" - axis %= len(shape) # Make axis positive + assert axis >= 0 return range(len(shape) - 1, -1, -1)[axis] # Iterate backward throug axis @@ -145,6 +145,9 @@ def get_weight_quantization_axis(node: NNCFNode, port_id: int) -> int: weight_channel_axis = node.metatype.weight_channel_axis if node.layer_attributes.has_node_attrs() and node.metatype == om.ONNXGemmMetatype: weight_shape = node.layer_attributes.weight_attrs[port_id]["shape"] + weight_channel_axis %= len(weight_shape) # Make axis positive + if port_id == 0: + weight_channel_axis -= 1 if ( port_id == 0 and node.layer_attributes.node_attrs["transA"] == 1 @@ -157,7 +160,7 @@ def get_weight_quantization_axis(node: NNCFNode, port_id: int) -> int: def _get_activation_tensor_shape( nncf_graph: NNCFGraph, node: NNCFNode, target_point: ONNXTargetPoint -) -> Optional[List[int]]: +) -> Optional[Tuple[int]]: """ Returns shape of an activation tensor which is correspond to the target point and node. ONNX model can not have a shape of a edge, even after shape inference. @@ -193,7 +196,7 @@ def _get_activation_tensor_shape( def get_quantized_tensor_shape( nncf_graph: NNCFGraph, node: NNCFNode, target_point: ONNXTargetPoint -) -> Optional[List[int]]: +) -> Optional[Tuple[int]]: """ Returns quantized tensor shape corresponding to a target point with a node if shape - info is existed. If there is no shape info - returns None. diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 003685b7c73..81631e388ac 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -422,7 +422,7 @@ def _get_stat_collector( channel_axes = () if qconfig.per_channel: - channel_axes = self._backend_entity.get_channel_axes(node, target_point) + channel_axes = self._backend_entity.get_weight_quantization_axes(node, target_point) if is_weight else (1,) # Weight statistics is constant, so only one collection is enough. 
num_samples = self._subset_size if not is_weight else 1 diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index 0ee27f12218..a611bfc8ba3 100644 --- a/nncf/quantization/algorithms/min_max/backend.py +++ b/nncf/quantization/algorithms/min_max/backend.py @@ -184,7 +184,7 @@ def unify_statistics(statistics: List[MinMaxTensorStatistic]) -> MinMaxTensorSta @staticmethod @abstractmethod - def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: TargetPoint) -> List[int]: + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: TargetPoint) -> Tuple[int]: """ Returns shape of a targer point tensor. @@ -196,13 +196,13 @@ def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: @staticmethod @abstractmethod - def get_channel_axes(node: NNCFNode, target_point: TargetPoint) -> Tuple[int]: + def get_weight_quantization_axes(node: NNCFNode, target_point: TargetPoint) -> Tuple[int]: """ - Returns axes for per-channel quantization. + Returns axes for per-channel quantization of weights of the node placed on a input port_id. - :param node: Quantized node. + :param node: Quantized node with the wieght. :param target_point: Corresponding target point. - :return: Axes for per-channel quantization. + :return: Axes for per-channel quantization of weights. 
""" @staticmethod diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 2e64551d05b..5f91f6ebe67 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -120,11 +120,14 @@ def create_quantizer_insertion_command( parameters: FakeQuantizeParameters, ): tensor_type = np.int8 if np.any(parameters.input_low.data < 0) else np.uint8 - if target_point.is_weight_target_point(): + is_weight = target_point.is_weight_target_point() + if is_weight: tensor_type = np.int8 # The weight is restricted to have only signed range nncf_input_node_next_nodes = ONNXMinMaxAlgoBackend._get_input_edges_mapping(nncf_graph) node = nncf_graph.get_node_by_name(target_point.target_node_name) - axis = ONNXMinMaxAlgoBackend.get_channel_axes(node, target_point) if quantizer_config.per_channel else () + axis = () + if quantizer_config.per_channel: + axis = ONNXMinMaxAlgoBackend.get_weight_quantization_axes(node, target_point) if is_weight else (1,) onnx_parameters = convert_fq_params_to_onnx_params(parameters, quantizer_config.num_bits, tensor_type, axis) return ONNXQuantizerInsertionCommand(target_point, nncf_input_node_next_nodes, onnx_parameters) @@ -152,14 +155,12 @@ def _get_input_edges_mapping(nncf_graph: NNCFGraph): return get_input_edges_mapping(nncf_graph) @staticmethod - def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: ONNXTargetPoint) -> List[int]: + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: ONNXTargetPoint) -> Tuple[int]: return get_quantized_tensor_shape(nncf_graph, node, target_point) @staticmethod - def get_channel_axes(node: NNCFNode, target_point: ONNXTargetPoint) -> Tuple[int]: - if target_point.is_weight_target_point(): - return (get_weight_quantization_axis(node, target_point.port_id),) - return (1,) + def get_weight_quantization_axes(node: NNCFNode, target_point: 
ONNXTargetPoint) -> Tuple[int]: + return (get_weight_quantization_axis(node, target_point.port_id),) @staticmethod def get_statistic_collector( diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 293f5f0c90f..59ae3539143 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -138,7 +138,7 @@ def unify_statistics(statistics: List[OVMinMaxTensorStatistic]) -> OVMinMaxTenso return OVMinMaxTensorStatistic(min_values=min_values, max_values=max_values) @staticmethod - def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: OVTargetPoint) -> List[int]: + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: OVTargetPoint) -> Tuple[int]: if target_point.is_weight_target_point(): return node.layer_attributes.constant_attributes[target_point.port_id]["shape"] if target_point.type == TargetType.PRE_LAYER_OPERATION: @@ -148,10 +148,8 @@ def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: raise NotImplementedError(f"Unsupported target point type {target_point.type}.") @staticmethod - def get_channel_axes(node: NNCFNode, target_point: OVTargetPoint) -> Tuple[int]: - if target_point.is_weight_target_point(): - return get_weight_channel_axes(node) - return (1,) + def get_weight_quantization_axes(node: NNCFNode, target_point: OVTargetPoint) -> Tuple[int]: + return tuple(get_weight_channel_axes(node)) @staticmethod def get_statistic_collector( diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index dfc351976cb..ddd726850d5 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -157,16 +157,14 @@ def unify_statistics(statistics: List[PTMinMaxTensorStatistic]) -> PTMinMaxTenso return 
PTMinMaxTensorStatistic(min_values=min_values, max_values=max_values) @staticmethod - def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: PTTargetPoint) -> List[int]: + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: PTTargetPoint) -> Tuple[int]: if target_point.is_weight_target_point(): - return node.layer_attributes.get_weight_shape() + return tuple(node.layer_attributes.get_weight_shape()) return nncf_graph.get_input_shape_for_insertion_point(target_point) @staticmethod - def get_channel_axes(node: NNCFNode, target_point: PTTargetPoint) -> Tuple[int]: - if target_point.is_weight_target_point(): - return (node.layer_attributes.get_target_dim_for_compression(),) - return (1,) + def get_weight_quantization_axes(node: NNCFNode, target_point: PTTargetPoint) -> Tuple[int]: + return (node.layer_attributes.get_target_dim_for_compression(),) @staticmethod def get_statistic_collector( diff --git a/tests/common/quantization/mock_graphs.py b/tests/common/quantization/mock_graphs.py index 7fe44a9836f..b8cb41ee142 100644 --- a/tests/common/quantization/mock_graphs.py +++ b/tests/common/quantization/mock_graphs.py @@ -10,7 +10,7 @@ # limitations under the License. 
import random -from typing import Dict, List, Optional, Set, Tuple +from typing import Any, Dict, List, Optional, Set, Tuple from unittest.mock import MagicMock import networkx as nx @@ -57,7 +57,9 @@ def __init__( self.layer_attributes = layer_attributes -def create_mock_graph(nodes: List[NodeWithType], node_edges: List[Tuple[str, str]]) -> nx.DiGraph: +def create_mock_graph( + nodes: List[NodeWithType], node_edges: List[Tuple[str, str]], edges_attrs: Optional[Tuple[Any]] = None +) -> nx.DiGraph: mock_graph = nx.DiGraph() for node in nodes: mock_node_attrs = get_mock_nncf_node_attrs( @@ -67,7 +69,11 @@ def create_mock_graph(nodes: List[NodeWithType], node_edges: List[Tuple[str, str layer_attributes=node.layer_attributes, ) mock_graph.add_node(node.node_name, **mock_node_attrs) - mock_graph.add_edges_from(node_edges) + if edges_attrs: + for (edge_from, edge_to), attr in zip(node_edges, edges_attrs): + mock_graph.add_edge(edge_from, edge_to, **attr) + else: + mock_graph.add_edges_from(node_edges) mark_input_ports_lexicographically_based_on_input_node_key(mock_graph) return mock_graph @@ -121,8 +127,9 @@ def get_nncf_graph_from_mock_nx_graph(nx_graph: nx.DiGraph, nncf_graph_cls=NNCFG out_idx, creator_id = edge_vs_output_idx_and_creator_id[in_edge] edge_data = nx_graph.edges[in_edge] dtype = edge_data.get(NNCFGraph.DTYPE_EDGE_ATTR, Dtype.FLOAT) + shape = edge_data.get(NNCFGraph.ACTIVATION_SHAPE_EDGE_ATTR, [1, 1, 1, 1]) mock_graph.add_edge_between_nncf_nodes( - creator_id, node_id, [1, 1, 1, 1], input_port_id=pred_idx, output_port_id=out_idx, dtype=dtype + creator_id, node_id, shape, input_port_id=pred_idx, output_port_id=out_idx, dtype=dtype ) for out_idx, out_edge in enumerate(nx_graph.out_edges(curr_node_key)): diff --git a/tests/onnx/quantization/test_min_max.py b/tests/onnx/quantization/test_min_max.py new file mode 100644 index 00000000000..191b545d6dc --- /dev/null +++ b/tests/onnx/quantization/test_min_max.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024 Intel 
Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest + +from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXConvolutionMetatype +from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXDepthwiseConvolutionMetatype +from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXGemmMetatype +from nncf.onnx.graph.nncf_graph_builder import ONNXLayerAttributes +from nncf.onnx.graph.transformations.commands import ONNXTargetPoint +from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend +from nncf.quantization.algorithms.min_max.onnx_backend import ONNXMinMaxAlgoBackend +from tests.post_training.test_templates.models import NNCFGraphToTest +from tests.post_training.test_templates.test_min_max import TemplateTestGetChannelAxes +from tests.post_training.test_templates.test_min_max import TemplateTestGetTargetPointShape +from tests.post_training.test_templates.test_min_max import TemplateTestMinMaxAlgorithm + + +class TestONNXMinMaxAlgorithm(TemplateTestMinMaxAlgorithm): + @property + def backend( + self, + ) -> MinMaxAlgoBackend: + return ONNXMinMaxAlgoBackend + + @property + def conv_metatype( + self, + ): + return ONNXConvolutionMetatype + + def create_target_point(self, target_point_type, name, port_id): + return ONNXTargetPoint(target_point_type, name, port_id) + + +class TestONNXGetTargetPointShape(TemplateTestGetTargetPointShape, TestONNXMinMaxAlgorithm): + def get_nncf_graph(self, weight_port_id, weight_shape): + conv_layer_attrs = 
ONNXLayerAttributes(weight_attrs={weight_port_id: {"shape": weight_shape}}, bias_attrs={}) + return NNCFGraphToTest(ONNXConvolutionMetatype, conv_layer_attrs).nncf_graph + + +class TestONNXGetChannelAxesMinMaxAlgorithm(TemplateTestGetChannelAxes, TestONNXMinMaxAlgorithm): + @property + def depthwiseconv_metatype(self): + return ONNXDepthwiseConvolutionMetatype + + @property + def matmul_metatype( + self, + ): + return ONNXGemmMetatype + + @staticmethod + def get_conv_node_attrs(weight_port_id, weight_shape): + return ONNXLayerAttributes(weight_attrs={weight_port_id: {"shape": weight_shape}}, bias_attrs={}) + + @staticmethod + def get_depthwiseconv_node_attrs(weight_port_id, weight_shape): + return ONNXLayerAttributes(weight_attrs={weight_port_id: {"shape": weight_shape}}, bias_attrs={}) + + @staticmethod + def get_matmul_node_attrs(weight_port_id, transpose_weight, weight_shape): + weight_attrs = {weight_port_id: {"name": "dummy", "shape": weight_shape}} + if weight_port_id == 0: + gemm_attrs = {"transA": int(transpose_weight), "transB": 0} + elif weight_port_id == 1: + gemm_attrs = {"transA": 0, "transB": int(transpose_weight)} + return ONNXLayerAttributes(weight_attrs=weight_attrs, node_attrs=gemm_attrs) + + def test_get_channel_axes_deptwiseconv_node_ov(self): + pytest.skip("Test is not applied for ONNX backend.") + + def test_get_channel_axes_matmul_torch(self): + pytest.skip("Test is not applied for ONNX backend.") diff --git a/tests/openvino/native/quantization/test_min_max.py b/tests/openvino/native/quantization/test_min_max.py new file mode 100644 index 00000000000..8adeeafba74 --- /dev/null +++ b/tests/openvino/native/quantization/test_min_max.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from nncf.openvino.graph.layer_attributes import OVLayerAttributes +from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype +from nncf.openvino.graph.metatypes.openvino_metatypes import OVDepthwiseConvolutionMetatype +from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype +from nncf.openvino.graph.transformations.commands import OVTargetPoint +from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend +from nncf.quantization.algorithms.min_max.openvino_backend import OVMinMaxAlgoBackend +from tests.post_training.test_templates.models import NNCFGraphToTest +from tests.post_training.test_templates.test_min_max import TemplateTestGetChannelAxes +from tests.post_training.test_templates.test_min_max import TemplateTestGetTargetPointShape +from tests.post_training.test_templates.test_min_max import TemplateTestMinMaxAlgorithm + + +class TestOVMinMaxAlgorithm(TemplateTestMinMaxAlgorithm): + @property + def backend( + self, + ) -> MinMaxAlgoBackend: + return OVMinMaxAlgoBackend + + @property + def conv_metatype( + self, + ): + return OVConvolutionMetatype + + def create_target_point(self, target_point_type, name, port_id): + return OVTargetPoint(target_point_type, name, port_id) + + +class TestOVGetTargetPointShape(TemplateTestGetTargetPointShape, TestOVMinMaxAlgorithm): + def get_nncf_graph(self, weight_port_id, weight_shape): + conv_layer_attrs = OVLayerAttributes({weight_port_id: {"name": "dummy", "shape": weight_shape}}) + return NNCFGraphToTest(OVConvolutionMetatype, 
conv_layer_attrs).nncf_graph + + +class TestOVGetChannelAxes(TemplateTestGetChannelAxes, TestOVMinMaxAlgorithm): + @property + def depthwiseconv_metatype(self): + return OVDepthwiseConvolutionMetatype + + @property + def matmul_metatype( + self, + ): + return OVMatMulMetatype + + @staticmethod + def get_conv_node_attrs(weight_port_id, weight_shape): + constant_attributes = {weight_port_id: {"name": "dummy", "shape": weight_shape}} + return OVLayerAttributes(constant_attributes, {}, {}) + + @staticmethod + def get_depthwiseconv_node_attrs(weight_port_id, weight_shape): + constant_attributes = {weight_port_id: {"name": "dummy", "shape": weight_shape}} + return OVLayerAttributes(constant_attributes, {}, {}) + + @staticmethod + def get_matmul_node_attrs(weight_port_id, transpose_weight, weight_shape): + constant_attributes = {weight_port_id: {"name": "dummy", "shape": weight_shape}} + constant_attributes[weight_port_id]["transpose"] = transpose_weight + return OVLayerAttributes(constant_attributes, {}, {}) + + def test_get_channel_axes_deptwiseconv_node_onnx_torch(self): + pytest.skip("Test is not applied for OV backend.") + + def test_get_channel_axes_matmul_torch(self): + pytest.skip("Test is not applied for OV backend.") diff --git a/tests/post_training/test_templates/models.py b/tests/post_training/test_templates/models.py index 884e2e0e662..0fbc50762b7 100644 --- a/tests/post_training/test_templates/models.py +++ b/tests/post_training/test_templates/models.py @@ -40,7 +40,14 @@ def __init__( NodeWithType("Output_1", OutputNoopMetatype, layer_attributes=output_layer_attrs), ] node_edges = [("Input_1", "Conv_1"), ("Conv_1", "Output_1")] - original_mock_graph = create_mock_graph(nodes, node_edges) + original_mock_graph = create_mock_graph( + nodes, + node_edges, + ( + {NNCFGraph.ACTIVATION_SHAPE_EDGE_ATTR: (1, 3, 224, 224)}, + {NNCFGraph.ACTIVATION_SHAPE_EDGE_ATTR: (1, 10, 224, 224)}, + ), + ) self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, 
nncf_graph_cls) @@ -374,3 +381,37 @@ def __init__( original_mock_graph = create_mock_graph(nodes, edges) self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, nncf_graph_cls) + + +class NNCFGraphToTestMinMax: + def __init__( + self, + conv_metatype, + sum_metatype, + conv_layer_attrs=None, + nncf_graph_cls=NNCFGraph, + sum_layer_attrs=None, + input_layer_attrs=None, + output_layer_attrs=None, + ): + # Original graph + # Input_1 + # | + # Conv_1 + # | + # Sum_1 + # | + # Output_1 + nodes = [ + NodeWithType("Input_1", InputNoopMetatype, layer_attributes=input_layer_attrs), + NodeWithType("Conv_1", conv_metatype, layer_attributes=conv_layer_attrs), + NodeWithType("Sum_1", sum_metatype, layer_attributes=sum_layer_attrs), + NodeWithType("Output_1", OutputNoopMetatype, layer_attributes=output_layer_attrs), + ] + node_edges = [("Input_1", "Conv_1"), ("Conv_1", "Sum_1"), ("Sum_1", "Output_1")] + original_mock_graph = create_mock_graph(nodes, node_edges) + self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, nncf_graph_cls) + # Hack output size of the Sum_1 operation + self.nncf_graph._nx_graph.out_edges[("2 /Sum_1_0", "3 /Output_1_0")][ + self.nncf_graph.ACTIVATION_SHAPE_EDGE_ATTR + ] = [1, 1, 1] diff --git a/tests/post_training/test_templates/test_min_max.py b/tests/post_training/test_templates/test_min_max.py new file mode 100644 index 00000000000..182569c9009 --- /dev/null +++ b/tests/post_training/test_templates/test_min_max.py @@ -0,0 +1,163 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from abc import abstractmethod + +import pytest + +from nncf.common.graph.graph import NNCFNode +from nncf.common.graph.transformations.commands import TargetType +from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend + +CONV_WEIGHT_SHAPE = (3, 10, 4, 4) +DEPTHWISECONV_WEIGHT_SHAPE = (5, 10, 20, 7, 7) +MATMUL_WEIGHT_SHAPE = (2, 4) + + +class TemplateTestMinMaxAlgorithm: + @property + @abstractmethod + def backend( + self, + ) -> MinMaxAlgoBackend: + """ + Get backend specific BiasCorrectionAlgoBackend + + :return BiasCorrectionAlgoBackend: Backend specific BiasCorrectionAlgoBackend + """ + + @property + @abstractmethod + def conv_metatype( + self, + ): + """ """ + + @property + @abstractmethod + def create_target_point(self, target_point_type, name, port_id): + """ """ + + +class TemplateTestGetTargetPointShape(TemplateTestMinMaxAlgorithm): + @abstractmethod + def get_nncf_graph(self, weight_port_id, weight_shape): + """ """ + + @pytest.mark.parametrize( + "target_point_type, input_port_id, reference_shape", + ( + (TargetType.PRE_LAYER_OPERATION, 0, (1, 3, 224, 224)), + (TargetType.POST_LAYER_OPERATION, 0, (1, 10, 224, 224)), + (TargetType.OPERATION_WITH_WEIGHTS, 1, (3, 10, 4, 4)), + ), + ) + def test_get_target_point_shape(self, target_point_type, input_port_id, reference_shape): + nncf_graph = self.get_nncf_graph(input_port_id, CONV_WEIGHT_SHAPE) + nodes = nncf_graph.get_nodes_by_metatypes((self.conv_metatype,)) + assert len(nodes) == 1 + node = nodes.pop() + target_point = self.create_target_point(target_point_type, node.node_name, input_port_id) + assert self.backend().get_target_point_shape(nncf_graph, node, target_point) == reference_shape + + +class TemplateTestGetChannelAxes(TemplateTestMinMaxAlgorithm): + @property + @abstractmethod + def depthwiseconv_metatype( + self, + ): + """ """ + + @property + @abstractmethod + def 
matmul_metatype( + self, + ): + """ """ + + @staticmethod + @abstractmethod + def get_conv_node_attrs(weight_port_id, shape): + """ """ + + @staticmethod + @abstractmethod + def get_matmul_node_attrs(): + """ """ + + @pytest.mark.parametrize( + "conv_shape, weight_port_id, ref_axes", ((CONV_WEIGHT_SHAPE, 0, (0,)), (CONV_WEIGHT_SHAPE, 1, (0,))) + ) + def test_get_channel_axes_conv_node(self, conv_shape, weight_port_id, ref_axes): + """ + Checks Convolution quantization axes in MinMax for OV, ONNX and Torch. + """ + conv_node = NNCFNode({"metatype": self.conv_metatype}) + conv_node.layer_attributes = self.get_conv_node_attrs(weight_port_id, conv_shape) + target_point = self.create_target_point(None, None, weight_port_id) + assert self.backend().get_weight_quantization_axes(conv_node, target_point) == ref_axes + + @pytest.mark.parametrize( + "conv_shape, weight_port_id, ref_axes", + ((DEPTHWISECONV_WEIGHT_SHAPE, 0, (0,)), (DEPTHWISECONV_WEIGHT_SHAPE, 1, (0,))), + ) + def test_get_channel_axes_deptwiseconv_node_onnx_torch(self, conv_shape, weight_port_id, ref_axes): + """ + Checks Depthwise convolution quantization axes in MinMax for ONNX and Torch. + """ + conv_node = NNCFNode({"metatype": self.depthwiseconv_metatype}) + conv_node.layer_attributes = self.get_depthwiseconv_node_attrs(weight_port_id, conv_shape) + target_point = self.create_target_point(None, None, weight_port_id) + assert self.backend().get_weight_quantization_axes(conv_node, target_point) == ref_axes + + @pytest.mark.parametrize( + "conv_shape, weight_port_id, ref_axes", + ((DEPTHWISECONV_WEIGHT_SHAPE, 0, (0, 1)), (DEPTHWISECONV_WEIGHT_SHAPE, 1, (0, 1))), + ) + def test_get_channel_axes_deptwiseconv_node_ov(self, conv_shape, weight_port_id, ref_axes): + """ + Checks Depthwise convolution quantization axes in MinMax for OV. 
+ """ + conv_node = NNCFNode({"metatype": self.depthwiseconv_metatype}) + conv_node.layer_attributes = self.get_depthwiseconv_node_attrs(weight_port_id, conv_shape) + target_point = self.create_target_point(None, None, weight_port_id) + assert self.backend().get_weight_quantization_axes(conv_node, target_point) == ref_axes + + @pytest.mark.parametrize( + "weight_shape, weight_port_id, transpose_weight, ref_axes", + ( + (MATMUL_WEIGHT_SHAPE, 1, False, (1,)), + (MATMUL_WEIGHT_SHAPE, 1, True, (0,)), + (MATMUL_WEIGHT_SHAPE, 0, True, (1,)), + (MATMUL_WEIGHT_SHAPE, 0, False, (0,)), + ), + ) + def test_get_channel_axes_matmul_node_ov_onnx(self, weight_shape, weight_port_id, transpose_weight, ref_axes): + """ + Checks MatMul quantization axes in MinMax for OV and ONNX. + """ + matmul_node = NNCFNode({"metatype": self.matmul_metatype}) + matmul_node.layer_attributes = self.get_matmul_node_attrs(weight_port_id, transpose_weight, weight_shape) + target_point = self.create_target_point(None, None, weight_port_id) + assert self.backend().get_weight_quantization_axes(matmul_node, target_point) == ref_axes + + @pytest.mark.parametrize( + "weight_shape, ref_axes", + # Torch has strict specification - weight has the following layout: [C_OUT, C_IN] + ((MATMUL_WEIGHT_SHAPE, (0,)),), + ) + def test_get_channel_axes_matmul_torch(self, weight_shape, ref_axes): + """ + Checks MatMul quantization axes in MinMax for Torch. 
+ """ + matmul_node = NNCFNode({"metatype": self.matmul_metatype}) + matmul_node.layer_attributes = self.get_matmul_node_attrs(weight_shape) + assert self.backend().get_weight_quantization_axes(matmul_node, "dummy") == ref_axes diff --git a/tests/torch/ptq/test_min_max.py b/tests/torch/ptq/test_min_max.py new file mode 100644 index 00000000000..68e7de0414c --- /dev/null +++ b/tests/torch/ptq/test_min_max.py @@ -0,0 +1,117 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest + +from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes +from nncf.common.graph.layer_attributes import LinearLayerAttributes +from nncf.common.graph.transformations.commands import TargetType +from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend +from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend +from nncf.torch.graph.graph import PTNNCFGraph +from nncf.torch.graph.operator_metatypes import PTConv2dMetatype +from nncf.torch.graph.operator_metatypes import PTDepthwiseConv2dSubtype +from nncf.torch.graph.operator_metatypes import PTLinearMetatype +from nncf.torch.graph.transformations.commands import PTTargetPoint +from tests.post_training.test_templates.models import NNCFGraphToTest +from tests.post_training.test_templates.test_min_max import TemplateTestGetChannelAxes +from tests.post_training.test_templates.test_min_max import TemplateTestGetTargetPointShape +from tests.post_training.test_templates.test_min_max import TemplateTestMinMaxAlgorithm + + +class TestTorchMinMaxAlgorithm(TemplateTestMinMaxAlgorithm): + @property + def backend( + self, + ) -> MinMaxAlgoBackend: + return PTMinMaxAlgoBackend + + @property + def conv_metatype( + self, + ): + return PTConv2dMetatype + + def create_target_point(self, target_point_type, name, port_id): + if target_point_type == TargetType.POST_LAYER_OPERATION: + port_id = None + return PTTargetPoint(target_point_type, name, input_port_id=port_id) + + +class TestOVGetTargetPointShape(TemplateTestGetTargetPointShape, TestTorchMinMaxAlgorithm): + def get_nncf_graph(self, weight_port_id, weight_shape): + conv_layer_attrs = ConvolutionLayerAttributes( + weight_requires_grad=True, + in_channels=weight_shape[1], + out_channels=weight_shape[0], + kernel_size=weight_shape[2:], + stride=1, + dilations=1, + groups=1, + transpose=False, + padding_values=[], + ) + return NNCFGraphToTest(PTConv2dMetatype, conv_layer_attrs, PTNNCFGraph).nncf_graph + + 
+class TestTorchGetChannelAxes(TemplateTestGetChannelAxes, TestTorchMinMaxAlgorithm): + @property + def depthwiseconv_metatype( + self, + ): + return PTDepthwiseConv2dSubtype + + @property + def matmul_metatype( + self, + ): + return PTLinearMetatype + + @staticmethod + def get_conv_node_attrs(weight_port_id, weight_shape): + return ConvolutionLayerAttributes( + weight_requires_grad=False, + in_channels=weight_shape[0], + out_channels=weight_shape[1], + kernel_size=weight_shape[2:], + stride=1, + dilations=1, + groups=1, + transpose=False, + padding_values=[], + ) + + @staticmethod + def get_depthwiseconv_node_attrs(weight_port_id, weight_shape): + return ConvolutionLayerAttributes( + weight_requires_grad=False, + in_channels=weight_shape[1], + out_channels=weight_shape[2], + kernel_size=weight_shape[3:], + stride=1, + dilations=1, + groups=weight_shape[0], + transpose=False, + padding_values=[], + ) + + @staticmethod + def get_matmul_node_attrs(weight_shape): + return LinearLayerAttributes(False, in_features=weight_shape[0], out_features=weight_shape[1]) + + def test_get_channel_axes_matmul_node_ov_onnx( + self, + ): + pytest.skip("Test is not applied for Torch backend.") + + def test_get_channel_axes_deptwiseconv_node_ov( + self, + ): + pytest.skip("Test is not applied for Torch backend.") From 4996333ea257a90196b08d2dcd1a30193af4db11 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 5 Mar 2024 00:02:02 +0100 Subject: [PATCH 090/108] small fixes --- tests/onnx/test_node_utils.py | 6 ++---- .../test_templates/test_min_max.py | 20 +++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/tests/onnx/test_node_utils.py b/tests/onnx/test_node_utils.py index 09ebfe7de9f..d73bd002acd 100644 --- a/tests/onnx/test_node_utils.py +++ b/tests/onnx/test_node_utils.py @@ -32,13 +32,11 @@ def test_get_bias_value(model): @pytest.mark.parametrize( "shape, axis, expected_channel_axis", [ - ((1, 3, 5, 5), -1, 0), + ((1, 3, 5, 5), 3, 0), ((1, 3, 5, 
5), 1, 2), ((1, 3, 5, 5), 0, 3), ((1, 3, 5, 5), 2, 1), - ((1, 3, 5, 5), -2, 1), - ((1,), -1, 0), - ((1, 1), -1, 0), + ((1,), 0, 0), ((1, 1), 1, 0), ((1, 1), 0, 1), ], diff --git a/tests/post_training/test_templates/test_min_max.py b/tests/post_training/test_templates/test_min_max.py index 182569c9009..268ebf90bae 100644 --- a/tests/post_training/test_templates/test_min_max.py +++ b/tests/post_training/test_templates/test_min_max.py @@ -38,12 +38,12 @@ def backend( def conv_metatype( self, ): - """ """ + pass @property @abstractmethod def create_target_point(self, target_point_type, name, port_id): - """ """ + pass class TemplateTestGetTargetPointShape(TemplateTestMinMaxAlgorithm): @@ -74,24 +74,24 @@ class TemplateTestGetChannelAxes(TemplateTestMinMaxAlgorithm): def depthwiseconv_metatype( self, ): - """ """ + pass @property @abstractmethod def matmul_metatype( self, ): - """ """ + pass @staticmethod @abstractmethod def get_conv_node_attrs(weight_port_id, shape): - """ """ + pass @staticmethod @abstractmethod def get_matmul_node_attrs(): - """ """ + pass @pytest.mark.parametrize( "conv_shape, weight_port_id, ref_axes", ((CONV_WEIGHT_SHAPE, 0, (0,)), (CONV_WEIGHT_SHAPE, 1, (0,))) @@ -102,7 +102,7 @@ def test_get_channel_axes_conv_node(self, conv_shape, weight_port_id, ref_axes): """ conv_node = NNCFNode({"metatype": self.conv_metatype}) conv_node.layer_attributes = self.get_conv_node_attrs(weight_port_id, conv_shape) - target_point = self.create_target_point(None, None, weight_port_id) + target_point = self.create_target_point(TargetType.PRE_LAYER_OPERATION, None, weight_port_id) assert self.backend().get_weight_quantization_axes(conv_node, target_point) == ref_axes @pytest.mark.parametrize( @@ -115,7 +115,7 @@ def test_get_channel_axes_deptwiseconv_node_onnx_torch(self, conv_shape, weight_ """ conv_node = NNCFNode({"metatype": self.depthwiseconv_metatype}) conv_node.layer_attributes = self.get_depthwiseconv_node_attrs(weight_port_id, conv_shape) - target_point = 
self.create_target_point(None, None, weight_port_id) + target_point = self.create_target_point(TargetType.PRE_LAYER_OPERATION, None, weight_port_id) assert self.backend().get_weight_quantization_axes(conv_node, target_point) == ref_axes @pytest.mark.parametrize( @@ -128,7 +128,7 @@ def test_get_channel_axes_deptwiseconv_node_ov(self, conv_shape, weight_port_id, """ conv_node = NNCFNode({"metatype": self.depthwiseconv_metatype}) conv_node.layer_attributes = self.get_depthwiseconv_node_attrs(weight_port_id, conv_shape) - target_point = self.create_target_point(None, None, weight_port_id) + target_point = self.create_target_point(TargetType.PRE_LAYER_OPERATION, None, weight_port_id) assert self.backend().get_weight_quantization_axes(conv_node, target_point) == ref_axes @pytest.mark.parametrize( @@ -146,7 +146,7 @@ def test_get_channel_axes_matmul_node_ov_onnx(self, weight_shape, weight_port_id """ matmul_node = NNCFNode({"metatype": self.matmul_metatype}) matmul_node.layer_attributes = self.get_matmul_node_attrs(weight_port_id, transpose_weight, weight_shape) - target_point = self.create_target_point(None, None, weight_port_id) + target_point = self.create_target_point(TargetType.PRE_LAYER_OPERATION, None, weight_port_id) assert self.backend().get_weight_quantization_axes(matmul_node, target_point) == ref_axes @pytest.mark.parametrize( From 5e8bce7d4160f9e2a15a5576bcfea47ef652860b Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Tue, 5 Mar 2024 11:14:03 +0100 Subject: [PATCH 091/108] polishing --- tests/post_training/test_templates/models.py | 34 ------------------- .../test_templates/test_min_max.py | 14 +++----- 2 files changed, 4 insertions(+), 44 deletions(-) diff --git a/tests/post_training/test_templates/models.py b/tests/post_training/test_templates/models.py index 0fbc50762b7..b4bbccedb60 100644 --- a/tests/post_training/test_templates/models.py +++ b/tests/post_training/test_templates/models.py @@ -381,37 +381,3 @@ def __init__( original_mock_graph = 
create_mock_graph(nodes, edges) self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, nncf_graph_cls) - - -class NNCFGraphToTestMinMax: - def __init__( - self, - conv_metatype, - sum_metatype, - conv_layer_attrs=None, - nncf_graph_cls=NNCFGraph, - sum_layer_attrs=None, - input_layer_attrs=None, - output_layer_attrs=None, - ): - # Original graph - # Input_1 - # | - # Conv_1 - # | - # Sum_1 - # | - # Output_1 - nodes = [ - NodeWithType("Input_1", InputNoopMetatype, layer_attributes=input_layer_attrs), - NodeWithType("Conv_1", conv_metatype, layer_attributes=conv_layer_attrs), - NodeWithType("Sum_1", sum_metatype, layer_attributes=sum_layer_attrs), - NodeWithType("Output_1", OutputNoopMetatype, layer_attributes=output_layer_attrs), - ] - node_edges = [("Input_1", "Conv_1"), ("Conv_1", "Sum_1"), ("Sum_1", "Output_1")] - original_mock_graph = create_mock_graph(nodes, node_edges) - self.nncf_graph = get_nncf_graph_from_mock_nx_graph(original_mock_graph, nncf_graph_cls) - # Hack output size of the Sum_1 operation - self.nncf_graph._nx_graph.out_edges[("2 /Sum_1_0", "3 /Output_1_0")][ - self.nncf_graph.ACTIVATION_SHAPE_EDGE_ATTR - ] = [1, 1, 1] diff --git a/tests/post_training/test_templates/test_min_max.py b/tests/post_training/test_templates/test_min_max.py index 268ebf90bae..df9a867e2d6 100644 --- a/tests/post_training/test_templates/test_min_max.py +++ b/tests/post_training/test_templates/test_min_max.py @@ -35,9 +35,7 @@ def backend( @property @abstractmethod - def conv_metatype( - self, - ): + def conv_metatype(self): pass @property @@ -49,7 +47,7 @@ def create_target_point(self, target_point_type, name, port_id): class TemplateTestGetTargetPointShape(TemplateTestMinMaxAlgorithm): @abstractmethod def get_nncf_graph(self, weight_port_id, weight_shape): - """ """ + pass @pytest.mark.parametrize( "target_point_type, input_port_id, reference_shape", @@ -71,16 +69,12 @@ def test_get_target_point_shape(self, target_point_type, input_port_id, referenc 
class TemplateTestGetChannelAxes(TemplateTestMinMaxAlgorithm): @property @abstractmethod - def depthwiseconv_metatype( - self, - ): + def depthwiseconv_metatype(self): pass @property @abstractmethod - def matmul_metatype( - self, - ): + def matmul_metatype(self): pass @staticmethod From 9ba570082b3c50330759741672c2a54d906c0fc0 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 6 Mar 2024 13:39:42 +0100 Subject: [PATCH 092/108] conformance adoption for any batch_size; better logging --- .../algorithms/min_max/algorithm.py | 11 +++-- .../pipelines/image_classification_timm.py | 47 +++++++++++++------ 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 81631e388ac..46592d318c6 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -71,6 +71,12 @@ num_bits=8, mode=QuantizationScheme.SYMMETRIC, signedness_to_force=None, per_channel=False ) +BATCHWISE_STATISTICS_TRANSFORMERS_WARNING = ( + "For transformer-like models, batchwise_statistics option could result in inaccurate statistics. " + "The recommendation is to collect statistics with dataloader " + "having batch_size = 1 and turn off batchwise_statistics option." +) + @dataclasses.dataclass class ModeBasedDefaults: @@ -931,10 +937,7 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin quantization_target_points, _ = self._get_quantization_target_points(model, graph) output = StatisticPointsContainer() if self._model_type == ModelType.TRANSFORMER and self._batchwise_statistics: - nncf_logger.warning( - "For transfomer-like models batch_size > 1 could result in inaccurate statistics. \ - The recomendation is to use batch_size = 1." 
- ) + nncf_logger.warning(BATCHWISE_STATISTICS_TRANSFORMERS_WARNING) for quantization_target_point, qconfig in quantization_target_points.items(): stat_collector = self._get_stat_collector( graph, quantization_target_point, qconfig, self._batchwise_statistics diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index cf776a23003..cf095260e17 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -36,6 +36,16 @@ # Disable using aten::scaled_dot_product_attention set_fused_attn(False, False) +BATCH_SIZE_NOT_A_DIVISOR_MESSAGE = ( + "The model validation will be done with batch_size=1 because the provided batch_size value " + "is not a divisor of the length of the validation dataset. The compressed model also " + "will be reshaped to a shape with batch_size=1." +) +BATCH_SIZE_OPTION_RECOMMENDATION_MESSAGE = ( + "To avoid model reshaping, please, provide the --batch_size option which " + "is a divisor of the length of the validation dataset." 
+) + class ImageClassificationTimm(PTQTestPipeline): """Pipeline for Image Classification model from timm repository""" @@ -118,24 +128,33 @@ def prepare_calibration_dataset(self): def _validate(self): val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform) + dataset_size = len(val_dataset) + core = ov.Core() + ov_model = core.read_model(self.path_compressed_ir) + compiled_model = core.compile_model(ov_model) + if dataset_size % self.batch_size != 0: + print(BATCH_SIZE_NOT_A_DIVISOR_MESSAGE) + self.batch_size = 1 + try: + ov_model.reshape([self.batch_size, *self.input_size[1:]]) + except Exception as e: + print( + ( + f"During model reshaping the following error occurred: {os.linesep} {e} {os.linesep}" + f"{BATCH_SIZE_OPTION_RECOMMENDATION_MESSAGE}" + ) + ) + exit() val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=2, shuffle=False) - - dataset_size = len(val_loader) - # Initialize result tensors for async inference support. 
- predictions = np.zeros((dataset_size * self.batch_size)) - references = -1 * np.ones((dataset_size * self.batch_size)) - - core = ov.Core() + predictions = np.zeros((dataset_size)) + references = -1 * np.ones((dataset_size)) if os.environ.get("CPU_THREADS_NUM"): # Set CPU_THREADS_NUM for OpenVINO inference cpu_threads_num = os.environ.get("CPU_THREADS_NUM") core.set_property("CPU", properties={"CPU_THREADS_NUM": str(cpu_threads_num)}) - ov_model = core.read_model(self.path_compressed_ir) - compiled_model = core.compile_model(ov_model) - jobs = int(os.environ.get("NUM_VAL_THREADS", DEFAULT_VAL_THREADS)) infer_queue = ov.AsyncInferQueue(compiled_model, jobs) @@ -144,9 +163,8 @@ def _validate(self): def process_result(request, userdata): output_data = request.get_output_tensor().data predicted_label = np.argmax(output_data, axis=1) - for j in range(self.batch_size): - predictions[userdata * self.batch_size + j] = predicted_label[j] - pbar.progress.update(pbar.task, advance=1) + predictions[userdata * self.batch_size : (userdata + 1) * self.batch_size] = predicted_label + pbar.progress.update(pbar.task, advance=self.batch_size) infer_queue.set_callback(process_result) @@ -154,8 +172,7 @@ def process_result(request, userdata): # W/A for memory leaks when using torch DataLoader and OpenVINO image_copies = copy.deepcopy(images.numpy()) infer_queue.start_async(image_copies, userdata=i) - for j in range(self.batch_size): - references[i * self.batch_size + j] = target[j] + references[i * self.batch_size : (i + 1) * self.batch_size] = target infer_queue.wait_all() From a0f5fe9e02e2e4db50814d571f6c2f41b0eb7e9a Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 6 Mar 2024 17:02:29 +0100 Subject: [PATCH 093/108] add dynamic_batch_shape option to conformance --- tests/post_training/conftest.py | 1 + tests/post_training/pipelines/base.py | 2 + .../pipelines/causal_language_model.py | 2 + .../pipelines/image_classification_timm.py | 44 ++++++++----------- 
.../pipelines/lm_weight_compression.py | 5 +++ .../pipelines/masked_language_modeling.py | 2 + .../test_quantize_conformance.py | 9 ++++ 7 files changed, 40 insertions(+), 25 deletions(-) diff --git a/tests/post_training/conftest.py b/tests/post_training/conftest.py index d860ffee584..6478eae8c1b 100644 --- a/tests/post_training/conftest.py +++ b/tests/post_training/conftest.py @@ -20,6 +20,7 @@ def pytest_addoption(parser): parser.addoption("--output", action="store", default="./tmp/", help="Directory to store artifacts") parser.addoption("--no-eval", action="store_true", help="Skip validation step") parser.addoption("--batch_size", action="store", default=1, type=int, help="Batch size of calibration dataset") + parser.addoption("--dynamic_batch_shape", action="store_true", help="Export model with dynamic batch axis") parser.addoption("--subset-size", type=int, default=None, help="Set subset size") parser.addoption("--fp32", action="store_true", help="Test original model") parser.addoption("--cuda", action="store_true", help="Enable CUDA_TORCH backend") diff --git a/tests/post_training/pipelines/base.py b/tests/post_training/pipelines/base.py index 5349e9e4126..461da0c5a07 100644 --- a/tests/post_training/pipelines/base.py +++ b/tests/post_training/pipelines/base.py @@ -183,6 +183,7 @@ def __init__( reference_data: dict, no_eval: bool, run_benchmark_app: bool, + dynamic_batch_shape: bool, params: dict = None, batch_size: int = 1, ) -> None: @@ -195,6 +196,7 @@ def __init__( self.reference_data = reference_data self.params = params or {} self.batch_size = batch_size + self.dynamic_batch_shape = dynamic_batch_shape self.no_eval = no_eval self.run_benchmark_app = run_benchmark_app self.output_model_dir: Path = self.output_dir / self.reported_name / self.backend.value diff --git a/tests/post_training/pipelines/causal_language_model.py b/tests/post_training/pipelines/causal_language_model.py index 84a011dd137..8dc27c7a8fb 100644 --- 
a/tests/post_training/pipelines/causal_language_model.py +++ b/tests/post_training/pipelines/causal_language_model.py @@ -24,6 +24,8 @@ class CausalLMHF(PTQTestPipeline): """Pipeline for causal language models from Hugging Face repository""" def prepare_model(self) -> None: + if self.dynamic_batch_shape: + raise ValueError("The model does not support export with dynamic input shape") if self.backend in OV_BACKENDS + [BackendType.FP32]: self.model_hf = OVModelForCausalLM.from_pretrained(self.model_id, export=True, compile=False) self.model = self.model_hf.model diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index cf095260e17..c9a386d369d 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -36,16 +36,6 @@ # Disable using aten::scaled_dot_product_attention set_fused_attn(False, False) -BATCH_SIZE_NOT_A_DIVISOR_MESSAGE = ( - "The model validation will be done with batch_size=1 because the provided batch_size value " - "is not a divisor of the length of the validation dataset. The compressed model also " - "will be reshaped to a shape with batch_size=1." -) -BATCH_SIZE_OPTION_RECOMMENDATION_MESSAGE = ( - "To avoid model reshaping, please, provide the --batch_size option which " - "is a divisor of the length of the validation dataset." 
-) - class ImageClassificationTimm(PTQTestPipeline): """Pipeline for Image Classification model from timm repository""" @@ -57,13 +47,21 @@ def prepare_model(self) -> None: self.model_cfg = timm_model.default_cfg self.input_size = [self.batch_size] + list(timm_model.default_cfg["input_size"]) self.dummy_tensor = torch.rand(self.input_size) + if self.dynamic_batch_shape: + self.input_size[0] = -1 if self.backend in PT_BACKENDS: self.model = timm_model if self.backend == BackendType.ONNX: onnx_path = self.fp32_model_dir / "model_fp32.onnx" - torch.onnx.export(timm_model, self.dummy_tensor, onnx_path, export_params=True, opset_version=13) + additional_kwargs = {} + if self.dynamic_batch_shape: + additional_kwargs["input_names"] = ["image"] + additional_kwargs["dynamic_axes"] = {"image": {0: "batch"}} + torch.onnx.export( + timm_model, self.dummy_tensor, onnx_path, export_params=True, opset_version=13, **additional_kwargs + ) self.model = onnx.load(onnx_path) self.input_name = self.model.graph.input[0].name @@ -128,24 +126,20 @@ def prepare_calibration_dataset(self): def _validate(self): val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform) + val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=2, shuffle=False) dataset_size = len(val_dataset) + if dataset_size % self.batch_size != 0 and not self.dynamic_batch_shape: + raise ValueError( + ( + "Because the batch_size is not a divisor of the length of the dataset, " + "the one of the data tensors has a shape incompatible with static model input. " + "Use --dynamic_batch_shape option to export such model with dynamic shape." 
+ ) + ) + core = ov.Core() ov_model = core.read_model(self.path_compressed_ir) compiled_model = core.compile_model(ov_model) - if dataset_size % self.batch_size != 0: - print(BATCH_SIZE_NOT_A_DIVISOR_MESSAGE) - self.batch_size = 1 - try: - ov_model.reshape([self.batch_size, *self.input_size[1:]]) - except Exception as e: - print( - ( - f"During model reshaping the following error occurred: {os.linesep} {e} {os.linesep}" - f"{BATCH_SIZE_OPTION_RECOMMENDATION_MESSAGE}" - ) - ) - exit() - val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=2, shuffle=False) # Initialize result tensors for async inference support. predictions = np.zeros((dataset_size)) references = -1 * np.ones((dataset_size)) diff --git a/tests/post_training/pipelines/lm_weight_compression.py b/tests/post_training/pipelines/lm_weight_compression.py index 1d69967bce1..bb950172e7c 100644 --- a/tests/post_training/pipelines/lm_weight_compression.py +++ b/tests/post_training/pipelines/lm_weight_compression.py @@ -71,6 +71,8 @@ class LMWeightCompression(BaseTestPipeline): OV_MODEL_NAME = "openvino_model.xml" def prepare_model(self) -> None: + if self.dynamic_batch_shape: + raise ValueError("The model does not support export with dynamic input shape") is_stateful = self.params.get("is_stateful", False) if is_stateful: self.fp32_model_dir = self.fp32_model_dir.parent / (self.fp32_model_dir.name + "_sf") @@ -129,6 +131,9 @@ def transform_fn(data): return transform_fn def prepare_calibration_dataset(self): + if self.batch_size > 1: + print("Batch size > 1 is not supported for causal language models. 
Batch size = 1 is set.") + self.batch_size = 1 dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") dataset = dataset.filter(lambda example: len(example["text"]) > 80) self.calibration_dataset = nncf.Dataset(dataset, self.get_transform_calibration_fn()) diff --git a/tests/post_training/pipelines/masked_language_modeling.py b/tests/post_training/pipelines/masked_language_modeling.py index 76cf3206f12..c268dee79ab 100644 --- a/tests/post_training/pipelines/masked_language_modeling.py +++ b/tests/post_training/pipelines/masked_language_modeling.py @@ -86,6 +86,8 @@ def transform_func(data): return transform_func def prepare_calibration_dataset(self): + if self.dynamic_batch_shape: + raise ValueError("The model does not support export with dynamic input shape") if self.batch_size > 1: print("Batch size > 1 is not supported for masked language models. Batch size = 1 is set.") self.batch_size = 1 diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index e2472f76af1..6840930fc48 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -50,6 +50,11 @@ def fixture_batch_size(pytestconfig): return pytestconfig.getoption("batch_size") +@pytest.fixture(scope="session", name="dynamic_batch_shape") +def fixture_dynamic_batch_shape(pytestconfig): + return pytestconfig.getoption("dynamic_batch_shape") + + @pytest.fixture(scope="session", name="subset_size") def fixture_subset_size(pytestconfig): return pytestconfig.getoption("subset_size") @@ -202,6 +207,7 @@ def test_ptq_quantization( ptq_result_data: Dict[str, RunInfo], no_eval: bool, batch_size: int, + dynamic_batch_shape: bool, run_fp32_backend: bool, run_torch_cuda_backend: bool, subset_size: Optional[int], @@ -231,6 +237,7 @@ def test_ptq_quantization( "no_eval": no_eval, "run_benchmark_app": run_benchmark_app, "batch_size": batch_size, + "dynamic_batch_shape": 
dynamic_batch_shape, } ) pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs) @@ -269,6 +276,7 @@ def test_weight_compression( wc_result_data: Dict[str, RunInfo], no_eval: bool, batch_size: int, + dynamic_batch_shape: bool, run_fp32_backend: bool, run_torch_cuda_backend: bool, subset_size: Optional[int], @@ -294,6 +302,7 @@ def test_weight_compression( "no_eval": no_eval, "run_benchmark_app": run_benchmark_app, "batch_size": batch_size, + "dynamic_batch_shape": dynamic_batch_shape, } ) pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs) From deb0b51a3cd66d56f7edfba3a7b907685e06db0b Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 7 Mar 2024 14:36:14 +0100 Subject: [PATCH 094/108] polishing test --- tests/onnx/quantization/test_min_max.py | 30 ++++++------- .../native/quantization/test_min_max.py | 31 +++++++------ .../test_templates/test_min_max.py | 43 ++++++++++++------- tests/torch/ptq/test_min_max.py | 37 ++++++---------- 4 files changed, 71 insertions(+), 70 deletions(-) diff --git a/tests/onnx/quantization/test_min_max.py b/tests/onnx/quantization/test_min_max.py index 191b545d6dc..75ce070bef4 100644 --- a/tests/onnx/quantization/test_min_max.py +++ b/tests/onnx/quantization/test_min_max.py @@ -8,8 +8,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Tuple + import pytest +from nncf.common.graph.graph import NNCFGraph +from nncf.common.graph.transformations.commands import TargetType from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXConvolutionMetatype from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXDepthwiseConvolutionMetatype from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXGemmMetatype @@ -25,23 +29,19 @@ class TestONNXMinMaxAlgorithm(TemplateTestMinMaxAlgorithm): @property - def backend( - self, - ) -> MinMaxAlgoBackend: + def backend(self) -> MinMaxAlgoBackend: return ONNXMinMaxAlgoBackend @property - def conv_metatype( - self, - ): + def conv_metatype(self): return ONNXConvolutionMetatype - def create_target_point(self, target_point_type, name, port_id): + def create_target_point(self, target_point_type: TargetType, name: str, port_id: int) -> ONNXTargetPoint: return ONNXTargetPoint(target_point_type, name, port_id) class TestONNXGetTargetPointShape(TemplateTestGetTargetPointShape, TestONNXMinMaxAlgorithm): - def get_nncf_graph(self, weight_port_id, weight_shape): + def get_nncf_graph(self, weight_port_id: int, weight_shape: Tuple[int]) -> NNCFGraph: conv_layer_attrs = ONNXLayerAttributes(weight_attrs={weight_port_id: {"shape": weight_shape}}, bias_attrs={}) return NNCFGraphToTest(ONNXConvolutionMetatype, conv_layer_attrs).nncf_graph @@ -52,21 +52,21 @@ def depthwiseconv_metatype(self): return ONNXDepthwiseConvolutionMetatype @property - def matmul_metatype( - self, - ): + def matmul_metatype(self): return ONNXGemmMetatype @staticmethod - def get_conv_node_attrs(weight_port_id, weight_shape): + def get_conv_node_attrs(weight_port_id: int, weight_shape: Tuple[int]) -> ONNXLayerAttributes: return ONNXLayerAttributes(weight_attrs={weight_port_id: {"shape": weight_shape}}, bias_attrs={}) @staticmethod - def get_depthwiseconv_node_attrs(weight_port_id, weight_shape): - return ONNXLayerAttributes(weight_attrs={weight_port_id: {"shape": weight_shape}}, bias_attrs={}) + 
def get_depthwiseconv_node_attrs(weight_port_id: int, weight_shape: Tuple[int]) -> ONNXLayerAttributes: + return TestONNXGetChannelAxesMinMaxAlgorithm.get_conv_node_attrs(weight_port_id, weight_shape) @staticmethod - def get_matmul_node_attrs(weight_port_id, transpose_weight, weight_shape): + def get_matmul_node_attrs( + weight_port_id: int, transpose_weight: Tuple[int], weight_shape: Tuple[int] + ) -> ONNXLayerAttributes: weight_attrs = {weight_port_id: {"name": "dummy", "shape": weight_shape}} if weight_port_id == 0: gemm_attrs = {"transA": int(transpose_weight), "transB": 0} diff --git a/tests/openvino/native/quantization/test_min_max.py b/tests/openvino/native/quantization/test_min_max.py index 8adeeafba74..6873a05dd45 100644 --- a/tests/openvino/native/quantization/test_min_max.py +++ b/tests/openvino/native/quantization/test_min_max.py @@ -9,8 +9,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Tuple + import pytest +from nncf.common.graph.graph import NNCFGraph +from nncf.common.graph.transformations.commands import TargetType from nncf.openvino.graph.layer_attributes import OVLayerAttributes from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVDepthwiseConvolutionMetatype @@ -26,23 +30,19 @@ class TestOVMinMaxAlgorithm(TemplateTestMinMaxAlgorithm): @property - def backend( - self, - ) -> MinMaxAlgoBackend: + def backend(self) -> MinMaxAlgoBackend: return OVMinMaxAlgoBackend @property - def conv_metatype( - self, - ): + def conv_metatype(self): return OVConvolutionMetatype - def create_target_point(self, target_point_type, name, port_id): + def create_target_point(self, target_point_type: TargetType, name: str, port_id: int) -> OVTargetPoint: return OVTargetPoint(target_point_type, name, port_id) class TestOVGetTargetPointShape(TemplateTestGetTargetPointShape, TestOVMinMaxAlgorithm): - 
def get_nncf_graph(self, weight_port_id, weight_shape): + def get_nncf_graph(self, weight_port_id: int, weight_shape: Tuple[int]) -> NNCFGraph: conv_layer_attrs = OVLayerAttributes({weight_port_id: {"name": "dummy", "shape": weight_shape}}) return NNCFGraphToTest(OVConvolutionMetatype, conv_layer_attrs).nncf_graph @@ -53,23 +53,22 @@ def depthwiseconv_metatype(self): return OVDepthwiseConvolutionMetatype @property - def matmul_metatype( - self, - ): + def matmul_metatype(self): return OVMatMulMetatype @staticmethod - def get_conv_node_attrs(weight_port_id, weight_shape): + def get_conv_node_attrs(weight_port_id: int, weight_shape: Tuple[int]) -> OVLayerAttributes: constant_attributes = {weight_port_id: {"name": "dummy", "shape": weight_shape}} return OVLayerAttributes(constant_attributes, {}, {}) @staticmethod - def get_depthwiseconv_node_attrs(weight_port_id, weight_shape): - constant_attributes = {weight_port_id: {"name": "dummy", "shape": weight_shape}} - return OVLayerAttributes(constant_attributes, {}, {}) + def get_depthwiseconv_node_attrs(weight_port_id: int, weight_shape: Tuple[int]) -> OVLayerAttributes: + return TestOVGetChannelAxes.get_conv_node_attrs(weight_port_id, weight_shape) @staticmethod - def get_matmul_node_attrs(weight_port_id, transpose_weight, weight_shape): + def get_matmul_node_attrs( + weight_port_id: int, transpose_weight: Tuple[int], weight_shape: Tuple[int] + ) -> OVLayerAttributes: constant_attributes = {weight_port_id: {"name": "dummy", "shape": weight_shape}} constant_attributes[weight_port_id]["transpose"] = transpose_weight return OVLayerAttributes(constant_attributes, {}, {}) diff --git a/tests/post_training/test_templates/test_min_max.py b/tests/post_training/test_templates/test_min_max.py index df9a867e2d6..188296511b6 100644 --- a/tests/post_training/test_templates/test_min_max.py +++ b/tests/post_training/test_templates/test_min_max.py @@ -9,10 +9,14 @@ # See the License for the specific language governing permissions and # 
limitations under the License. from abc import abstractmethod +from typing import Tuple import pytest +from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode +from nncf.common.graph.layer_attributes import BaseLayerAttributes +from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend @@ -24,9 +28,7 @@ class TemplateTestMinMaxAlgorithm: @property @abstractmethod - def backend( - self, - ) -> MinMaxAlgoBackend: + def backend(self) -> MinMaxAlgoBackend: """ Get backend specific BiasCorrectionAlgoBackend @@ -36,18 +38,18 @@ def backend( @property @abstractmethod def conv_metatype(self): - pass + "Backend specific Convolution metatype." @property @abstractmethod - def create_target_point(self, target_point_type, name, port_id): - pass + def create_target_point(self, target_point_type: TargetType, name: str, port_id: int) -> TargetPoint: + "Creates backend specific TargetPoint." class TemplateTestGetTargetPointShape(TemplateTestMinMaxAlgorithm): @abstractmethod - def get_nncf_graph(self, weight_port_id, weight_shape): - pass + def get_nncf_graph(self, weight_port_id: int, weight_shape: Tuple[int]) -> NNCFGraph: + "Returns backend specific NNCFGraph having a single Convolution."
@pytest.mark.parametrize( "target_point_type, input_port_id, reference_shape", @@ -57,7 +59,9 @@ def get_nncf_graph(self, weight_port_id, weight_shape): (TargetType.OPERATION_WITH_WEIGHTS, 1, (3, 10, 4, 4)), ), ) - def test_get_target_point_shape(self, target_point_type, input_port_id, reference_shape): + def test_get_target_point_shape( + self, target_point_type: TargetType, input_port_id: int, reference_shape: Tuple[int] + ): nncf_graph = self.get_nncf_graph(input_port_id, CONV_WEIGHT_SHAPE) nodes = nncf_graph.get_nodes_by_metatypes((self.conv_metatype,)) assert len(nodes) == 1 @@ -70,22 +74,29 @@ class TemplateTestGetChannelAxes(TemplateTestMinMaxAlgorithm): @property @abstractmethod def depthwiseconv_metatype(self): - pass + "Backend specific Depthwise convolution metatype." @property @abstractmethod def matmul_metatype(self): - pass + "Backend specific MatMul metatype." @staticmethod @abstractmethod - def get_conv_node_attrs(weight_port_id, shape): - pass + def get_conv_node_attrs(weight_port_id: int, weight_shape: Tuple[int]) -> BaseLayerAttributes: + "Returns backend specific layer attributes for Convolution." @staticmethod @abstractmethod - def get_matmul_node_attrs(): - pass + def get_depthwiseconv_node_attrs(weight_port_id: int, weight_shape: Tuple[int]) -> BaseLayerAttributes: + "Returns backend specific layer attributes for Depthwise Convolution." + + @staticmethod + @abstractmethod + def get_matmul_node_attrs( + weight_port_id: int, transpose_weight: Tuple[int], weight_shape: Tuple[int] + ) -> BaseLayerAttributes: + "Returns backend specific layer attributes for MatMul." @pytest.mark.parametrize( "conv_shape, weight_port_id, ref_axes", ((CONV_WEIGHT_SHAPE, 0, (0,)), (CONV_WEIGHT_SHAPE, 1, (0,))) @@ -153,5 +164,5 @@ def test_get_channel_axes_matmul_torch(self, weight_shape, ref_axes): Checks MatMul quantization axes in MinMax for Torch.
""" matmul_node = NNCFNode({"metatype": self.matmul_metatype}) - matmul_node.layer_attributes = self.get_matmul_node_attrs(weight_shape) + matmul_node.layer_attributes = self.get_matmul_node_attrs(None, None, weight_shape) assert self.backend().get_weight_quantization_axes(matmul_node, "dummy") == ref_axes diff --git a/tests/torch/ptq/test_min_max.py b/tests/torch/ptq/test_min_max.py index 68e7de0414c..c57c82be429 100644 --- a/tests/torch/ptq/test_min_max.py +++ b/tests/torch/ptq/test_min_max.py @@ -8,8 +8,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Tuple + import pytest +from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes from nncf.common.graph.layer_attributes import LinearLayerAttributes from nncf.common.graph.transformations.commands import TargetType @@ -28,25 +31,21 @@ class TestTorchMinMaxAlgorithm(TemplateTestMinMaxAlgorithm): @property - def backend( - self, - ) -> MinMaxAlgoBackend: + def backend(self) -> MinMaxAlgoBackend: return PTMinMaxAlgoBackend @property - def conv_metatype( - self, - ): + def conv_metatype(self): return PTConv2dMetatype - def create_target_point(self, target_point_type, name, port_id): + def create_target_point(self, target_point_type: TargetType, name: str, port_id: int) -> PTTargetPoint: if target_point_type == TargetType.POST_LAYER_OPERATION: port_id = None return PTTargetPoint(target_point_type, name, input_port_id=port_id) class TestOVGetTargetPointShape(TemplateTestGetTargetPointShape, TestTorchMinMaxAlgorithm): - def get_nncf_graph(self, weight_port_id, weight_shape): + def get_nncf_graph(self, weight_port_id: int, weight_shape: Tuple[int]) -> NNCFGraph: conv_layer_attrs = ConvolutionLayerAttributes( weight_requires_grad=True, in_channels=weight_shape[1], @@ -63,19 +62,15 @@ def get_nncf_graph(self, 
weight_port_id, weight_shape): class TestTorchGetChannelAxes(TemplateTestGetChannelAxes, TestTorchMinMaxAlgorithm): @property - def depthwiseconv_metatype( - self, - ): + def depthwiseconv_metatype(self): return PTDepthwiseConv2dSubtype @property - def matmul_metatype( - self, - ): + def matmul_metatype(self): return PTLinearMetatype @staticmethod - def get_conv_node_attrs(weight_port_id, weight_shape): + def get_conv_node_attrs(weight_port_id: int, weight_shape: Tuple[int]) -> ConvolutionLayerAttributes: return ConvolutionLayerAttributes( weight_requires_grad=False, in_channels=weight_shape[0], @@ -89,7 +84,7 @@ def get_conv_node_attrs(weight_port_id, weight_shape): ) @staticmethod - def get_depthwiseconv_node_attrs(weight_port_id, weight_shape): + def get_depthwiseconv_node_attrs(weight_port_id: int, weight_shape: Tuple[int]) -> ConvolutionLayerAttributes: return ConvolutionLayerAttributes( weight_requires_grad=False, in_channels=weight_shape[1], @@ -103,15 +98,11 @@ def get_depthwiseconv_node_attrs(weight_port_id, weight_shape): ) @staticmethod - def get_matmul_node_attrs(weight_shape): + def get_matmul_node_attrs(weight_port_id: int, transpose_weight: Tuple[int], weight_shape: Tuple[int]): return LinearLayerAttributes(False, in_features=weight_shape[0], out_features=weight_shape[1]) - def test_get_channel_axes_matmul_node_ov_onnx( - self, - ): + def test_get_channel_axes_matmul_node_ov_onnx(self): pytest.skip("Test is not applied for Torch backend.") - def test_get_channel_axes_deptwiseconv_node_ov( - self, - ): + def test_get_channel_axes_deptwiseconv_node_ov(self): pytest.skip("Test is not applied for Torch backend.") From 64cbd99107f8501c9a800bd702b32c0b8bedb100 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 7 Mar 2024 14:51:29 +0100 Subject: [PATCH 095/108] fix calibrate.py --- tests/openvino/tools/calibrate.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/openvino/tools/calibrate.py 
b/tests/openvino/tools/calibrate.py index b161b5f7989..49a601ffc3d 100644 --- a/tests/openvino/tools/calibrate.py +++ b/tests/openvino/tools/calibrate.py @@ -1055,9 +1055,11 @@ def update_nncf_algorithms_config(nncf_algorithms_config: Dict[str, Dict[str, An :param nncf_algorithms_config: Configuration file of an algorithm. :param batch_size: Batch size value. """ - subset_size = nncf_algorithms_config.get("subset_size", 300) - nncf_algorithms_config["subset_size"] = subset_size // batch_size - print(f"Updated subset_size value to {nncf_algorithms_config['subset_size']}") + for nncf_method, config in nncf_algorithms_config.items(): + subset_size = config.get("subset_size", 300) + new_subset_size = subset_size // batch_size + config["subset_size"] = new_subset_size + print(f"Updated subset_size value for {nncf_method} method to {new_subset_size} ") def main(): @@ -1070,6 +1072,7 @@ def main(): xml_path, bin_path = get_model_paths(config.model) accuracy_checker_config = get_accuracy_checker_config(config.engine) nncf_algorithms_config = get_nncf_algorithms_config(config.compression, args.output_dir) + assert args.batch_size >= 0 if args.batch_size > 1: update_accuracy_checker_config(accuracy_checker_config, args.batch_size) update_nncf_algorithms_config(nncf_algorithms_config, args.batch_size) From 611951178555156ffea222c4f67de251c0d5ab7e Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Thu, 7 Mar 2024 14:56:35 +0100 Subject: [PATCH 096/108] new polishing --- nncf/quantization/algorithms/post_training/algorithm.py | 1 + tests/post_training/test_templates/test_ptq_params.py | 1 - tests/post_training/test_templates/test_quantizer_config.py | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index 304848a99fa..862dc5d5037 100644 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ 
b/nncf/quantization/algorithms/post_training/algorithm.py @@ -104,6 +104,7 @@ def apply( "A dataset is required for the post-training quantization " "algorithm to collect statistics for intermediate models." ) + step_index_to_statistics = None if statistic_points: step_index_to_statistics = {0: statistic_points} diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 326560d97f5..c7a0e45799c 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -140,7 +140,6 @@ def test_range_estimator_per_tensor(self, test_params, range_estimator_params): assert min_max_algo._range_estimator_params[QuantizerGroup.ACTIVATIONS] == range_estimator_params params = test_params["test_range_estimator_per_tensor"] - stat_points = min_max_algo.get_statistic_points(params["model"], params["nncf_graph"]) assert len(stat_points) == params["stat_points_num"] diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 84b4bc37830..f369b6fcd7b 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -82,7 +82,7 @@ def conv_sum_aggregation_nncf_graph(self) -> NNCFGraphToTestSumAggregation: class TestGetStatisticsCollectorParameters: target_type: TargetType target_node_name: str - is_per_sample: bool + batchwise_statistics: bool ref_per_ch_reduction_axes: List[int] ref_per_tensor_reduction_axes: List[int] @@ -284,7 +284,7 @@ def test_get_stat_collector( target_point = list(min_max_algo._quantization_target_points_to_qconfig.keys())[0] tensor_collector = min_max_algo._get_stat_collector( - conv_sum_aggregation_nncf_graph.nncf_graph, target_point, q_config, params.is_per_sample + conv_sum_aggregation_nncf_graph.nncf_graph, target_point, q_config, params.batchwise_statistics ) is_weight_tp = 
target_point.is_weight_target_point() From e01008733aac82d768e05ddf89364abb514c4d5f Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 8 Mar 2024 13:16:39 +0100 Subject: [PATCH 097/108] remove warnings about bathc_size>1 in aggregator --- nncf/common/tensor_statistics/aggregator.py | 34 ++------------------- nncf/onnx/statistics/aggregator.py | 8 +---- nncf/openvino/statistics/aggregator.py | 8 +---- nncf/torch/statistics/aggregator.py | 8 +---- 4 files changed, 6 insertions(+), 52 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 8fadd8043e3..965717beb40 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -11,12 +11,11 @@ from abc import ABC from abc import abstractmethod from itertools import islice -from typing import Any, Dict, List, Optional, TypeVar +from typing import Any, Dict, Optional, TypeVar import nncf from nncf.common import factory from nncf.common.graph.graph import NNCFGraph -from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.logging.logger import nncf_logger from nncf.common.logging.track_progress import track @@ -30,11 +29,7 @@ EMPTY_DATASET_ERROR = ( "Calibration dataset must not be empty. Please provide calibration dataset with at least one sample." ) -BATCH_SIZE_MODEL_WARNING = ( - "For the particular model the batch size > 1 can lead to inaccurate collected statistics. " - "The recomendation is to provide dataloader instance with the batch_size = 1." -) -UPDATING_ITERATIONS_NUMBER_WARNING = ( +ITERATIONS_NUMBER_WARNING = ( "The number of iterations for statistics collection is bigger than the length of the dataset." 
) @@ -60,7 +55,7 @@ def _get_iterations_number( dataset_length = self.dataset.get_length() if dataset_length and self.iterations_number: if self.iterations_number > dataset_length: - nncf_logger.warning(UPDATING_ITERATIONS_NUMBER_WARNING) + nncf_logger.warning(ITERATIONS_NUMBER_WARNING) return dataset_length return self.iterations_number return dataset_length or self.iterations_number @@ -75,9 +70,6 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ if not self.statistic_points: return - batch_size = self.dataset.get_batch_size() or 1 - if batch_size > 1 and self.is_model_has_no_batch_axis(graph): - nncf_logger.warning(BATCH_SIZE_MODEL_WARNING) model_transformer = factory.ModelTransformerFactory.create(model) merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph) transformation_layout = self._get_transformation_layout_extra_outputs(merged_statistics) @@ -118,26 +110,6 @@ def register_statistic_points(self, statistic_points: StatisticPointsContainer) elif tensor_collector.num_samples is not None: self.iterations_number = max(self.iterations_number, tensor_collector.num_samples) - def is_model_has_no_batch_axis(self, graph: NNCFGraph) -> bool: - """ - Returns True if NNCFGraph contains metatypes with no batch axis in output tensor. - - :param graph: NNCFGraph. - :return: True if NNCFGraph contains metatypes with no batch axis in output tensor. - """ - unique_graph_metatypes = set(node.metatype for node in graph.get_all_nodes()) - return any(metatype in self.metatypes_no_batch_support for metatype in unique_graph_metatypes) - - @property - @abstractmethod - def metatypes_no_batch_support(self) -> List[OperatorMetatype]: - """ - These metatypes mix outputs for different samples into one axis. - If reducers and aggregators collect statistics at the output of the following operations, - assuming that 0-axis is batch axis, they get only 1 value instead of batch_size values. 
- It could lead to inaccurate/incorrect statistics result. - """ - @abstractmethod def _register_statistics(self, outputs: Dict[str, NNCFTensor], statistic_points: StatisticPointsContainer) -> None: """ diff --git a/nncf/onnx/statistics/aggregator.py b/nncf/onnx/statistics/aggregator.py index ed0417bcee9..a6296e121ca 100644 --- a/nncf/onnx/statistics/aggregator.py +++ b/nncf/onnx/statistics/aggregator.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List +from typing import Dict import numpy as np import onnx @@ -21,8 +21,6 @@ from nncf.common.tensor_statistics.aggregator import StatisticsAggregator from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.experimental.common.tensor_statistics.collectors import TensorCollector -from nncf.onnx.graph.metatypes.groups import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS -from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXOpMetatype from nncf.onnx.graph.node_utils import get_input_edge from nncf.onnx.graph.node_utils import get_input_edges_mapping from nncf.onnx.graph.onnx_helper import get_name_to_node_map @@ -31,10 +29,6 @@ class ONNXStatisticsAggregator(StatisticsAggregator): - @property - def metatypes_no_batch_support(self) -> List[ONNXOpMetatype]: - return OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS - def collect_statistics(self, model: onnx.ModelProto, graph: NNCFGraph) -> None: self.input_edges_mapping = get_input_edges_mapping(graph) self.node_mapping = get_name_to_node_map(model) diff --git a/nncf/openvino/statistics/aggregator.py b/nncf/openvino/statistics/aggregator.py index 27011d45631..d7c1a8829e5 100644 --- a/nncf/openvino/statistics/aggregator.py +++ b/nncf/openvino/statistics/aggregator.py @@ -10,7 +10,7 @@ # limitations under the License. 
from collections import defaultdict -from typing import Dict, List +from typing import Dict import numpy as np import openvino.runtime as ov @@ -23,8 +23,6 @@ from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.experimental.common.tensor_statistics.collectors import MergedTensorCollector from nncf.experimental.common.tensor_statistics.collectors import TensorCollector -from nncf.openvino.graph.metatypes.groups import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS -from nncf.openvino.graph.metatypes.openvino_metatypes import OVOpMetatype from nncf.openvino.graph.node_utils import get_ov_model_reduce_node_name from nncf.openvino.graph.node_utils import get_reducer_output_node_names from nncf.openvino.graph.transformations.commands import OVInplaceFnInsertionCommand @@ -33,10 +31,6 @@ class OVStatisticsAggregator(StatisticsAggregator): - @property - def metatypes_no_batch_support(self) -> List[OVOpMetatype]: - return OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS - def collect_statistics(self, model: ov.Model, graph: NNCFGraph) -> None: self._name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} super().collect_statistics(model, graph) diff --git a/nncf/torch/statistics/aggregator.py b/nncf/torch/statistics/aggregator.py index 713e1de2288..7daae8286fd 100644 --- a/nncf/torch/statistics/aggregator.py +++ b/nncf/torch/statistics/aggregator.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict, List +from typing import Dict import numpy as np import torch @@ -20,8 +20,6 @@ from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.tensor_statistics.aggregator import StatisticPointsContainer from nncf.common.tensor_statistics.aggregator import StatisticsAggregator -from nncf.torch.graph.operator_metatypes import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS -from nncf.torch.graph.operator_metatypes import PTOperatorMetatype from nncf.torch.graph.transformations.commands import PTInsertionCommand from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.tensor import PTNNCFTensor @@ -31,10 +29,6 @@ class PTStatisticsAggregator(StatisticsAggregator): HOOKS_GROUP_NAME = "statistics_hooks" - @property - def metatypes_no_batch_support(self) -> List[PTOperatorMetatype]: - return OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS - def collect_statistics(self, model: NNCFNetwork, graph: NNCFGraph) -> None: with torch.no_grad(): super().collect_statistics(model, graph) From 54319cbefc4ea29f7c08cd2d73b62020ef628229 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 8 Mar 2024 13:18:28 +0100 Subject: [PATCH 098/108] add baatch_size logging in quantize_impl() --- nncf/onnx/quantization/quantize_model.py | 4 ++++ nncf/openvino/quantization/quantize_model.py | 6 ++++++ nncf/quantization/quantize_model.py | 7 +++++++ nncf/torch/quantization/quantize_model.py | 11 +++++++---- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py index c89b39a14eb..e0b09491c5d 100644 --- a/nncf/onnx/quantization/quantize_model.py +++ b/nncf/onnx/quantization/quantize_model.py @@ -17,6 +17,7 @@ from nncf.common.logging.logger import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset +from nncf.onnx.graph.metatypes.groups import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS from 
nncf.onnx.graph.nncf_graph_builder import GraphConverter from nncf.parameters import DropType from nncf.parameters import ModelType @@ -29,6 +30,7 @@ from nncf.quantization.algorithms.accuracy_control.algorithm import calculate_accuracy_drop from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.quantize_model import BATCHWISE_STATISTICS_WARNING from nncf.quantization.quantize_model import quantize_with_tune_hyperparams from nncf.quantization.telemetry_extractors import CompressionStartedWithQuantizeApi from nncf.scopes import IgnoredScope @@ -81,6 +83,8 @@ def quantize_impl( ) graph = GraphConverter.create_nncf_graph(model) + if advanced_parameters.batchwise_statistics and graph.get_nodes_by_metatypes(OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS): + nncf_logger.warning(BATCHWISE_STATISTICS_WARNING) quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) return quantized_model diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 69b1938329b..1d5b1750300 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -19,6 +19,7 @@ from nncf.common.logging import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset +from nncf.openvino.graph.metatypes.groups import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS from nncf.openvino.graph.model_utils import remove_friendly_name_duplicates from nncf.openvino.graph.nncf_graph_builder import GraphConverter from nncf.openvino.graph.node_utils import get_number_if_op @@ -40,6 +41,7 @@ from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.quantization.algorithms.weight_compression.algorithm import 
WeightCompression +from nncf.quantization.quantize_model import BATCHWISE_STATISTICS_WARNING from nncf.quantization.quantize_model import quantize_with_tune_hyperparams from nncf.quantization.telemetry_extractors import CompressionStartedWithQuantizeApi from nncf.scopes import IgnoredScope @@ -81,6 +83,8 @@ def native_quantize_if_op_impl( ) graph = GraphConverter.create_nncf_graph(model) + if advanced_parameters.batchwise_statistics and graph.get_nodes_by_metatypes(OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS): + nncf_logger.warning(BATCHWISE_STATISTICS_WARNING) if_ops_number = get_number_if_op(model) all_models_number = if_ops_number * 2 + 1 nncf_logger.info( @@ -137,6 +141,8 @@ def native_quantize_impl( ) graph = GraphConverter.create_nncf_graph(model) + if advanced_parameters.batchwise_statistics and graph.get_nodes_by_metatypes(OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS): + nncf_logger.warning(BATCHWISE_STATISTICS_WARNING) quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) if is_weight_compression_needed(advanced_parameters): diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index dc161b0a225..e3dac32a542 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -35,6 +35,13 @@ TTensor = TypeVar("TTensor") +BATCHWISE_STATISTICS_WARNING = ( + "For the particular model the batchwise statistics collection can lead to inaccurate statistics. " + "If the accuracy degradation after compression is unsatisfactory, then " + "the recomendation is to turn off batchwise statistics. If the results are still unsatisfactory, " + "provide a dataloader with batch_size = 1 to the calibration dataset." 
+) + def _update_advanced_quantization_parameters( advanced_parameters: Optional[AdvancedQuantizationParameters], calibration_dataset: Dataset diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py index fe883dabb6e..5ca5f15fceb 100644 --- a/nncf/torch/quantization/quantize_model.py +++ b/nncf/torch/quantization/quantize_model.py @@ -16,6 +16,7 @@ import nncf from nncf.common.factory import NNCFGraphFactory +from nncf.common.logging.logger import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset from nncf.parameters import CompressWeightsMode @@ -26,7 +27,9 @@ from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression +from nncf.quantization.quantize_model import BATCHWISE_STATISTICS_WARNING from nncf.scopes import IgnoredScope +from nncf.torch.graph.operator_metatypes import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS from nncf.torch.model_creation import wrap_model DEFAULT_RANGE_TYPE = "mean_min_max" @@ -68,10 +71,10 @@ def quantize_impl( ignored_scope=ignored_scope, advanced_parameters=advanced_parameters, ) - - quantized_model = quantization_algorithm.apply( - nncf_network, nncf_network.nncf.get_graph(), dataset=calibration_dataset - ) + graph = nncf_network.nncf.get_graph() + if advanced_parameters.batchwise_statistics and graph.get_nodes_by_metatypes(OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS): + nncf_logger.warning(BATCHWISE_STATISTICS_WARNING) + quantized_model = quantization_algorithm.apply(nncf_network, graph, dataset=calibration_dataset) quantized_model.nncf.disable_dynamic_graph_building() From 4e90c658bd0a62a23cf8c724915398ebfbd30e0f Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 8 Mar 2024 13:19:10 +0100 Subject: [PATCH 099/108] add IF op to batch_size warning 
metatypes list --- nncf/openvino/graph/metatypes/groups.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nncf/openvino/graph/metatypes/groups.py b/nncf/openvino/graph/metatypes/groups.py index cf92932741a..11e2c86d0dc 100644 --- a/nncf/openvino/graph/metatypes/groups.py +++ b/nncf/openvino/graph/metatypes/groups.py @@ -208,4 +208,5 @@ ov_metatypes.OVROIPoolingMetatype, ov_metatypes.OVROIAlignMetatype, ov_metatypes.OVEmbeddingMetatype, + ov_metatypes.OVIfMetatype, ] From d04ba75c5ab4ae4f8b2690fb072e171c0985ce84 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 8 Mar 2024 13:56:41 +0100 Subject: [PATCH 100/108] put logs from minmax to quantize_impl --- nncf/onnx/quantization/quantize_model.py | 7 +++--- nncf/openvino/quantization/quantize_model.py | 13 ++++++----- .../algorithms/min_max/algorithm.py | 8 ------- nncf/quantization/quantize_model.py | 23 +++++++++++++++++++ nncf/torch/quantization/quantize_model.py | 8 +++---- 5 files changed, 38 insertions(+), 21 deletions(-) diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py index e0b09491c5d..5939b38e2bf 100644 --- a/nncf/onnx/quantization/quantize_model.py +++ b/nncf/onnx/quantization/quantize_model.py @@ -30,8 +30,8 @@ from nncf.quantization.algorithms.accuracy_control.algorithm import calculate_accuracy_drop from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization -from nncf.quantization.quantize_model import BATCHWISE_STATISTICS_WARNING from nncf.quantization.quantize_model import quantize_with_tune_hyperparams +from nncf.quantization.quantize_model import warning_model_no_batchwise_support from nncf.quantization.telemetry_extractors import CompressionStartedWithQuantizeApi from nncf.scopes import IgnoredScope from nncf.telemetry import tracked_function @@ -83,8 +83,9 @@ def quantize_impl( ) graph = GraphConverter.create_nncf_graph(model) - if 
advanced_parameters.batchwise_statistics and graph.get_nodes_by_metatypes(OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS): - nncf_logger.warning(BATCHWISE_STATISTICS_WARNING) + warning_model_no_batchwise_support( + graph, advanced_parameters.batchwise_statistics, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS + ) quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) return quantized_model diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 1d5b1750300..01c9802505b 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -41,8 +41,8 @@ from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression -from nncf.quantization.quantize_model import BATCHWISE_STATISTICS_WARNING from nncf.quantization.quantize_model import quantize_with_tune_hyperparams +from nncf.quantization.quantize_model import warning_model_no_batchwise_support from nncf.quantization.telemetry_extractors import CompressionStartedWithQuantizeApi from nncf.scopes import IgnoredScope from nncf.telemetry.decorator import tracked_function @@ -83,8 +83,9 @@ def native_quantize_if_op_impl( ) graph = GraphConverter.create_nncf_graph(model) - if advanced_parameters.batchwise_statistics and graph.get_nodes_by_metatypes(OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS): - nncf_logger.warning(BATCHWISE_STATISTICS_WARNING) + warning_model_no_batchwise_support( + graph, advanced_parameters.batchwise_statistics, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS + ) if_ops_number = get_number_if_op(model) all_models_number = if_ops_number * 2 + 1 nncf_logger.info( @@ -139,10 +140,10 @@ def native_quantize_impl( ignored_scope=ignored_scope, advanced_parameters=advanced_parameters, ) - graph = 
GraphConverter.create_nncf_graph(model) - if advanced_parameters.batchwise_statistics and graph.get_nodes_by_metatypes(OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS): - nncf_logger.warning(BATCHWISE_STATISTICS_WARNING) + warning_model_no_batchwise_support( + graph, advanced_parameters.batchwise_statistics, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS + ) quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) if is_weight_compression_needed(advanced_parameters): diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 46592d318c6..d0cc478247c 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -71,12 +71,6 @@ num_bits=8, mode=QuantizationScheme.SYMMETRIC, signedness_to_force=None, per_channel=False ) -BATCHWISE_STATISTICS_TRANSFORMERS_WARNING = ( - "For transformer-like models, batchwise_statistics option could result in inaccurate statistics. " - "The recommendation is to collect statistics with dataloader " - "having batch_size = 1 and turn off batchwise_statistics option." 
-) - @dataclasses.dataclass class ModeBasedDefaults: @@ -936,8 +930,6 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin self._reset_cache() quantization_target_points, _ = self._get_quantization_target_points(model, graph) output = StatisticPointsContainer() - if self._model_type == ModelType.TRANSFORMER and self._batchwise_statistics: - nncf_logger.warning(BATCHWISE_STATISTICS_TRANSFORMERS_WARNING) for quantization_target_point, qconfig in quantization_target_points.items(): stat_collector = self._get_stat_collector( graph, quantization_target_point, qconfig, self._batchwise_statistics diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index e3dac32a542..b0bb0b0c630 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -14,6 +14,9 @@ import nncf from nncf.api.compression import TModel from nncf.common.deprecation import warning_deprecated +from nncf.common.graph import NNCFGraph +from nncf.common.graph.operator_metatypes import OperatorMetatype +from nncf.common.logging.logger import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.common.utils.api_marker import api from nncf.common.utils.backend import BackendType @@ -43,6 +46,26 @@ ) +def warning_model_no_batchwise_support( + graph: NNCFGraph, + batchwise_statistics: bool, + model_type: ModelType, + no_batchwise_support_metatypes: List[OperatorMetatype], +) -> None: + """ + Prints a warning message if batchwise statistics could lead to a significant accuracy drop. + + :param graph: Model's NNCFGraph. + :param batchwise_statistics: Is turned on or turned off batchwise statistics. + :param model_type: Model type algorithm option. + :param no_batchwise_support_metatypes: Meatypes having no batchwise statistics support. 
+ """ + if batchwise_statistics and ( + graph.get_nodes_by_metatypes(no_batchwise_support_metatypes) or model_type == ModelType.TRANSFORMER + ): + nncf_logger.warning(BATCHWISE_STATISTICS_WARNING) + + def _update_advanced_quantization_parameters( advanced_parameters: Optional[AdvancedQuantizationParameters], calibration_dataset: Dataset ) -> AdvancedQuantizationParameters: diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py index 5ca5f15fceb..44898b3dc61 100644 --- a/nncf/torch/quantization/quantize_model.py +++ b/nncf/torch/quantization/quantize_model.py @@ -16,7 +16,6 @@ import nncf from nncf.common.factory import NNCFGraphFactory -from nncf.common.logging.logger import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset from nncf.parameters import CompressWeightsMode @@ -27,7 +26,7 @@ from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression -from nncf.quantization.quantize_model import BATCHWISE_STATISTICS_WARNING +from nncf.quantization.quantize_model import warning_model_no_batchwise_support from nncf.scopes import IgnoredScope from nncf.torch.graph.operator_metatypes import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS from nncf.torch.model_creation import wrap_model @@ -72,8 +71,9 @@ def quantize_impl( advanced_parameters=advanced_parameters, ) graph = nncf_network.nncf.get_graph() - if advanced_parameters.batchwise_statistics and graph.get_nodes_by_metatypes(OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS): - nncf_logger.warning(BATCHWISE_STATISTICS_WARNING) + warning_model_no_batchwise_support( + graph, advanced_parameters.batchwise_statistics, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS + ) quantized_model = quantization_algorithm.apply(nncf_network, graph, 
dataset=calibration_dataset) quantized_model.nncf.disable_dynamic_graph_building() From 6048155dbf5bb3f39005b28d02cc8d30851da218 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 8 Mar 2024 14:00:18 +0100 Subject: [PATCH 101/108] rm typos --- nncf/quantization/advanced_parameters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nncf/quantization/advanced_parameters.py b/nncf/quantization/advanced_parameters.py index 9157d5c3078..187c26dd716 100644 --- a/nncf/quantization/advanced_parameters.py +++ b/nncf/quantization/advanced_parameters.py @@ -192,7 +192,7 @@ class AdvancedQuantizationParameters: :type disable_bias_correction: bool :param batchwise_statistics: Determines whether quantizer statistics should be calculated for each item of the batch or for the entire batch, default is None. - "None" means that if torch.DataLoader or tensorflow.Dataset was passed as a data source for the calibration + "None" means that if torch.DataLoader or tensorflow.Dataset was passed as a data source for the calibration dataset, then if batch_size > 1 of the data source then batchwise_statistics = True, otherwise False. :type batchwise_statistics: Optional[bool] :param activations_quantization_params: Quantization parameters for activations. @@ -212,7 +212,6 @@ class AdvancedQuantizationParameters: :type smooth_quant_alpha: AdvancedSmoothQuantParameters :param smooth_quant_alpha: Deprecated SmoothQuant-related parameter. :type smooth_quant_alpha: float - :param backend_params: Backend-specific parameters. 
:type backend_params: Dict[str, Any] """ From 09783c47991c07dbb7024b3f2aa82e377cbade3f Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 8 Mar 2024 14:15:11 +0100 Subject: [PATCH 102/108] typehints --- nncf/common/graph/utils.py | 4 +++- nncf/common/quantization/initialization/range.py | 12 ++++++------ nncf/common/tensor_statistics/aggregator.py | 4 +--- nncf/onnx/graph/node_utils.py | 4 ++-- nncf/quantization/algorithms/min_max/backend.py | 8 ++++---- nncf/quantization/algorithms/min_max/onnx_backend.py | 6 +++--- .../algorithms/min_max/openvino_backend.py | 6 +++--- .../quantization/algorithms/min_max/torch_backend.py | 6 +++--- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/nncf/common/graph/utils.py b/nncf/common/graph/utils.py index c3672ed7ef4..577d03d6ec5 100644 --- a/nncf/common/graph/utils.py +++ b/nncf/common/graph/utils.py @@ -116,7 +116,9 @@ def get_number_of_quantized_ops( return len(quantized_ops) -def get_reduction_axes(channel_axes: Union[List[int], Tuple[int]], shape: Union[List[int], Tuple[int]]) -> Tuple[int]: +def get_reduction_axes( + channel_axes: Union[List[int], Tuple[int, ...]], shape: Union[List[int], Tuple[int, ...]] +) -> Tuple[int, ...]: """ Returns filtered reduction axes without axes that corresponds channels. 
diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index 705d12f0e09..a015e92b5cd 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -210,9 +210,9 @@ def use_means_of_maxs(self) -> bool: def _get_reduction_axes( self, - shape_to_reduce: Union[Tuple[int], List[int]], - quantization_axes: Union[Tuple[int], List[int]], - aggregation_axes: Union[Tuple[int], List[int]], + shape_to_reduce: Union[Tuple[int, ...], List[int]], + quantization_axes: Union[Tuple[int, ...], List[int]], + aggregation_axes: Union[Tuple[int, ...], List[int]], ): """ Returns axes for a reducer regarding aggregation axes. As aggregator takes axes counting from stacked tensors, @@ -227,7 +227,7 @@ def _get_reduction_axes( axes_to_keep.update(quantization_axes) return get_reduction_axes(axes_to_keep, shape_to_reduce) - def _get_aggregation_axes(self, batchwise_statistics: bool) -> Tuple[int]: + def _get_aggregation_axes(self, batchwise_statistics: bool) -> Tuple[int, ...]: """ Returns axes for aggregator. 
@@ -239,8 +239,8 @@ def _get_aggregation_axes(self, batchwise_statistics: bool) -> Tuple[int]: def get_reduction_aggregation_axes( self, - shape_to_reduce: Union[Tuple[int], List[int]], - quantization_axes: Union[Tuple[int], List[int]], + shape_to_reduce: Union[Tuple[int, ...], List[int]], + quantization_axes: Union[Tuple[int, ...], List[int]], batchwise_statistics: bool, ) -> Tuple[ReductionAxes, AggregationAxes]: """ diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 965717beb40..ae0245e6204 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -44,9 +44,7 @@ def __init__(self, dataset: Dataset): self.iterations_number = None self.statistic_points = StatisticPointsContainer() - def _get_iterations_number( - self, - ) -> Optional[int]: + def _get_iterations_number(self) -> Optional[int]: """ Returns number of iterations which in min(self.iterations_number, dataset_length). diff --git a/nncf/onnx/graph/node_utils.py b/nncf/onnx/graph/node_utils.py index d255a8a94c3..be9909cb86a 100644 --- a/nncf/onnx/graph/node_utils.py +++ b/nncf/onnx/graph/node_utils.py @@ -160,7 +160,7 @@ def get_weight_quantization_axis(node: NNCFNode, port_id: int) -> int: def _get_activation_tensor_shape( nncf_graph: NNCFGraph, node: NNCFNode, target_point: ONNXTargetPoint -) -> Optional[Tuple[int]]: +) -> Optional[Tuple[int, ...]]: """ Returns shape of an activation tensor which is correspond to the target point and node. ONNX model can not have a shape of a edge, even after shape inference. @@ -196,7 +196,7 @@ def _get_activation_tensor_shape( def get_quantized_tensor_shape( nncf_graph: NNCFGraph, node: NNCFNode, target_point: ONNXTargetPoint -) -> Optional[Tuple[int]]: +) -> Optional[Tuple[int, ...]]: """ Returns quantized tensor shape corresponding to a target point with a node if shape - info is existed. If there is no shape info - returns None. 
diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index a611bfc8ba3..d521f233243 100644 --- a/nncf/quantization/algorithms/min_max/backend.py +++ b/nncf/quantization/algorithms/min_max/backend.py @@ -184,7 +184,7 @@ def unify_statistics(statistics: List[MinMaxTensorStatistic]) -> MinMaxTensorSta @staticmethod @abstractmethod - def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: TargetPoint) -> Tuple[int]: + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: TargetPoint) -> Tuple[int, ...]: """ Returns shape of a targer point tensor. @@ -196,7 +196,7 @@ def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: @staticmethod @abstractmethod - def get_weight_quantization_axes(node: NNCFNode, target_point: TargetPoint) -> Tuple[int]: + def get_weight_quantization_axes(node: NNCFNode, target_point: TargetPoint) -> Tuple[int, ...]: """ Returns axes for per-channel quantization of weights of the node placed on a input port_id. 
@@ -210,8 +210,8 @@ def get_weight_quantization_axes(node: NNCFNode, target_point: TargetPoint) -> T def get_statistic_collector( range_estimator_params: RangeEstimatorParameters, use_abs_max: bool, - reduction_axes: Optional[Tuple[int]], - aggregation_axes: Optional[Tuple[int]], + reduction_axes: Optional[Tuple[int, ...]], + aggregation_axes: Optional[Tuple[int, ...]], inplace: bool, num_samples: Optional[int] = None, ) -> TensorStatisticCollectorBase: diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 5f91f6ebe67..16295518de1 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -155,7 +155,7 @@ def _get_input_edges_mapping(nncf_graph: NNCFGraph): return get_input_edges_mapping(nncf_graph) @staticmethod - def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: ONNXTargetPoint) -> Tuple[int]: + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: ONNXTargetPoint) -> Tuple[int, ...]: return get_quantized_tensor_shape(nncf_graph, node, target_point) @staticmethod @@ -166,8 +166,8 @@ def get_weight_quantization_axes(node: NNCFNode, target_point: ONNXTargetPoint) def get_statistic_collector( range_estimator_params: RangeEstimatorParameters, use_abs_max: bool, - reduction_axes: Optional[Tuple[int]], - aggregation_axes: Optional[Tuple[int]], + reduction_axes: Optional[Tuple[int, ...]], + aggregation_axes: Optional[Tuple[int, ...]], inplace: bool, num_samples: Optional[int] = None, ) -> TensorCollector: diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 59ae3539143..d621993c3ae 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -138,7 +138,7 @@ def unify_statistics(statistics: List[OVMinMaxTensorStatistic]) 
-> OVMinMaxTenso return OVMinMaxTensorStatistic(min_values=min_values, max_values=max_values) @staticmethod - def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: OVTargetPoint) -> Tuple[int]: + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: OVTargetPoint) -> Tuple[int, ...]: if target_point.is_weight_target_point(): return node.layer_attributes.constant_attributes[target_point.port_id]["shape"] if target_point.type == TargetType.PRE_LAYER_OPERATION: @@ -155,8 +155,8 @@ def get_weight_quantization_axes(node: NNCFNode, target_point: OVTargetPoint) -> def get_statistic_collector( range_estimator_params: RangeEstimatorParameters, use_abs_max: bool, - reduction_axes: Optional[Tuple[int]], - aggregation_axes: Optional[Tuple[int]], + reduction_axes: Optional[Tuple[int, ...]], + aggregation_axes: Optional[Tuple[int, ...]], inplace: bool, num_samples: Optional[int] = None, ) -> TensorCollector: diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index ddd726850d5..fa1f2b0cbb6 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -157,7 +157,7 @@ def unify_statistics(statistics: List[PTMinMaxTensorStatistic]) -> PTMinMaxTenso return PTMinMaxTensorStatistic(min_values=min_values, max_values=max_values) @staticmethod - def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: PTTargetPoint) -> Tuple[int]: + def get_target_point_shape(nncf_graph: NNCFGraph, node: NNCFNode, target_point: PTTargetPoint) -> Tuple[int, ...]: if target_point.is_weight_target_point(): return tuple(node.layer_attributes.get_weight_shape()) return nncf_graph.get_input_shape_for_insertion_point(target_point) @@ -170,8 +170,8 @@ def get_weight_quantization_axes(node: NNCFNode, target_point: PTTargetPoint) -> def get_statistic_collector( range_estimator_params: 
RangeEstimatorParameters, use_abs_max: bool, - reduction_axes: Optional[Tuple[int]], - aggregation_axes: Optional[Tuple[int]], + reduction_axes: Optional[Tuple[int, ...]], + aggregation_axes: Optional[Tuple[int, ...]], inplace: bool, num_samples: Optional[int] = None, ) -> TensorCollector: From da05b93f85d09bfcd9bddcc43138f573fab917e8 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 8 Mar 2024 14:24:42 +0100 Subject: [PATCH 103/108] revert debug message minmax --- nncf/quantization/algorithms/min_max/algorithm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index d0cc478247c..97f6cc53ca2 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -931,6 +931,10 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin quantization_target_points, _ = self._get_quantization_target_points(model, graph) output = StatisticPointsContainer() for quantization_target_point, qconfig in quantization_target_points.items(): + nncf_logger.debug( + f"Adding target point {quantization_target_point.target_node_name}" + f" with type {quantization_target_point.type} for statistics collection" + ) stat_collector = self._get_stat_collector( graph, quantization_target_point, qconfig, self._batchwise_statistics ) From 291110eb52d73b408c2fb361fd24a504685cc489 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Fri, 8 Mar 2024 14:49:50 +0100 Subject: [PATCH 104/108] typo --- nncf/onnx/quantization/quantize_model.py | 4 +--- nncf/openvino/quantization/quantize_model.py | 8 ++------ nncf/quantization/quantize_model.py | 10 ++++++---- nncf/torch/quantization/quantize_model.py | 4 +--- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py index 5939b38e2bf..094b98e81af 100644 --- 
a/nncf/onnx/quantization/quantize_model.py +++ b/nncf/onnx/quantization/quantize_model.py @@ -83,9 +83,7 @@ def quantize_impl( ) graph = GraphConverter.create_nncf_graph(model) - warning_model_no_batchwise_support( - graph, advanced_parameters.batchwise_statistics, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS - ) + warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS) quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) return quantized_model diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 01c9802505b..d74a656169e 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -83,9 +83,7 @@ def native_quantize_if_op_impl( ) graph = GraphConverter.create_nncf_graph(model) - warning_model_no_batchwise_support( - graph, advanced_parameters.batchwise_statistics, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS - ) + warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS) if_ops_number = get_number_if_op(model) all_models_number = if_ops_number * 2 + 1 nncf_logger.info( @@ -141,9 +139,7 @@ def native_quantize_impl( advanced_parameters=advanced_parameters, ) graph = GraphConverter.create_nncf_graph(model) - warning_model_no_batchwise_support( - graph, advanced_parameters.batchwise_statistics, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS - ) + warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS) quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) if is_weight_compression_needed(advanced_parameters): diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index b0bb0b0c630..d290904a399 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -48,7 
+48,7 @@ def warning_model_no_batchwise_support( graph: NNCFGraph, - batchwise_statistics: bool, + advanced_quantization_parameters: Optional[AdvancedQuantizationParameters], model_type: ModelType, no_batchwise_support_metatypes: List[OperatorMetatype], ) -> None: @@ -56,12 +56,14 @@ def warning_model_no_batchwise_support( Prints a warning message if batchwise statistics could lead to a significant accuracy drop. :param graph: Model's NNCFGraph. - :param batchwise_statistics: Is turned on or turned off batchwise statistics. + :param advanced_quantization_parameters: AdvancedQuantizationParameters. :param model_type: Model type algorithm option. :param no_batchwise_support_metatypes: Meatypes having no batchwise statistics support. """ - if batchwise_statistics and ( - graph.get_nodes_by_metatypes(no_batchwise_support_metatypes) or model_type == ModelType.TRANSFORMER + if ( + advanced_quantization_parameters + and advanced_quantization_parameters.batchwise_statistics + and (graph.get_nodes_by_metatypes(no_batchwise_support_metatypes) or model_type == ModelType.TRANSFORMER) ): nncf_logger.warning(BATCHWISE_STATISTICS_WARNING) diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py index 44898b3dc61..48f3ddefae2 100644 --- a/nncf/torch/quantization/quantize_model.py +++ b/nncf/torch/quantization/quantize_model.py @@ -71,9 +71,7 @@ def quantize_impl( advanced_parameters=advanced_parameters, ) graph = nncf_network.nncf.get_graph() - warning_model_no_batchwise_support( - graph, advanced_parameters.batchwise_statistics, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS - ) + warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS) quantized_model = quantization_algorithm.apply(nncf_network, graph, dataset=calibration_dataset) quantized_model.nncf.disable_dynamic_graph_building() From 2676fbde6af31563857ef74643345358bf33b1a9 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: 
Mon, 18 Mar 2024 15:03:52 +0100 Subject: [PATCH 105/108] add model_param is_batch_size_supported to conformance; make all models bs>1 with dynamic batch_size at input shape; make validation with bs=1 --- tests/post_training/README.md | 1 + tests/post_training/conftest.py | 3 +- tests/post_training/model_scope.py | 9 ++++++ tests/post_training/pipelines/base.py | 2 -- .../pipelines/causal_language_model.py | 5 --- .../pipelines/image_classification_timm.py | 31 ++++++++----------- .../pipelines/lm_weight_compression.py | 5 --- .../pipelines/masked_language_modeling.py | 5 --- .../test_quantize_conformance.py | 17 +++------- 9 files changed, 29 insertions(+), 49 deletions(-) diff --git a/tests/post_training/README.md b/tests/post_training/README.md index ef6ef864fb4..f5487311e70 100644 --- a/tests/post_training/README.md +++ b/tests/post_training/README.md @@ -60,6 +60,7 @@ Additional arguments: - `--fp32` to run validation of not quantized model - `--cuda` to enable CUDA_TORCH backend - `--subset-size=N` to force subset_size of calibration dataset +- `--batch-size=N` to use batch_size for calibration - `--benchmark` to collect throughput statistics, add `FPS` column to result.csv - `--extra-columns` to add additional columns to reports.csv: - `Stat. 
collection time` - time of statistic collection diff --git a/tests/post_training/conftest.py b/tests/post_training/conftest.py index 6478eae8c1b..8288b993b31 100644 --- a/tests/post_training/conftest.py +++ b/tests/post_training/conftest.py @@ -19,8 +19,7 @@ def pytest_addoption(parser): parser.addoption("--data", action="store", help="Data directory") parser.addoption("--output", action="store", default="./tmp/", help="Directory to store artifacts") parser.addoption("--no-eval", action="store_true", help="Skip validation step") - parser.addoption("--batch_size", action="store", default=1, type=int, help="Batch size of calibration dataset") - parser.addoption("--dynamic_batch_shape", action="store_true", help="Export model with dynamic batch axis") + parser.addoption("--batch-size", action="store", default=1, type=int, help="Batch size of calibration dataset") parser.addoption("--subset-size", type=int, default=None, help="Set subset size") parser.addoption("--fp32", action="store_true", help="Test original model") parser.addoption("--cuda", action="store_true", help="Enable CUDA_TORCH backend") diff --git a/tests/post_training/model_scope.py b/tests/post_training/model_scope.py index e643b68087d..a4696fc091a 100644 --- a/tests/post_training/model_scope.py +++ b/tests/post_training/model_scope.py @@ -39,6 +39,7 @@ "subset_size": 2, }, "backends": ALL_PTQ_BACKENDS + [BackendType.OPTIMUM], + "is_batch_size_supported": False, }, { "reported_name": "hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM", @@ -50,6 +51,7 @@ "subset_size": 2, }, "backends": [BackendType.OPTIMUM], + "is_batch_size_supported": False, }, # Timm models { @@ -159,6 +161,7 @@ ), }, "backends": NNCF_PTQ_BACKENDS, + "is_batch_size_supported": False, # Issue is raised during export with dynamic shape. 
}, { "reported_name": "timm/mobilenetv2_050", @@ -286,6 +289,7 @@ "sensitivity_metric": SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, }, "backends": [BackendType.OV], + "is_batch_size_supported": False, }, { "reported_name": "tinyllama_data_aware", @@ -293,6 +297,7 @@ "pipeline_cls": LMWeightCompression, "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM}, "backends": [BackendType.OV], + "is_batch_size_supported": False, }, { "reported_name": "tinyllama_data_aware_awq", @@ -300,6 +305,7 @@ "pipeline_cls": LMWeightCompression, "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM, "awq": True}, "backends": [BackendType.OV], + "is_batch_size_supported": False, }, { "reported_name": "tinyllama_data_aware_awq_stateful", @@ -308,6 +314,7 @@ "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM, "awq": True}, "params": {"is_stateful": True}, "backends": [BackendType.OV], + "is_batch_size_supported": False, }, ] @@ -322,6 +329,8 @@ def generate_tests_scope(models_list: List[Dict]) -> Dict[str, dict]: for test_model_param in models_list: for backend in test_model_param["backends"] + [BackendType.FP32]: model_param = copy.deepcopy(test_model_param) + if "is_batch_size_supported" not in model_param: # Set default value of is_batch_size_supported. 
+ model_param["is_batch_size_supported"] = True reported_name = model_param["reported_name"] model_id = reported_name_to_model_id_mapping[reported_name] if backend == BackendType.FP32: diff --git a/tests/post_training/pipelines/base.py b/tests/post_training/pipelines/base.py index 35bb967a09c..96b84e1cbd8 100644 --- a/tests/post_training/pipelines/base.py +++ b/tests/post_training/pipelines/base.py @@ -183,7 +183,6 @@ def __init__( reference_data: dict, no_eval: bool, run_benchmark_app: bool, - dynamic_batch_shape: bool, params: dict = None, batch_size: int = 1, ) -> None: @@ -196,7 +195,6 @@ def __init__( self.reference_data = reference_data self.params = params or {} self.batch_size = batch_size - self.dynamic_batch_shape = dynamic_batch_shape self.no_eval = no_eval self.run_benchmark_app = run_benchmark_app self.output_model_dir: Path = self.output_dir / self.reported_name / self.backend.value diff --git a/tests/post_training/pipelines/causal_language_model.py b/tests/post_training/pipelines/causal_language_model.py index 8dc27c7a8fb..44385830e0a 100644 --- a/tests/post_training/pipelines/causal_language_model.py +++ b/tests/post_training/pipelines/causal_language_model.py @@ -24,8 +24,6 @@ class CausalLMHF(PTQTestPipeline): """Pipeline for causal language models from Hugging Face repository""" def prepare_model(self) -> None: - if self.dynamic_batch_shape: - raise ValueError("The model does not support export with dynamic input shape") if self.backend in OV_BACKENDS + [BackendType.FP32]: self.model_hf = OVModelForCausalLM.from_pretrained(self.model_id, export=True, compile=False) self.model = self.model_hf.model @@ -42,9 +40,6 @@ def transform_func(examples): return transform_func def prepare_calibration_dataset(self): - if self.batch_size > 1: - print("Batch size > 1 is not supported for causal language models. 
Batch size = 1 is set.") - self.batch_size = 1 quantizer = OVQuantizer.from_pretrained(self.model_hf) num_samples = self.compression_params.get("subset_size", 300) diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index c500a90bfd9..5cda1d04e6d 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -47,7 +47,7 @@ def prepare_model(self) -> None: self.model_cfg = timm_model.default_cfg self.input_size = [self.batch_size] + list(timm_model.default_cfg["input_size"]) self.dummy_tensor = torch.rand(self.input_size) - if self.dynamic_batch_shape: + if self.batch_size > 1: # Dynamic batch_size shape export self.input_size[0] = -1 if self.backend in PT_BACKENDS: @@ -56,7 +56,7 @@ def prepare_model(self) -> None: if self.backend == BackendType.ONNX: onnx_path = self.fp32_model_dir / "model_fp32.onnx" additional_kwargs = {} - if self.dynamic_batch_shape: + if self.batch_size > 1: additional_kwargs["input_names"] = ["image"] additional_kwargs["dynamic_axes"] = {"image": {0: "batch"}} torch.onnx.export( @@ -126,29 +126,24 @@ def prepare_calibration_dataset(self): def _validate(self): val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform) - val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=2, shuffle=False) - dataset_size = len(val_dataset) - if dataset_size % self.batch_size != 0 and not self.dynamic_batch_shape: - raise ValueError( - ( - "Because the batch_size is not a divisor of the length of the dataset, " - "the one of the data tensors has a shape incompatible with static model input. " - "Use --dynamic_batch_shape option to export such model with dynamic shape." 
- ) - ) + val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False) + + dataset_size = len(val_loader) - core = ov.Core() - ov_model = core.read_model(self.path_compressed_ir) - compiled_model = core.compile_model(ov_model) # Initialize result tensors for async inference support. predictions = np.zeros((dataset_size)) references = -1 * np.ones((dataset_size)) + core = ov.Core() + if os.environ.get("INFERENCE_NUM_THREADS"): # Set CPU_THREADS_NUM for OpenVINO inference inference_num_threads = os.environ.get("INFERENCE_NUM_THREADS") core.set_property("CPU", properties={"INFERENCE_NUM_THREADS": str(inference_num_threads)}) + ov_model = core.read_model(self.path_compressed_ir) + compiled_model = core.compile_model(ov_model) + jobs = int(os.environ.get("NUM_VAL_THREADS", DEFAULT_VAL_THREADS)) infer_queue = ov.AsyncInferQueue(compiled_model, jobs) @@ -157,8 +152,8 @@ def _validate(self): def process_result(request, userdata): output_data = request.get_output_tensor().data predicted_label = np.argmax(output_data, axis=1) - predictions[userdata * self.batch_size : (userdata + 1) * self.batch_size] = predicted_label - pbar.progress.update(pbar.task, advance=self.batch_size) + predictions[userdata] = predicted_label + pbar.progress.update(pbar.task, advance=1) infer_queue.set_callback(process_result) @@ -166,7 +161,7 @@ def process_result(request, userdata): # W/A for memory leaks when using torch DataLoader and OpenVINO image_copies = copy.deepcopy(images.numpy()) infer_queue.start_async(image_copies, userdata=i) - references[i * self.batch_size : (i + 1) * self.batch_size] = target + references[i] = target infer_queue.wait_all() diff --git a/tests/post_training/pipelines/lm_weight_compression.py b/tests/post_training/pipelines/lm_weight_compression.py index b80cfe898d0..b1a6e5853dc 100644 --- a/tests/post_training/pipelines/lm_weight_compression.py +++ b/tests/post_training/pipelines/lm_weight_compression.py @@ -71,8 +71,6 @@ class 
LMWeightCompression(BaseTestPipeline): OV_MODEL_NAME = "openvino_model.xml" def prepare_model(self) -> None: - if self.dynamic_batch_shape: - raise ValueError("The model does not support export with dynamic input shape") is_stateful = self.params.get("is_stateful", False) if is_stateful: self.fp32_model_dir = self.fp32_model_dir.parent / (self.fp32_model_dir.name + "_sf") @@ -131,9 +129,6 @@ def transform_fn(data): return transform_fn def prepare_calibration_dataset(self): - if self.batch_size > 1: - print("Batch size > 1 is not supported for causal language models. Batch size = 1 is set.") - self.batch_size = 1 dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") dataset = dataset.filter(lambda example: len(example["text"]) > 80) self.calibration_dataset = nncf.Dataset(dataset, self.get_transform_calibration_fn()) diff --git a/tests/post_training/pipelines/masked_language_modeling.py b/tests/post_training/pipelines/masked_language_modeling.py index c268dee79ab..9f750808506 100644 --- a/tests/post_training/pipelines/masked_language_modeling.py +++ b/tests/post_training/pipelines/masked_language_modeling.py @@ -86,11 +86,6 @@ def transform_func(data): return transform_func def prepare_calibration_dataset(self): - if self.dynamic_batch_shape: - raise ValueError("The model does not support export with dynamic input shape") - if self.batch_size > 1: - print("Batch size > 1 is not supported for masked language models. 
Batch size = 1 is set.") - self.batch_size = 1 quantizer = OVQuantizer.from_pretrained(self.model_hf) num_samples = self.compression_params.get("subset_size", 300) diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index 6840930fc48..815c1f33b7b 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -50,11 +50,6 @@ def fixture_batch_size(pytestconfig): return pytestconfig.getoption("batch_size") -@pytest.fixture(scope="session", name="dynamic_batch_shape") -def fixture_dynamic_batch_shape(pytestconfig): - return pytestconfig.getoption("dynamic_batch_shape") - - @pytest.fixture(scope="session", name="subset_size") def fixture_subset_size(pytestconfig): return pytestconfig.getoption("subset_size") @@ -137,11 +132,13 @@ def fixture_wc_report_data(output_dir): df.to_csv(output_dir / "results.csv", index=False) -def maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend): +def maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size): if test_model_param["backend"] == BackendType.FP32 and not run_fp32_backend: pytest.skip("To run test for not quantized model use --fp32 argument") if test_model_param["backend"] == BackendType.CUDA_TORCH and not run_torch_cuda_backend: pytest.skip("To run test for CUDA_TORCH backend use --cuda argument") + if batch_size > 1 and not test_model_param["is_batch_size_supported"]: + pytest.skip("The model does not support batch_size > 1. 
Please use --batch-size 1.") return test_model_param @@ -207,7 +204,6 @@ def test_ptq_quantization( ptq_result_data: Dict[str, RunInfo], no_eval: bool, batch_size: int, - dynamic_batch_shape: bool, run_fp32_backend: bool, run_torch_cuda_backend: bool, subset_size: Optional[int], @@ -223,7 +219,7 @@ def test_ptq_quantization( if test_case_name not in ptq_reference_data: raise nncf.ValidationError(f"{test_case_name} does not exist in 'reference_data.yaml'") test_model_param = PTQ_TEST_CASES[test_case_name] - maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend) + maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size) pipeline_cls = test_model_param["pipeline_cls"] # Recalculates subset_size when subset_size is None if batch_size > 1 and subset_size is None: @@ -237,7 +233,6 @@ def test_ptq_quantization( "no_eval": no_eval, "run_benchmark_app": run_benchmark_app, "batch_size": batch_size, - "dynamic_batch_shape": dynamic_batch_shape, } ) pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs) @@ -276,7 +271,6 @@ def test_weight_compression( wc_result_data: Dict[str, RunInfo], no_eval: bool, batch_size: int, - dynamic_batch_shape: bool, run_fp32_backend: bool, run_torch_cuda_backend: bool, subset_size: Optional[int], @@ -292,7 +286,7 @@ def test_weight_compression( if test_case_name not in wc_reference_data: raise RuntimeError(f"{test_case_name} is not defined in `wc_reference_data` fixture") test_model_param = WC_TEST_CASES[test_case_name] - maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend) + maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size) pipeline_cls = test_model_param["pipeline_cls"] pipeline_kwargs = create_pipeline_kwargs(test_model_param, subset_size, test_case_name, wc_reference_data) pipeline_kwargs.update( @@ -302,7 +296,6 @@ def test_weight_compression( "no_eval": no_eval, "run_benchmark_app": run_benchmark_app, 
"batch_size": batch_size, - "dynamic_batch_shape": dynamic_batch_shape, } ) pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs) From 3ae9d2834f97bf568ca2373e3edcb34d6dfff85f Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 18 Mar 2024 15:44:28 +0100 Subject: [PATCH 106/108] add example in Readme --- tests/post_training/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/post_training/README.md b/tests/post_training/README.md index f5487311e70..af2639c8ec7 100644 --- a/tests/post_training/README.md +++ b/tests/post_training/README.md @@ -60,7 +60,7 @@ Additional arguments: - `--fp32` to run validation of not quantized model - `--cuda` to enable CUDA_TORCH backend - `--subset-size=N` to force subset_size of calibration dataset -- `--batch-size=N` to use batch_size for calibration +- `--batch-size=N` to use batch_size for calibration. Some of the models do not support --batch-size > 1. For such models, please, use --batch-size=1. - `--benchmark` to collect throughput statistics, add `FPS` column to result.csv - `--extra-columns` to add additional columns to reports.csv: - `Stat. 
collection time` - time of statistic collection @@ -116,3 +116,9 @@ Run test with additional columns: ```bash pytest --data= --extra-columns tests/post_training/test_quantize_conformance.py ``` + +Run test with calibration dataset having batch-size=10 for all models: + +```bash +pytest --data= --batch-size 10 tests/post_training/test_quantize_conformance.py +``` From 5efcdb5a74042f0f70e52076acb7cd7a430253f0 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 20 Mar 2024 11:11:32 +0100 Subject: [PATCH 107/108] comments --- nncf/common/graph/utils.py | 2 +- nncf/common/tensor_statistics/aggregator.py | 2 +- nncf/quantization/advanced_parameters.py | 5 +++-- nncf/quantization/quantize_model.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/nncf/common/graph/utils.py b/nncf/common/graph/utils.py index 577d03d6ec5..60a89d0615d 100644 --- a/nncf/common/graph/utils.py +++ b/nncf/common/graph/utils.py @@ -120,7 +120,7 @@ def get_reduction_axes( channel_axes: Union[List[int], Tuple[int, ...]], shape: Union[List[int], Tuple[int, ...]] ) -> Tuple[int, ...]: """ - Returns filtered reduction axes without axes that corresponds channels. + Returns filtered reduction axes without axes that correspond to channels. :param channel_axes: Channel axes. :param shape: Shape that need to be filtered. diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index ae0245e6204..e8946100ca0 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -46,7 +46,7 @@ def __init__(self, dataset: Dataset): def _get_iterations_number(self) -> Optional[int]: """ - Returns number of iterations which in min(self.iterations_number, dataset_length). + Returns number of iterations, output number is less than min(self.iterations_number, dataset_length). :return: Number of iterations for statistics collection. 
""" diff --git a/nncf/quantization/advanced_parameters.py b/nncf/quantization/advanced_parameters.py index 187c26dd716..f916945a8ae 100644 --- a/nncf/quantization/advanced_parameters.py +++ b/nncf/quantization/advanced_parameters.py @@ -192,8 +192,9 @@ class AdvancedQuantizationParameters: :type disable_bias_correction: bool :param batchwise_statistics: Determines whether quantizer statistics should be calculated for each item of the batch or for the entire batch, default is None. - "None" means that if torch.DataLoader or tensorflow.Dataset was passed as a data source for the calibration - dataset, then if batch_size > 1 of the data source then batchwise_statistics = True, otherwise False. + "None" means that if torch.DataLoader or tensorflow.Dataset was passed as a data source for + the calibration dataset, then in case batch_size of the data source > 1 batchwise_statistics sets to True, + otherwise sets to False. :type batchwise_statistics: Optional[bool] :param activations_quantization_params: Quantization parameters for activations. :type activations_quantization_params: nncf.quantization.advanced_parameters.QuantizationParameters diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index d290904a399..fe8a69ace20 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -53,7 +53,7 @@ def warning_model_no_batchwise_support( no_batchwise_support_metatypes: List[OperatorMetatype], ) -> None: """ - Prints a warning message if batchwise statistics could lead to a significant accuracy drop. + Prints the warning message if batchwise statistics could lead to a significant accuracy drop. :param graph: Model's NNCFGraph. :param advanced_quantization_parameters: AdvancedQuantizationParameters. 
From d8ea324dc6264a4b622d72630559d08bc020aa94 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Wed, 20 Mar 2024 18:40:27 +0100 Subject: [PATCH 108/108] iterations_number -> stat_subset_size --- nncf/common/tensor_statistics/aggregator.py | 20 ++++++++++---------- tests/common/test_statistics_aggregator.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index e8946100ca0..910b359bff0 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -41,22 +41,22 @@ class StatisticsAggregator(ABC): def __init__(self, dataset: Dataset): self.dataset = dataset - self.iterations_number = None + self.stat_subset_size = None self.statistic_points = StatisticPointsContainer() def _get_iterations_number(self) -> Optional[int]: """ - Returns number of iterations, output number is less than min(self.iterations_number, dataset_length). + Returns number of iterations, output number is less than min(self.stat_subset_size, dataset_length). :return: Number of iterations for statistics collection. 
""" dataset_length = self.dataset.get_length() - if dataset_length and self.iterations_number: - if self.iterations_number > dataset_length: + if dataset_length and self.stat_subset_size: + if self.stat_subset_size > dataset_length: nncf_logger.warning(ITERATIONS_NUMBER_WARNING) return dataset_length - return self.iterations_number - return dataset_length or self.iterations_number + return self.stat_subset_size + return dataset_length or self.stat_subset_size def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ @@ -78,7 +78,7 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: empty_statistics = True for input_data in track( islice(self.dataset.get_inference_data(), iterations_number), - total=self.iterations_number, + total=self.stat_subset_size, description="Statistics collection", ): outputs = engine.infer(input_data) @@ -103,10 +103,10 @@ def register_statistic_points(self, statistic_points: StatisticPointsContainer) for _statistic_point in _statistic_points: for _, tensor_collectors in _statistic_point.algorithm_to_tensor_collectors.items(): for tensor_collector in tensor_collectors: - if self.iterations_number is None: - self.iterations_number = tensor_collector.num_samples + if self.stat_subset_size is None: + self.stat_subset_size = tensor_collector.num_samples elif tensor_collector.num_samples is not None: - self.iterations_number = max(self.iterations_number, tensor_collector.num_samples) + self.stat_subset_size = max(self.stat_subset_size, tensor_collector.num_samples) @abstractmethod def _register_statistics(self, outputs: Dict[str, NNCFTensor], statistic_points: StatisticPointsContainer) -> None: diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index 519d653c5e4..2a2161abc40 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -903,7 +903,7 @@ def test_register_statistics(self, dataset_samples, 
statistic_point_params, mock ref_subset_size = max(ref_subset_size, subset_size) else: ref_subset_size = subset_size - assert statistics_aggregator.iterations_number == ref_subset_size + assert statistics_aggregator.stat_subset_size == ref_subset_size def test_collect_with_empty_dataset_no_len(self, dataset_samples): """