diff --git a/nncf/experimental/torch/quantization/__init__.py b/nncf/experimental/torch/quantization/__init__.py deleted file mode 100644 index 9b29b47534a..00000000000 --- a/nncf/experimental/torch/quantization/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nncf/experimental/torch/quantization/quantize_model.py b/nncf/experimental/torch/quantization/quantize_model.py deleted file mode 100644 index 714cc5ed702..00000000000 --- a/nncf/experimental/torch/quantization/quantize_model.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Any, Dict, Optional, Tuple, Union - -import torch - -from nncf.common.quantization.structs import QuantizationPreset -from nncf.data import Dataset -from nncf.parameters import ModelType -from nncf.parameters import TargetDevice -from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization -from nncf.scopes import IgnoredScope -from nncf.torch.dynamic_graph.context import no_nncf_trace -from nncf.torch.dynamic_graph.io_handling import replicate_same_tensors -from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk -from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_outputs_with_objwalk -from nncf.torch.nested_objects_traversal import objwalk -from nncf.torch.nncf_network import NNCFNetwork -from nncf.torch.utils import get_model_device -from nncf.torch.utils import is_tensor -from nncf.torch.utils import training_mode_switcher - - -def create_nncf_network(model: torch.nn.Module, dataset: Dataset) -> NNCFNetwork: - """ - Creates NNCFNetwork instance for the PyTorch model where the first item of dataset - is used for model tracing. - - :param model: PyTorch model - :param dataset: Dataset for model tracing - :return: NNCFNetwork instance for the input model - """ - - def get_inputs(dataloader_output: Any) -> Tuple[Tuple, Dict]: - if not isinstance(dataloader_output, tuple): - dataloader_output = (dataloader_output,) - return dataloader_output, {} - - def wrap_inputs(args, kwargs): - return wrap_nncf_model_inputs_with_objwalk(args, kwargs) - - def wrap_outputs(retval): - return wrap_nncf_model_outputs_with_objwalk(retval) - - def create_dummy_forward_fn(dataset, device): - def dummy_forward(model): - with no_nncf_trace(): - args = next(iter(dataset.get_inference_data())) - args, kwargs = get_inputs(args) - - def send_to_device(tensor): - return tensor.to(device) - - args = objwalk(args, is_tensor, send_to_device) - kwargs = objwalk(kwargs, is_tensor, send_to_device) - - args, kwargs = wrap_inputs(args, kwargs) - retval = model(*args, **kwargs) - retval = replicate_same_tensors(retval) - return wrap_outputs(retval) - - return dummy_forward - - device = get_model_device(model) - dummy_forward_fn = create_dummy_forward_fn(dataset, device) - - with training_mode_switcher(model, is_training=False): - nncf_network = NNCFNetwork( - model, dummy_forward_fn=dummy_forward_fn, wrap_inputs_fn=wrap_inputs, wrap_outputs_fn=wrap_outputs - ) - - nncf_network.nncf.get_tracing_context().disable_trace_dynamic_graph() - - return nncf_network - - -def quantize_impl( - model: torch.nn.Module, - calibration_dataset: Dataset, - preset: Union[QuantizationPreset, None], - target_device: TargetDevice, - subset_size: int, - fast_bias_correction: bool, - model_type: Optional[ModelType] = None, - ignored_scope: Optional[IgnoredScope] = None, - advanced_parameters: Optional[AdvancedQuantizationParameters] = None, -) -> torch.nn.Module: - """ - Experimental implementation of the `quantize()` method for the PyTorch backend. - """ - if fast_bias_correction is False: - raise ValueError(f"fast_bias_correction={fast_bias_correction} is not supported") - if target_device == TargetDevice.CPU_SPR: - raise RuntimeError("target_device == CPU_SPR is not supported") - - nncf_network = create_nncf_network(model.eval(), calibration_dataset) - - quantization_algorithm = PostTrainingQuantization( - preset=preset, - target_device=target_device, - subset_size=subset_size, - fast_bias_correction=fast_bias_correction, - model_type=model_type, - ignored_scope=ignored_scope, - advanced_parameters=advanced_parameters, - ) - - quantized_model = quantization_algorithm.apply( - nncf_network, nncf_network.nncf.get_graph(), dataset=calibration_dataset - ) - - quantized_model.nncf.disable_dynamic_graph_building() - - return quantized_model diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index a1bc87caf9b..177128c9412 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -59,7 +59,7 @@ def quantize( - `performance`: Symmetric quantization of weights and activations. - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. Default value is None. In this case, `mixed` preset is used for `transformer` - model type otherwise `performace`. + model type otherwise `performance`. :type preset: nncf.QuantizationPreset :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance @@ -185,7 +185,7 @@ def quantize_with_accuracy_control( - `performance`: Symmetric quantization of weights and activations. - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. Default value is None. In this case, `mixed` preset is used for `transformer` - model type otherwise `performace`. + model type otherwise `performance`. :type preset: nncf.QuantizationPreset :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance @@ -317,7 +317,7 @@ def quantize_with_tune_hyperparams( - `performance`: Symmetric quantization of weights and activations. - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. Default value is None. In this case, `mixed` preset is used for `transformer` - model type otherwise `performace`. + model type otherwise `performance`. :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device. diff --git a/nncf/torch/engine.py b/nncf/torch/engine.py index abdc25d7425..b30ba4dbef1 100644 --- a/nncf/torch/engine.py +++ b/nncf/torch/engine.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, Union +from typing import Any, Dict, Tuple, Union import torch from torch import nn @@ -32,12 +32,17 @@ def __init__(self, model: nn.Module): self._model = model self._model.eval() - def infer(self, input_data: Union[torch.Tensor, Dict[str, torch.Tensor]]) -> Union[torch.Tensor, Dict[str, Any]]: + def infer( + self, input_data: Union[torch.Tensor, Tuple[torch.Tensor], Dict[str, torch.Tensor]] + ) -> Union[torch.Tensor, Dict[str, Any]]: """ Runs Torch model on the provided input. - :param input_data: inputs for the model - :return output_data: model outputs + :param input_data: Inputs for the model. + :return: Model outputs. """ - + if isinstance(input_data, dict): + return self._model(**input_data) + if isinstance(input_data, tuple): + return self._model(*input_data) return self._model(input_data) diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py index c37d69c3204..d0d7619c709 100644 --- a/nncf/torch/quantization/quantize_model.py +++ b/nncf/torch/quantization/quantize_model.py @@ -9,190 +9,84 @@ # See the License for the specific language governing permissions and # limitations under the License. -from copy import deepcopy from typing import Any, Dict, Optional, Tuple, Union import torch from nncf.common.quantization.structs import QuantizationPreset -from nncf.config import NNCFConfig -from nncf.config.structures import BNAdaptationInitArgs -from nncf.config.structures import QuantizationRangeInitArgs from nncf.data import Dataset from nncf.parameters import CompressWeightsMode from nncf.parameters import ModelType from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.advanced_parameters import apply_advanced_parameters_to_config +from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.scopes import IgnoredScope -from nncf.scopes import convert_ignored_scope_to_list from nncf.torch.dynamic_graph.context import no_nncf_trace from nncf.torch.dynamic_graph.io_handling import replicate_same_tensors from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_outputs_with_objwalk -from nncf.torch.initialization import PTInitializingDataLoader -from nncf.torch.model_creation import create_compressed_model from nncf.torch.nested_objects_traversal import objwalk from nncf.torch.nncf_module_replacement import replace_modules_by_nncf_modules +from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.quantization.weights_compression import insert_pre_compression_operations from nncf.torch.utils import get_model_device from nncf.torch.utils import is_tensor +from nncf.torch.utils import training_mode_switcher -DEFAULT_RANGE_TYPE = "mean_min_max" - -# TODO(alexsu52): It is a workaround and should be removed. -class CalibrationDataLoader(PTInitializingDataLoader): +def create_nncf_network(model: torch.nn.Module, dataset: Dataset) -> NNCFNetwork: """ - This class wraps the nncf.Dataset. + Creates NNCFNetwork instance for the PyTorch model where the first item of dataset + is used for model tracing. - This is required for proper initialization of certain compression algorithms. + :param model: PyTorch model. + :param dataset: Dataset for model tracing. + :return: NNCFNetwork instance for the input model. """ - def __init__(self, data_loader: Dataset): - super().__init__(data_loader) - self._length = None - - @property - def batch_size(self): - data_source = getattr(self._data_loader, "_data_source") - return getattr(data_source, "batch_size", 1) - - def __iter__(self): - return iter(self._data_loader.get_inference_data()) - - def __len__(self): - if self._length is None: - data = self._data_loader.get_inference_data() - self._length = CalibrationDataLoader._get_length(data) - return self._length - - def get_inputs(self, dataloader_output: Any) -> Tuple[Tuple, Dict]: - if not isinstance(dataloader_output, tuple): - dataloader_output = (dataloader_output,) - return dataloader_output, {} - - @staticmethod - def _get_length(iterable) -> int: - length = 0 - for _ in iterable: - length = length + 1 - - return length - - -def _get_transformer_quantization_config(preset: QuantizationPreset, subset_size: int) -> Dict[str, Any]: - """ - Returns the quantization config for transformer-based models. - - :param preset: A preset that controls the quantization mode - (symmetric and asymmetric). It can take the following values: - - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric - quantization of activations. - :param subset_size: Size of a subset to calculate activations - statistics used for quantization. - :return: The quantization config for transformer-based models. - """ - return { - "algorithm": "quantization", - "preset": preset.value, - "initializer": { - "range": {"num_init_samples": subset_size, "type": DEFAULT_RANGE_TYPE}, - "batchnorm_adaptation": {"num_bn_adaptation_samples": 0}, - }, - "scope_overrides": {"activations": {"{re}.*matmul_0": {"mode": "symmetric"}}}, - "ignored_scopes": [ - "{re}.*Embeddings.*", - "{re}.*__add___[0-1]", - "{re}.*layer_norm_0", - "{re}.*matmul_1", - "{re}.*__truediv__*", - ], - "overflow_fix": "first_layer_only", - } + def get_inputs(dataloader_output: Any) -> Tuple[Tuple, Dict]: + if isinstance(dataloader_output, dict): + return (), dataloader_output + if isinstance(dataloader_output, tuple): + return dataloader_output, {} + return (dataloader_output,), {} + def wrap_inputs(args, kwargs): + return wrap_nncf_model_inputs_with_objwalk(args, kwargs) -def _get_default_quantization_config(preset: QuantizationPreset, subset_size: int) -> Dict[str, Any]: - """ - Returns the default quantization config + def wrap_outputs(retval): + return wrap_nncf_model_outputs_with_objwalk(retval) - :param preset: A preset that controls the quantization mode - (symmetric and asymmetric). It can take the following values: - - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric - quantization of activations. - :param subset_size: Size of a subset to calculate activations - statistics used for quantization. - :return: The default quantization config. - """ - return { - "algorithm": "quantization", - "preset": preset.value, - "initializer": { - "range": {"num_init_samples": subset_size, "type": DEFAULT_RANGE_TYPE}, - "batchnorm_adaptation": {"num_bn_adaptation_samples": subset_size}, - }, - "overflow_fix": "first_layer_only", - } + def create_dummy_forward_fn(dataset, device): + def dummy_forward(model): + with no_nncf_trace(): + data = next(iter(dataset.get_inference_data())) + args, kwargs = get_inputs(data) + def send_to_device(tensor): + return tensor.to(device) -def _create_nncf_config( - preset: Union[QuantizationPreset, None], - target_device: TargetDevice, - subset_size: int, - model_type: Union[ModelType, None], - ignored_scope: Union[IgnoredScope, None], - advanced_parameters: Union[AdvancedQuantizationParameters, None], -) -> NNCFConfig: - """ - Creates the NNCFConfig for the quantization algorithm. + args = objwalk(args, is_tensor, send_to_device) + kwargs = objwalk(kwargs, is_tensor, send_to_device) - :param preset: A preset that controls the quantization mode - (symmetric and asymmetric). It can take the following values: - - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric - quantization of activations. - - `None`: `mixed` preset is used for `transformer` model type otherwise `performace`. - :param target_device: A target device the specificity of which will be taken - into account while compressing in order to obtain the best performance - for this type of device. - :param subset_size: Size of a subset to calculate activations - statistics used for quantization. - :param model_type: Model type is needed to specify additional patterns - in the model. - :param ignored_scope: An ignored scope that defined the list of model control - flow graph nodes to be ignored during quantization. - :param advanced_parameters: Advanced quantization parameters for - fine-tuning the quantization algorithm. - :return: NNCFConfig for the quantization algorithm. - """ - if preset is None: - if model_type == ModelType.TRANSFORMER: - preset = QuantizationPreset.MIXED - else: - preset = QuantizationPreset.PERFORMANCE + args, kwargs = wrap_inputs(args, kwargs) + retval = model(*args, **kwargs) + retval = replicate_same_tensors(retval) + return wrap_outputs(retval) - if model_type == ModelType.TRANSFORMER: - compression_config = _get_transformer_quantization_config(preset, subset_size) - else: - compression_config = _get_default_quantization_config(preset, subset_size) + return dummy_forward - if ignored_scope is not None: - _ignored_scope = convert_ignored_scope_to_list(ignored_scope) - if "ignored_scopes" in compression_config: - compression_config["ignored_scopes"].extend(_ignored_scope) - else: - compression_config["ignored_scopes"] = _ignored_scope - compression_config["validate_scopes"] = ignored_scope.validate + device = get_model_device(model) + dummy_forward_fn = create_dummy_forward_fn(dataset, device) - if advanced_parameters is not None: - compression_config = apply_advanced_parameters_to_config(compression_config, advanced_parameters) + with training_mode_switcher(model, is_training=False): + nncf_network = NNCFNetwork( + model, dummy_forward_fn=dummy_forward_fn, wrap_inputs_fn=wrap_inputs, wrap_outputs_fn=wrap_outputs + ) - if model_type == ModelType.TRANSFORMER: - compression_config["validate_scopes"] = False + nncf_network.nncf.get_tracing_context().disable_trace_dynamic_graph() - return NNCFConfig({"target_device": target_device.value, "compression": compression_config}) + return nncf_network def quantize_impl( @@ -210,67 +104,29 @@ def quantize_impl( Implementation of the `quantize()` method for the PyTorch backend. """ if fast_bias_correction is False: - raise ValueError(f"fast_bias_correction={fast_bias_correction} is not " "supported") - if ignored_scope is not None and ignored_scope.types: - raise RuntimeError( - "Quantization algorithm from the PyTorch backend " - "does not support operation types in the ignored " - "scopes yet" - ) + raise ValueError(f"fast_bias_correction={fast_bias_correction} is not supported") if target_device == TargetDevice.CPU_SPR: raise RuntimeError("target_device == CPU_SPR is not supported") - nncf_config = _create_nncf_config( - preset, target_device, subset_size, model_type, ignored_scope, advanced_parameters - ) + nncf_network = create_nncf_network(model.eval(), calibration_dataset) - calibration_data_loader = CalibrationDataLoader(calibration_dataset) - nncf_config.register_extra_structs( - [ - QuantizationRangeInitArgs(data_loader=calibration_data_loader), - BNAdaptationInitArgs(data_loader=calibration_data_loader), - ] + quantization_algorithm = PostTrainingQuantization( + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=advanced_parameters, ) - def wrap_inputs(args, kwargs): - return wrap_nncf_model_inputs_with_objwalk(args, kwargs) - - def wrap_outputs(retval): - return wrap_nncf_model_outputs_with_objwalk(retval) - - def create_dummy_forward_fn(data_loader, device): - def dummy_forward(model): - with no_nncf_trace(): - data_item = next(iter(data_loader)) - args, kwargs = data_loader.get_inputs(data_item) - - def send_to_device(tensor): - return tensor.to(device) - - args = objwalk(args, is_tensor, send_to_device) - kwargs = objwalk(kwargs, is_tensor, send_to_device) - - args, kwargs = wrap_inputs(args, kwargs) - retval = model(*args, **kwargs) - retval = replicate_same_tensors(retval) - return wrap_outputs(retval) - - return dummy_forward - - dummy_forward_fn = create_dummy_forward_fn(calibration_data_loader, get_model_device(model)) - - clone_model = deepcopy(model) - compression_ctrl, compressed_model = create_compressed_model( - model=clone_model, - config=nncf_config, - dummy_forward_fn=dummy_forward_fn, - wrap_inputs_fn=wrap_inputs, - wrap_outputs_fn=wrap_outputs, + quantized_model = quantization_algorithm.apply( + nncf_network, nncf_network.nncf.get_graph(), dataset=calibration_dataset ) - compression_ctrl.prepare_for_export() - compressed_model.nncf.disable_dynamic_graph_building() - return compressed_model + quantized_model.nncf.disable_dynamic_graph_building() + + return quantized_model def compress_weights_impl( diff --git a/tests/post_training/pipelines/base.py b/tests/post_training/pipelines/base.py index bc97a6a1b4b..3468028cef8 100644 --- a/tests/post_training/pipelines/base.py +++ b/tests/post_training/pipelines/base.py @@ -28,7 +28,6 @@ import nncf from nncf import TargetDevice -from nncf.experimental.torch.quantization.quantize_model import quantize_impl as pt_impl_experimental from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters DEFAULT_VAL_THREADS = 4 @@ -36,17 +35,16 @@ class BackendType(Enum): FP32 = "FP32" - OLD_TORCH = "OLD_TORCH" # Quantization via create_compressed_model - TORCH = "TORCH" # PTQ implementation + TORCH = "TORCH" ONNX = "ONNX" OV = "OV" POT = "POT" OPTIMUM = "OPTIMUM" -NNCF_PTQ_BACKENDS = [BackendType.OLD_TORCH, BackendType.TORCH, BackendType.ONNX, BackendType.OV] +NNCF_PTQ_BACKENDS = [BackendType.TORCH, BackendType.ONNX, BackendType.OV] ALL_PTQ_BACKENDS = NNCF_PTQ_BACKENDS + [BackendType.POT] -PT_BACKENDS = [BackendType.TORCH, BackendType.OLD_TORCH] +PT_BACKENDS = [BackendType.TORCH] OV_BACKENDS = [BackendType.OV, BackendType.POT, BackendType.OPTIMUM] @@ -169,23 +167,12 @@ def _quantize(self): quantizer = OVQuantizer.from_pretrained(self.model_hf) quantizer.quantize(calibration_dataset=self.calibration_dataset, save_directory=self.output_model_dir) else: - quantize_fn = nncf.quantize - if self.backend == BackendType.TORCH: - # Use experimental torch api - quantize_fn = pt_impl_experimental - if "preset" not in self.ptq_params: - self.ptq_params["preset"] = nncf.QuantizationPreset.PERFORMANCE - if "subset_size" not in self.ptq_params: - self.ptq_params["subset_size"] = 300 - if "fast_bias_correction" not in self.ptq_params: - self.ptq_params["fast_bias_correction"] = True - if self.backend == BackendType.POT: self.ptq_params["advanced_parameters"] = AdvancedQuantizationParameters( backend_params={"use_pot": True} ) - self.quantized_model = quantize_fn( + self.quantized_model = nncf.quantize( model=self.model, target_device=TargetDevice.CPU, calibration_dataset=self.calibration_dataset, @@ -198,7 +185,7 @@ def quantize(self) -> None: """ print("Quantization...") - if self.backend in [BackendType.TORCH, BackendType.OLD_TORCH]: + if self.backend in PT_BACKENDS: cpu_threads_num = os.environ.get("CPU_THREADS_NUM") if cpu_threads_num is not None: torch.set_num_threads(int(cpu_threads_num)) diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index 315a345fe9b..00171d701b7 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -121,9 +121,8 @@ def transform_fn(data_item): return transform_fn def prepare_calibration_dataset(self): - batch_size = 128 if self.backend == BackendType.OLD_TORCH else 1 dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform) - loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=2, shuffle=False) + loader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=2, shuffle=False) self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn()) diff --git a/tests/torch/ptq/test_ptq_params.py b/tests/torch/ptq/test_ptq_params.py index 0ef53c946a1..320e8842bfc 100644 --- a/tests/torch/ptq/test_ptq_params.py +++ b/tests/torch/ptq/test_ptq_params.py @@ -12,31 +12,22 @@ import pytest from torch import nn -from nncf import NNCFConfig from nncf.common.graph.patterns import GraphPattern from nncf.common.graph.patterns.manager import PatternsManager from nncf.common.graph.transformations.commands import TargetType -from nncf.common.quantization.structs import QuantizationPreset from nncf.common.utils.backend import BackendType from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator from nncf.experimental.common.tensor_statistics.collectors import MinAggregator from nncf.experimental.common.tensor_statistics.collectors import TensorCollector -from nncf.parameters import ModelType from nncf.parameters import TargetDevice -from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.advanced_parameters import OverflowFix -from nncf.quantization.advanced_parameters import QuantizationMode -from nncf.quantization.advanced_parameters import QuantizationParameters from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend -from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope from nncf.torch.graph.graph import PTTargetPoint from nncf.torch.graph.operator_metatypes import PTModuleConv2dMetatype from nncf.torch.graph.operator_metatypes import PTModuleLinearMetatype from nncf.torch.graph.operator_metatypes import PTSoftmaxMetatype -from nncf.torch.quantization.quantize_model import _create_nncf_config from tests.common.quantization.metatypes import Conv2dTestMetatype from tests.common.quantization.metatypes import LinearTestMetatype from tests.common.quantization.metatypes import SoftmaxTestMetatype @@ -175,116 +166,3 @@ def test_params(self): @pytest.fixture(params=[(IgnoredScope([]), 1, 1), (IgnoredScope(["/Conv_1_0"]), 0, 0)]) def ignored_scopes_data(self, request): return request.param - - -@pytest.mark.parametrize( - "params", - ( - { - "preset": None, - "target_device": TargetDevice.ANY, - "subset_size": 1, - "model_type": ModelType.TRANSFORMER, - "ignored_scope": IgnoredScope(), - "advanced_parameters": AdvancedQuantizationParameters(), - }, - { - "preset": None, - "target_device": TargetDevice.ANY, - "subset_size": 1, - "model_type": None, - "ignored_scope": IgnoredScope(), - "advanced_parameters": AdvancedQuantizationParameters(), - }, - { - "preset": QuantizationPreset.MIXED, - "target_device": TargetDevice.ANY, - "subset_size": 1, - "model_type": ModelType.TRANSFORMER, - "ignored_scope": IgnoredScope(names=["node_1"]), - "advanced_parameters": AdvancedQuantizationParameters( - overflow_fix=OverflowFix.DISABLE, quantize_outputs=True, disable_bias_correction=True - ), - }, - { - "preset": QuantizationPreset.MIXED, - "target_device": TargetDevice.ANY, - "subset_size": 2, - "model_type": None, - "ignored_scope": None, - "advanced_parameters": AdvancedQuantizationParameters( - overflow_fix=OverflowFix.ENABLE, quantize_outputs=False, disable_bias_correction=False - ), - }, - { - "preset": QuantizationPreset.MIXED, - "target_device": TargetDevice.ANY, - "subset_size": 3, - "model_type": None, - "ignored_scope": IgnoredScope(names=["node_1"]), - "advanced_parameters": AdvancedQuantizationParameters( - overflow_fix=OverflowFix.FIRST_LAYER, quantize_outputs=True, disable_bias_correction=False - ), - }, - { - "preset": QuantizationPreset.MIXED, - "target_device": TargetDevice.ANY, - "subset_size": 4, - "model_type": None, - "ignored_scope": IgnoredScope(names=["node_1"]), - "advanced_parameters": AdvancedQuantizationParameters( - overflow_fix=OverflowFix.FIRST_LAYER, - quantize_outputs=True, - disable_bias_correction=False, - activations_quantization_params=QuantizationParameters(num_bits=8, mode=QuantizationMode.SYMMETRIC), - activations_range_estimator_params=RangeEstimatorParametersSet.MEAN_MINMAX, - weights_quantization_params=QuantizationParameters(num_bits=8, mode=QuantizationMode.SYMMETRIC), - weights_range_estimator_params=RangeEstimatorParametersSet.MEAN_MINMAX, - ), - }, - ), -) -def test_create_nncf_config(params): - config = _create_nncf_config(**params) - - assert config["compression"]["overflow_fix"] == params["advanced_parameters"].overflow_fix.value - assert config["compression"]["quantize_outputs"] == params["advanced_parameters"].quantize_outputs - - preset = params["preset"] - if params["preset"] is None: - if params["model_type"] == ModelType.TRANSFORMER: - preset = QuantizationPreset.MIXED - else: - preset = QuantizationPreset.PERFORMANCE - - assert config["compression"]["preset"] == preset.value - - range_config = config["compression"]["initializer"]["range"] - if isinstance(range_config, dict): - assert range_config["num_init_samples"] == params["subset_size"] - assert range_config["type"] == "mean_min_max" - else: - for rc in range_config: - assert rc["num_init_samples"] == params["subset_size"] - assert rc["type"] == "mean_min_max" - - num_bn_samples = config["compression"]["initializer"]["batchnorm_adaptation"]["num_bn_adaptation_samples"] - if params["advanced_parameters"].disable_bias_correction is True or params["model_type"] == ModelType.TRANSFORMER: - assert num_bn_samples == 0 - else: - assert num_bn_samples == params["subset_size"] - - ref_scope = params["ignored_scope"].names if params["ignored_scope"] is not None else [] - if params["model_type"] == ModelType.TRANSFORMER: - ref_scope = [ - "{re}.*Embeddings.*", - "{re}.*__add___[0-1]", - "{re}.*layer_norm_0", - "{re}.*matmul_1", - "{re}.*__truediv__*", - ] + ref_scope - assert config["compression"].get("ignored_scopes", []) == ref_scope - - # To validate NNCFConfig requared input_info - config["input_info"] = {"sample_size": [1, 2, 224, 224]} - NNCFConfig.validate(config) diff --git a/tests/torch/ptq/test_quantize_model_helpers.py b/tests/torch/ptq/test_quantize_model_helpers.py index 2931c4fd8a9..a9abe856d29 100644 --- a/tests/torch/ptq/test_quantize_model_helpers.py +++ b/tests/torch/ptq/test_quantize_model_helpers.py @@ -13,7 +13,7 @@ from torch import nn from nncf.data import Dataset -from nncf.experimental.torch.quantization.quantize_model import create_nncf_network +from nncf.torch.quantization.quantize_model import create_nncf_network class TestModel(nn.Module): diff --git a/tests/torch/ptq/test_strip.py b/tests/torch/ptq/test_strip.py index eaf59c75abe..d6cab4b784e 100644 --- a/tests/torch/ptq/test_strip.py +++ b/tests/torch/ptq/test_strip.py @@ -14,7 +14,6 @@ import nncf from nncf.data import Dataset -from nncf.experimental.torch.quantization.quantize_model import quantize_impl from nncf.parameters import TargetDevice from nncf.quantization import QuantizationPreset from nncf.torch.nncf_network import ExtraCompressionModuleType @@ -70,7 +69,7 @@ def transform_fn(data_item): dataset = Dataset(RandomDatasetMock(input_size), transform_fn) - quantized_model = quantize_impl( + quantized_model = nncf.quantize( model=model, calibration_dataset=dataset, preset=QuantizationPreset.MIXED, diff --git a/tests/torch/test_transform_fn.py b/tests/torch/test_transform_fn.py index d0b44c8dce6..f0a19438cbb 100644 --- a/tests/torch/test_transform_fn.py +++ b/tests/torch/test_transform_fn.py @@ -9,6 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest import torch from torch import nn @@ -42,25 +43,36 @@ def single_input_transform_fn(data_item): return data_item[0] -def multiple_inputs_transform_fn(data_item): - return data_item[0], data_item[1] - - def test_transform_fn_single_input(): model = ModelWithSingleInput() # Check the transformation function - _ = model(single_input_transform_fn(next(iter(dataloader)))) + model(single_input_transform_fn(next(iter(dataloader)))) # Start quantization calibration_dataset = nncf.Dataset(dataloader, single_input_transform_fn) - _ = nncf.quantize(model, calibration_dataset) + nncf.quantize(model, calibration_dataset) + + +def multiple_inputs_transform_tuple_fn(data_item): + return data_item[0], data_item[1] + + +def multiple_inputs_transform_dict_fn(data_item): + return {"input_0": data_item[0], "input_1": data_item[1]} -def test_transform_fn_multiple_inputs(): +@pytest.mark.parametrize( + "transform_fn", (multiple_inputs_transform_tuple_fn, multiple_inputs_transform_dict_fn), ids=["tuple", "dict"] +) +def test_transform_fn_multiple_inputs(transform_fn): model = ModelWithMultipleInputs() # Check the transformation function - _ = model(*multiple_inputs_transform_fn(next(iter(dataloader)))) + input_data = transform_fn(next(iter(dataloader))) + if isinstance(input_data, tuple): + model(*input_data) + if isinstance(input_data, dict): + model(**input_data) # Start quantization - calibration_dataset = nncf.Dataset(dataloader, multiple_inputs_transform_fn) - _ = nncf.quantize(model, calibration_dataset) + calibration_dataset = nncf.Dataset(dataloader, transform_fn) + nncf.quantize(model, calibration_dataset)