diff --git a/nncf/experimental/torch/quantization/__init__.py b/nncf/experimental/torch/quantization/__init__.py
deleted file mode 100644
index 9b29b47534a..00000000000
--- a/nncf/experimental/torch/quantization/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright (c) 2023 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/nncf/experimental/torch/quantization/quantize_model.py b/nncf/experimental/torch/quantization/quantize_model.py
deleted file mode 100644
index 714cc5ed702..00000000000
--- a/nncf/experimental/torch/quantization/quantize_model.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright (c) 2023 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Any, Dict, Optional, Tuple, Union
-
-import torch
-
-from nncf.common.quantization.structs import QuantizationPreset
-from nncf.data import Dataset
-from nncf.parameters import ModelType
-from nncf.parameters import TargetDevice
-from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
-from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization
-from nncf.scopes import IgnoredScope
-from nncf.torch.dynamic_graph.context import no_nncf_trace
-from nncf.torch.dynamic_graph.io_handling import replicate_same_tensors
-from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk
-from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_outputs_with_objwalk
-from nncf.torch.nested_objects_traversal import objwalk
-from nncf.torch.nncf_network import NNCFNetwork
-from nncf.torch.utils import get_model_device
-from nncf.torch.utils import is_tensor
-from nncf.torch.utils import training_mode_switcher
-
-
-def create_nncf_network(model: torch.nn.Module, dataset: Dataset) -> NNCFNetwork:
-    """
-    Creates NNCFNetwork instance for the PyTorch model where the first item of dataset
-    is used for model tracing.
-
-    :param model: PyTorch model
-    :param dataset: Dataset for model tracing
-    :return: NNCFNetwork instance for the input model
-    """
-
-    def get_inputs(dataloader_output: Any) -> Tuple[Tuple, Dict]:
-        if not isinstance(dataloader_output, tuple):
-            dataloader_output = (dataloader_output,)
-        return dataloader_output, {}
-
-    def wrap_inputs(args, kwargs):
-        return wrap_nncf_model_inputs_with_objwalk(args, kwargs)
-
-    def wrap_outputs(retval):
-        return wrap_nncf_model_outputs_with_objwalk(retval)
-
-    def create_dummy_forward_fn(dataset, device):
-        def dummy_forward(model):
-            with no_nncf_trace():
-                args = next(iter(dataset.get_inference_data()))
-                args, kwargs = get_inputs(args)
-
-                def send_to_device(tensor):
-                    return tensor.to(device)
-
-                args = objwalk(args, is_tensor, send_to_device)
-                kwargs = objwalk(kwargs, is_tensor, send_to_device)
-
-                args, kwargs = wrap_inputs(args, kwargs)
-                retval = model(*args, **kwargs)
-                retval = replicate_same_tensors(retval)
-                return wrap_outputs(retval)
-
-        return dummy_forward
-
-    device = get_model_device(model)
-    dummy_forward_fn = create_dummy_forward_fn(dataset, device)
-
-    with training_mode_switcher(model, is_training=False):
-        nncf_network = NNCFNetwork(
-            model, dummy_forward_fn=dummy_forward_fn, wrap_inputs_fn=wrap_inputs, wrap_outputs_fn=wrap_outputs
-        )
-
-        nncf_network.nncf.get_tracing_context().disable_trace_dynamic_graph()
-
-    return nncf_network
-
-
-def quantize_impl(
-    model: torch.nn.Module,
-    calibration_dataset: Dataset,
-    preset: Union[QuantizationPreset, None],
-    target_device: TargetDevice,
-    subset_size: int,
-    fast_bias_correction: bool,
-    model_type: Optional[ModelType] = None,
-    ignored_scope: Optional[IgnoredScope] = None,
-    advanced_parameters: Optional[AdvancedQuantizationParameters] = None,
-) -> torch.nn.Module:
-    """
-    Experimental implementation of the `quantize()` method for the PyTorch backend.
-    """
-    if fast_bias_correction is False:
-        raise ValueError(f"fast_bias_correction={fast_bias_correction} is not supported")
-    if target_device == TargetDevice.CPU_SPR:
-        raise RuntimeError("target_device == CPU_SPR is not supported")
-
-    nncf_network = create_nncf_network(model.eval(), calibration_dataset)
-
-    quantization_algorithm = PostTrainingQuantization(
-        preset=preset,
-        target_device=target_device,
-        subset_size=subset_size,
-        fast_bias_correction=fast_bias_correction,
-        model_type=model_type,
-        ignored_scope=ignored_scope,
-        advanced_parameters=advanced_parameters,
-    )
-
-    quantized_model = quantization_algorithm.apply(
-        nncf_network, nncf_network.nncf.get_graph(), dataset=calibration_dataset
-    )
-
-    quantized_model.nncf.disable_dynamic_graph_building()
-
-    return quantized_model
diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py
index a1bc87caf9b..177128c9412 100644
--- a/nncf/quantization/quantize_model.py
+++ b/nncf/quantization/quantize_model.py
@@ -59,7 +59,7 @@ def quantize(
         - `performance`: Symmetric quantization of weights and activations.
         - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
         Default value is None. In this case, `mixed` preset is used for `transformer`
-        model type otherwise `performace`.
+        model type otherwise `performance`.
     :type preset: nncf.QuantizationPreset
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
@@ -185,7 +185,7 @@ def quantize_with_accuracy_control(
         - `performance`: Symmetric quantization of weights and activations.
         - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
         Default value is None. In this case, `mixed` preset is used for `transformer`
-        model type otherwise `performace`.
+        model type otherwise `performance`.
     :type preset: nncf.QuantizationPreset
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
@@ -317,7 +317,7 @@
         - `performance`: Symmetric quantization of weights and activations.
         - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
         Default value is None. In this case, `mixed` preset is used for `transformer`
-        model type otherwise `performace`.
+        model type otherwise `performance`.
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
         for this type of device.
diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py
index c37d69c3204..747afc9b21e 100644
--- a/nncf/torch/quantization/quantize_model.py
+++ b/nncf/torch/quantization/quantize_model.py
@@ -9,190 +9,79 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from copy import deepcopy
 from typing import Any, Dict, Optional, Tuple, Union

 import torch

 from nncf.common.quantization.structs import QuantizationPreset
-from nncf.config import NNCFConfig
-from nncf.config.structures import BNAdaptationInitArgs
-from nncf.config.structures import QuantizationRangeInitArgs
 from nncf.data import Dataset
-from nncf.parameters import CompressWeightsMode
 from nncf.parameters import ModelType
 from nncf.parameters import TargetDevice
 from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
-from nncf.quantization.advanced_parameters import apply_advanced_parameters_to_config
+from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization
 from nncf.scopes import IgnoredScope
-from nncf.scopes import convert_ignored_scope_to_list
 from nncf.torch.dynamic_graph.context import no_nncf_trace
 from nncf.torch.dynamic_graph.io_handling import replicate_same_tensors
 from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk
 from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_outputs_with_objwalk
-from nncf.torch.initialization import PTInitializingDataLoader
-from nncf.torch.model_creation import create_compressed_model
 from nncf.torch.nested_objects_traversal import objwalk
-from nncf.torch.nncf_module_replacement import replace_modules_by_nncf_modules
-from nncf.torch.quantization.weights_compression import insert_pre_compression_operations
+from nncf.torch.nncf_network import NNCFNetwork
 from nncf.torch.utils import get_model_device
 from nncf.torch.utils import is_tensor
+from nncf.torch.utils import training_mode_switcher

-DEFAULT_RANGE_TYPE = "mean_min_max"
-
-# TODO(alexsu52): It is a workaround and should be removed.
-class CalibrationDataLoader(PTInitializingDataLoader):
+def create_nncf_network(model: torch.nn.Module, dataset: Dataset) -> NNCFNetwork:
     """
-    This class wraps the nncf.Dataset.
+    Creates NNCFNetwork instance for the PyTorch model where the first item of dataset
+    is used for model tracing.

-    This is required for proper initialization of certain compression algorithms.
+    :param model: PyTorch model
+    :param dataset: Dataset for model tracing
+    :return: NNCFNetwork instance for the input model
     """

-    def __init__(self, data_loader: Dataset):
-        super().__init__(data_loader)
-        self._length = None
-
-    @property
-    def batch_size(self):
-        data_source = getattr(self._data_loader, "_data_source")
-        return getattr(data_source, "batch_size", 1)
-
-    def __iter__(self):
-        return iter(self._data_loader.get_inference_data())
-
-    def __len__(self):
-        if self._length is None:
-            data = self._data_loader.get_inference_data()
-            self._length = CalibrationDataLoader._get_length(data)
-        return self._length
-
-    def get_inputs(self, dataloader_output: Any) -> Tuple[Tuple, Dict]:
+    def get_inputs(dataloader_output: Any) -> Tuple[Tuple, Dict]:
         if not isinstance(dataloader_output, tuple):
             dataloader_output = (dataloader_output,)
         return dataloader_output, {}

-    @staticmethod
-    def _get_length(iterable) -> int:
-        length = 0
-        for _ in iterable:
-            length = length + 1
-
-        return length
-
-
-def _get_transformer_quantization_config(preset: QuantizationPreset, subset_size: int) -> Dict[str, Any]:
-    """
-    Returns the quantization config for transformer-based models.
-
-    :param preset: A preset that controls the quantization mode
-        (symmetric and asymmetric). It can take the following values:
-        - `performance`: Symmetric quantization of weights and activations.
-        - `mixed`: Symmetric quantization of weights and asymmetric
-        quantization of activations.
-    :param subset_size: Size of a subset to calculate activations
-        statistics used for quantization.
-    :return: The quantization config for transformer-based models.
-    """
-    return {
-        "algorithm": "quantization",
-        "preset": preset.value,
-        "initializer": {
-            "range": {"num_init_samples": subset_size, "type": DEFAULT_RANGE_TYPE},
-            "batchnorm_adaptation": {"num_bn_adaptation_samples": 0},
-        },
-        "scope_overrides": {"activations": {"{re}.*matmul_0": {"mode": "symmetric"}}},
-        "ignored_scopes": [
-            "{re}.*Embeddings.*",
-            "{re}.*__add___[0-1]",
-            "{re}.*layer_norm_0",
-            "{re}.*matmul_1",
-            "{re}.*__truediv__*",
-        ],
-        "overflow_fix": "first_layer_only",
-    }
-
+    def wrap_inputs(args, kwargs):
+        return wrap_nncf_model_inputs_with_objwalk(args, kwargs)

-def _get_default_quantization_config(preset: QuantizationPreset, subset_size: int) -> Dict[str, Any]:
-    """
-    Returns the default quantization config
+    def wrap_outputs(retval):
+        return wrap_nncf_model_outputs_with_objwalk(retval)

-    :param preset: A preset that controls the quantization mode
-        (symmetric and asymmetric). It can take the following values:
-        - `performance`: Symmetric quantization of weights and activations.
-        - `mixed`: Symmetric quantization of weights and asymmetric
-        quantization of activations.
-    :param subset_size: Size of a subset to calculate activations
-        statistics used for quantization.
-    :return: The default quantization config.
- """ - return { - "algorithm": "quantization", - "preset": preset.value, - "initializer": { - "range": {"num_init_samples": subset_size, "type": DEFAULT_RANGE_TYPE}, - "batchnorm_adaptation": {"num_bn_adaptation_samples": subset_size}, - }, - "overflow_fix": "first_layer_only", - } + def create_dummy_forward_fn(dataset, device): + def dummy_forward(model): + with no_nncf_trace(): + args = next(iter(dataset.get_inference_data())) + args, kwargs = get_inputs(args) + def send_to_device(tensor): + return tensor.to(device) -def _create_nncf_config( - preset: Union[QuantizationPreset, None], - target_device: TargetDevice, - subset_size: int, - model_type: Union[ModelType, None], - ignored_scope: Union[IgnoredScope, None], - advanced_parameters: Union[AdvancedQuantizationParameters, None], -) -> NNCFConfig: - """ - Creates the NNCFConfig for the quantization algorithm. + args = objwalk(args, is_tensor, send_to_device) + kwargs = objwalk(kwargs, is_tensor, send_to_device) - :param preset: A preset that controls the quantization mode - (symmetric and asymmetric). It can take the following values: - - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric - quantization of activations. - - `None`: `mixed` preset is used for `transformer` model type otherwise `performace`. - :param target_device: A target device the specificity of which will be taken - into account while compressing in order to obtain the best performance - for this type of device. - :param subset_size: Size of a subset to calculate activations - statistics used for quantization. - :param model_type: Model type is needed to specify additional patterns - in the model. - :param ignored_scope: An ignored scope that defined the list of model control - flow graph nodes to be ignored during quantization. - :param advanced_parameters: Advanced quantization parameters for - fine-tuning the quantization algorithm. - :return: NNCFConfig for the quantization algorithm. 
- """ - if preset is None: - if model_type == ModelType.TRANSFORMER: - preset = QuantizationPreset.MIXED - else: - preset = QuantizationPreset.PERFORMANCE + args, kwargs = wrap_inputs(args, kwargs) + retval = model(*args, **kwargs) + retval = replicate_same_tensors(retval) + return wrap_outputs(retval) - if model_type == ModelType.TRANSFORMER: - compression_config = _get_transformer_quantization_config(preset, subset_size) - else: - compression_config = _get_default_quantization_config(preset, subset_size) + return dummy_forward - if ignored_scope is not None: - _ignored_scope = convert_ignored_scope_to_list(ignored_scope) - if "ignored_scopes" in compression_config: - compression_config["ignored_scopes"].extend(_ignored_scope) - else: - compression_config["ignored_scopes"] = _ignored_scope - compression_config["validate_scopes"] = ignored_scope.validate + device = get_model_device(model) + dummy_forward_fn = create_dummy_forward_fn(dataset, device) - if advanced_parameters is not None: - compression_config = apply_advanced_parameters_to_config(compression_config, advanced_parameters) + with training_mode_switcher(model, is_training=False): + nncf_network = NNCFNetwork( + model, dummy_forward_fn=dummy_forward_fn, wrap_inputs_fn=wrap_inputs, wrap_outputs_fn=wrap_outputs + ) - if model_type == ModelType.TRANSFORMER: - compression_config["validate_scopes"] = False + nncf_network.nncf.get_tracing_context().disable_trace_dynamic_graph() - return NNCFConfig({"target_device": target_device.value, "compression": compression_config}) + return nncf_network def quantize_impl( @@ -210,99 +99,26 @@ def quantize_impl( Implementation of the `quantize()` method for the PyTorch backend. """ if fast_bias_correction is False: - raise ValueError(f"fast_bias_correction={fast_bias_correction} is not " "supported") - if ignored_scope is not None and ignored_scope.types: - raise RuntimeError( - "Quantization algorithm from the PyTorch backend " - "does not support operation types in the ignored " - "scopes yet" - ) + raise ValueError(f"fast_bias_correction={fast_bias_correction} is not supported") if target_device == TargetDevice.CPU_SPR: raise RuntimeError("target_device == CPU_SPR is not supported") - nncf_config = _create_nncf_config( - preset, target_device, subset_size, model_type, ignored_scope, advanced_parameters - ) + nncf_network = create_nncf_network(model.eval(), calibration_dataset) - calibration_data_loader = CalibrationDataLoader(calibration_dataset) - nncf_config.register_extra_structs( - [ - QuantizationRangeInitArgs(data_loader=calibration_data_loader), - BNAdaptationInitArgs(data_loader=calibration_data_loader), - ] + quantization_algorithm = PostTrainingQuantization( + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=advanced_parameters, ) - def wrap_inputs(args, kwargs): - return wrap_nncf_model_inputs_with_objwalk(args, kwargs) - - def wrap_outputs(retval): - return wrap_nncf_model_outputs_with_objwalk(retval) - - def create_dummy_forward_fn(data_loader, device): - def dummy_forward(model): - with no_nncf_trace(): - data_item = next(iter(data_loader)) - args, kwargs = data_loader.get_inputs(data_item) - - def send_to_device(tensor): - return tensor.to(device) - - args = objwalk(args, is_tensor, send_to_device) - kwargs = objwalk(kwargs, is_tensor, send_to_device) - - args, kwargs = wrap_inputs(args, kwargs) - retval = model(*args, **kwargs) - 
-                return wrap_outputs(retval)
-
-        return dummy_forward
-
-    dummy_forward_fn = create_dummy_forward_fn(calibration_data_loader, get_model_device(model))
-
-    clone_model = deepcopy(model)
-    compression_ctrl, compressed_model = create_compressed_model(
-        model=clone_model,
-        config=nncf_config,
-        dummy_forward_fn=dummy_forward_fn,
-        wrap_inputs_fn=wrap_inputs,
-        wrap_outputs_fn=wrap_outputs,
+    quantized_model = quantization_algorithm.apply(
+        nncf_network, nncf_network.nncf.get_graph(), dataset=calibration_dataset
     )
-    compression_ctrl.prepare_for_export()
-    compressed_model.nncf.disable_dynamic_graph_building()
-    return compressed_model
-
-
-def compress_weights_impl(
-    model: torch.nn.Module,
-    mode=CompressWeightsMode.INT8,
-    ratio: Optional[float] = None,
-    group_size: Optional[int] = None,
-    ignored_scope: Optional[IgnoredScope] = None,
-) -> torch.nn.Module:
-    """
-    Implementation of the `compress_weights()` method for the PyTorch backend. Currently it supports INT8
-    mode only with default ratio and group_size.
-
-    :param model: a Torch model for compression.
-    :param mode: Defines a mode for weight compression.
-        INT8 stands for 8-bit integer quantization of all weights.
-        NF4 stands for a mixed-precision weights quantization to NF4 data type. The first and last layers
-        are always compressed to a backup precision which is 8-bit integer by default. All others are quantized whether
-        to NF4 or to a backup precision depending on criteria and the given ratio.
-    :param ratio: the ratio between baseline and backup precisions (e.g. 0.9 means 90% of layers quantized to NF4
-        and the rest to INT8).
-    :param group_size: number of weights (e.g. 128) in the channel dimension that share quantization parameters (scale).
-        The value -1 means no grouping.
-    :param ignored_scope: An ignored scope that defined the list of model control
-        flow graph nodes to be ignored during quantization.
-    :return: The non-trainable model with compressed weights and dequantization operations.
- """ - if ignored_scope is not None: - raise AttributeError("Torch backend does not support ignored scope.") - if mode != CompressWeightsMode.INT8: - raise AttributeError(f"Torch backend supports only INT8 mode for weight compression, but given {mode} mode.") - compressed_model, _ = replace_modules_by_nncf_modules(model) - insert_pre_compression_operations(model) + quantized_model.nncf.disable_dynamic_graph_building() - return compressed_model + return quantized_model diff --git a/tests/post_training/pipelines/base.py b/tests/post_training/pipelines/base.py index bc97a6a1b4b..3468028cef8 100644 --- a/tests/post_training/pipelines/base.py +++ b/tests/post_training/pipelines/base.py @@ -28,7 +28,6 @@ import nncf from nncf import TargetDevice -from nncf.experimental.torch.quantization.quantize_model import quantize_impl as pt_impl_experimental from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters DEFAULT_VAL_THREADS = 4 @@ -36,17 +35,16 @@ class BackendType(Enum): FP32 = "FP32" - OLD_TORCH = "OLD_TORCH" # Quantization via create_compressed_model - TORCH = "TORCH" # PTQ implementation + TORCH = "TORCH" ONNX = "ONNX" OV = "OV" POT = "POT" OPTIMUM = "OPTIMUM" -NNCF_PTQ_BACKENDS = [BackendType.OLD_TORCH, BackendType.TORCH, BackendType.ONNX, BackendType.OV] +NNCF_PTQ_BACKENDS = [BackendType.TORCH, BackendType.ONNX, BackendType.OV] ALL_PTQ_BACKENDS = NNCF_PTQ_BACKENDS + [BackendType.POT] -PT_BACKENDS = [BackendType.TORCH, BackendType.OLD_TORCH] +PT_BACKENDS = [BackendType.TORCH] OV_BACKENDS = [BackendType.OV, BackendType.POT, BackendType.OPTIMUM] @@ -169,23 +167,12 @@ def _quantize(self): quantizer = OVQuantizer.from_pretrained(self.model_hf) quantizer.quantize(calibration_dataset=self.calibration_dataset, save_directory=self.output_model_dir) else: - quantize_fn = nncf.quantize - if self.backend == BackendType.TORCH: - # Use experimental torch api - quantize_fn = pt_impl_experimental - if "preset" not in self.ptq_params: - self.ptq_params["preset"] = nncf.QuantizationPreset.PERFORMANCE - if "subset_size" not in self.ptq_params: - self.ptq_params["subset_size"] = 300 - if "fast_bias_correction" not in self.ptq_params: - self.ptq_params["fast_bias_correction"] = True - if self.backend == BackendType.POT: self.ptq_params["advanced_parameters"] = AdvancedQuantizationParameters( backend_params={"use_pot": True} ) - self.quantized_model = quantize_fn( + self.quantized_model = nncf.quantize( model=self.model, target_device=TargetDevice.CPU, calibration_dataset=self.calibration_dataset, @@ -198,7 +185,7 @@ def quantize(self) -> None: """ print("Quantization...") - if self.backend in [BackendType.TORCH, BackendType.OLD_TORCH]: + if self.backend in PT_BACKENDS: cpu_threads_num = os.environ.get("CPU_THREADS_NUM") if cpu_threads_num is not None: torch.set_num_threads(int(cpu_threads_num)) diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index 315a345fe9b..00171d701b7 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -121,9 +121,8 @@ def transform_fn(data_item): return transform_fn def prepare_calibration_dataset(self): - batch_size = 128 if self.backend == BackendType.OLD_TORCH else 1 dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform) - loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=2, shuffle=False) + loader = 

         self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn())
diff --git a/tests/torch/ptq/test_quantize_model_helpers.py b/tests/torch/ptq/test_quantize_model_helpers.py
index 2931c4fd8a9..a9abe856d29 100644
--- a/tests/torch/ptq/test_quantize_model_helpers.py
+++ b/tests/torch/ptq/test_quantize_model_helpers.py
@@ -13,7 +13,7 @@
 from torch import nn

 from nncf.data import Dataset
-from nncf.experimental.torch.quantization.quantize_model import create_nncf_network
+from nncf.torch.quantization.quantize_model import create_nncf_network


 class TestModel(nn.Module):
diff --git a/tests/torch/ptq/test_strip.py b/tests/torch/ptq/test_strip.py
index eaf59c75abe..d6cab4b784e 100644
--- a/tests/torch/ptq/test_strip.py
+++ b/tests/torch/ptq/test_strip.py
@@ -14,7 +14,6 @@

 import nncf
 from nncf.data import Dataset
-from nncf.experimental.torch.quantization.quantize_model import quantize_impl
 from nncf.parameters import TargetDevice
 from nncf.quantization import QuantizationPreset
 from nncf.torch.nncf_network import ExtraCompressionModuleType
@@ -70,7 +69,7 @@ def transform_fn(data_item):

     dataset = Dataset(RandomDatasetMock(input_size), transform_fn)

-    quantized_model = quantize_impl(
+    quantized_model = nncf.quantize(
         model=model,
         calibration_dataset=dataset,
         preset=QuantizationPreset.MIXED,
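
With the experimental module deleted, PyTorch post-training quantization goes through the same public nncf.quantize entry point as the other backends, which is exactly what the updated tests now call. A minimal sketch of that call follows; the toy model and the random calibration source are invented for illustration and stand in for a real setup:

import torch

import nncf

# Hypothetical model and calibration data, placeholders for a real pipeline.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
calibration_data = [torch.randn(1, 3, 64, 64) for _ in range(10)]
calibration_dataset = nncf.Dataset(calibration_data, lambda item: item)

quantized_model = nncf.quantize(
    model=model,
    calibration_dataset=calibration_dataset,
    preset=nncf.QuantizationPreset.MIXED,
    target_device=nncf.TargetDevice.CPU,
    subset_size=10,  # number of calibration samples used for statistics
)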
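The guard clauses kept in quantize_impl stay observable through the public API: the PyTorch backend still rejects fast_bias_correction=False with a ValueError and TargetDevice.CPU_SPR with a RuntimeError. A sketch of that behavior, reusing the model and dataset from the previous example:

try:
    nncf.quantize(
        model=model,
        calibration_dataset=calibration_dataset,
        fast_bias_correction=False,  # not supported by the PyTorch backend
    )
except ValueError as err:
    print(err)  # fast_bias_correction=False is not supported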
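The get_inputs helper inside create_nncf_network fixes the contract for calibration samples: whatever the dataset transform returns is treated as the tuple of positional arguments (a lone object is wrapped into a one-element tuple), and keyword arguments are always empty. A transform for a typical (images, labels) loader therefore just drops the labels; val_loader here is a placeholder for a real torch DataLoader:

import nncf

def transform_fn(data_item):
    images, _labels = data_item  # each batch is (images, labels)
    return images  # becomes the single positional argument of the model

calibration_dataset = nncf.Dataset(val_loader, transform_fn)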
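Callers that only need the traced model rather than a quantized one can import create_nncf_network from its new stable location, as tests/torch/ptq/test_quantize_model_helpers.py now does. A rough sketch with an invented toy module:

import torch
from torch import nn

from nncf.data import Dataset
from nncf.torch.quantization.quantize_model import create_nncf_network

class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3)

    def forward(self, x):
        return self.conv(x)

# The first dataset item drives the dummy forward used for tracing.
dataset = Dataset([torch.randn(1, 3, 32, 32)])
nncf_network = create_nncf_network(TinyModel(), dataset)
graph = nncf_network.nncf.get_graph()  # the graph later handed to PostTrainingQuantization.apply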