Skip to content

Commit

Permalink
QuantizationPreset.MIXED for ModelType.TRANSFORMER by default (#2216)
Browse files Browse the repository at this point in the history
### Changes

Made QuantizationPreset.MIXED the default preset for ModelType.TRANSFORMER
when no preset is specified.

### Reason for changes

Quantization of transformer-based models with QuantizationPreset.MIXED
preset shows the best accuracy w/o performance degradation.

### Related tickets

ref: 123235

### Tests
test_create_nncf_config
test_quantization_preset
  • Loading branch information
alexsu52 authored Oct 25, 2023
1 parent 9cc57c0 commit d264494
Show file tree
Hide file tree
Showing 13 changed files with 165 additions and 60 deletions.
4 changes: 2 additions & 2 deletions nncf/experimental/torch/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Dict, Optional, Tuple
from typing import Any, Dict, Optional, Tuple, Union

import torch

Expand Down Expand Up @@ -87,7 +87,7 @@ def send_to_device(tensor):
def quantize_impl(
model: torch.nn.Module,
calibration_dataset: Dataset,
preset: QuantizationPreset,
preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
Expand Down
4 changes: 2 additions & 2 deletions nncf/onnx/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional
from typing import Optional, Union

import onnx

Expand All @@ -31,7 +31,7 @@
def quantize_impl(
model: onnx.ModelProto,
calibration_dataset: Dataset,
preset: QuantizationPreset,
preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
Expand Down
25 changes: 14 additions & 11 deletions nncf/openvino/pot/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import logging
import tempfile
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, Optional
from typing import Any, Callable, Dict, Iterable, Optional, Union

import openvino.runtime as ov
from openvino._offline_transformations import compress_quantize_weights_transformation
Expand Down Expand Up @@ -192,22 +192,22 @@ def _create_quantization_group_config(


def _create_quantization_config(
preset: QuantizationPreset,
preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
model_type: Optional[ModelType],
ignored_scope: Optional[IgnoredScope],
advanced_parameters: Optional[AdvancedQuantizationParameters],
model_type: Union[ModelType, None],
ignored_scope: Union[IgnoredScope, None],
advanced_parameters: Union[AdvancedQuantizationParameters, None],
) -> Dict[str, Any]:
"""
Creates a quantization configuration.
:param preset: A preset that controls the quantization mode
(symmetric and asymmetric). It can take the following values:
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric
quantization of activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
- `None`: the `mixed` preset is used for the `transformer` model type; otherwise, `performance` is used.
:param target_device: A target device the specificity of which will be
taken into account while compressing in order to obtain the best
performance for this type of device.
Expand All @@ -224,6 +224,9 @@ def _create_quantization_config(
fine-tuning the quantization algorithm.
:return: A POT quantization configuration as dict.
"""
if preset is None:
preset = QuantizationPreset.MIXED if model_type == ModelType.TRANSFORMER else QuantizationPreset.PERFORMANCE

config = {
"target_device": target_device.value,
"preset": preset.value,
Expand Down Expand Up @@ -320,7 +323,7 @@ def _create_engine_config(
def quantize_impl(
model: ov.Model,
calibration_dataset: Dataset,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down Expand Up @@ -423,7 +426,7 @@ def quantize_with_accuracy_control_impl(
validation_fn: Callable[[ov.CompiledModel, Iterable[Any]], float],
max_drop: float = 0.01,
drop_type: DropType = DropType.ABSOLUTE,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down
16 changes: 8 additions & 8 deletions nncf/openvino/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def dump_parameters(model: ov.Model, parameters: Dict, path: Optional[List] = No
def native_quantize_if_op_impl(
model: ov.Model,
calibration_dataset: Dataset,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down Expand Up @@ -138,7 +138,7 @@ def native_quantize_if_op_impl(
dump_parameters(
quantized_model,
{
"preset": preset.value,
"preset": preset,
"target_device": target_device.value,
"subset_size": subset_size,
"fast_bias_correction": fast_bias_correction,
Expand All @@ -154,7 +154,7 @@ def native_quantize_if_op_impl(
def native_quantize_impl(
model: ov.Model,
calibration_dataset: Dataset,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down Expand Up @@ -184,7 +184,7 @@ def native_quantize_impl(
dump_parameters(
quantized_model,
{
"preset": preset.value,
"preset": preset,
"target_device": target_device.value,
"subset_size": subset_size,
"fast_bias_correction": fast_bias_correction,
Expand All @@ -206,7 +206,7 @@ def native_quantize_with_accuracy_control_impl(
validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]],
max_drop: float = 0.01,
drop_type: DropType = DropType.ABSOLUTE,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down Expand Up @@ -321,7 +321,7 @@ def native_quantize_with_accuracy_control_impl(
dump_parameters(
quantized_model,
{
"preset": preset.value,
"preset": preset,
"target_device": target_device.value,
"subset_size": subset_size,
"fast_bias_correction": fast_bias_correction,
Expand All @@ -339,7 +339,7 @@ def native_quantize_with_accuracy_control_impl(
def quantize_impl(
model: ov.Model,
calibration_dataset: Dataset,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down Expand Up @@ -396,7 +396,7 @@ def quantize_with_accuracy_control_impl(
validation_fn: Callable[[Any, Iterable[Any]], float],
max_drop: float = 0.01,
drop_type: DropType = DropType.ABSOLUTE,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down
17 changes: 14 additions & 3 deletions nncf/quantization/algorithms/min_max/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class MinMaxQuantization(Algorithm):

def __init__(
self,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
model_type: Optional[ModelType] = None,
Expand All @@ -111,8 +111,12 @@ def __init__(
backend_params: Optional[Dict[str, Any]] = None,
):
"""
:param preset: A preset that controls the quantization mode,
defaults to QuantizationPreset.PERFORMANCE.
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, the `mixed` preset is used for the `transformer`
model type; otherwise, the `performance` preset is used.
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
for this type of device, defaults to TargetDevice.ANY.
Expand Down Expand Up @@ -157,6 +161,13 @@ def __init__(
QuantizerGroup.ACTIVATIONS: activations_range_estimator_params,
}

# preset definition
if preset is None:
if model_type == ModelType.TRANSFORMER:
preset = QuantizationPreset.MIXED
else:
preset = QuantizationPreset.PERFORMANCE

# Calculates global quantizer constraints
self._global_quantizer_constraints = {}
for quantizer_group in QuantizerGroup:
Expand Down
11 changes: 6 additions & 5 deletions nncf/quantization/algorithms/post_training/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class PostTrainingQuantization(Algorithm):

def __init__(
self,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand All @@ -47,11 +47,12 @@ def __init__(
advanced_parameters: Optional[AdvancedQuantizationParameters] = None,
):
"""
:param preset: A preset that controls the quantization mode
(symmetric and asymmetric). It can take the following values:
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric
quantization of activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, the `mixed` preset is used for the `transformer`
model type; otherwise, the `performance` preset is used.
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
for this type of device.
Expand Down
11 changes: 6 additions & 5 deletions nncf/quantization/algorithms/post_training/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@


def create_ptq_pipeline(
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand All @@ -47,11 +47,12 @@ def create_ptq_pipeline(
3) MinMaxQuantization
4) FastBiasCorrection or BiasCorrection
:param preset: A preset that controls the quantization mode
(symmetric and asymmetric). It can take the following values:
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric
quantization of activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, the `mixed` preset is used for the `transformer`
model type; otherwise, the `performance` preset is used.
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
for this type of device.
Expand Down
29 changes: 20 additions & 9 deletions nncf/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
def quantize(
model: TModel,
calibration_dataset: Dataset,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand All @@ -54,11 +54,12 @@ def quantize(
:param calibration_dataset: A representative dataset for the
calibration process.
:type calibration_dataset: nncf.Dataset
:param preset: A preset that controls the quantization mode
(symmetric and asymmetric). It can take the following values:
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric
quantization of activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, the `mixed` preset is used for the `transformer`
model type; otherwise, the `performance` preset is used.
:type preset: nncf.QuantizationPreset
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
Expand Down Expand Up @@ -152,7 +153,7 @@ def quantize_with_accuracy_control(
validation_fn: Callable[[Any, Iterable[Any]], float],
max_drop: float = 0.01,
drop_type: DropType = DropType.ABSOLUTE,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand All @@ -179,7 +180,12 @@ def quantize_with_accuracy_control(
:param max_drop: The maximum accuracy drop that should be achieved after the quantization.
:param drop_type: The accuracy drop type, which determines how the maximum accuracy
drop between the original model and the compressed model is calculated.
:param preset: A preset that controls the quantization mode.
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, the `mixed` preset is used for the `transformer`
model type; otherwise, the `performance` preset is used.
:type preset: nncf.QuantizationPreset
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
Expand Down Expand Up @@ -288,7 +294,7 @@ def quantize_with_tune_hyperparams(
initial_metric_results: MetricResults,
quantized_metric_results: MetricResults,
tuner_subset_size: int = 300,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand All @@ -306,7 +312,12 @@ def quantize_with_tune_hyperparams(
:param initial_metric_results: Initial metric results.
:param quantized_metric_results: Quantized metric results.
:param tuner_subset_size: Tuner subset size.
:param preset: A preset that controls the quantization mode.
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, the `mixed` preset is used for the `transformer`
model type; otherwise, the `performance` preset is used.
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
for this type of device.
Expand Down
7 changes: 5 additions & 2 deletions nncf/tensorflow/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Union

import tensorflow as tf

Expand Down Expand Up @@ -133,7 +133,7 @@ def _create_nncf_config(
def quantize_impl(
model: tf.Module,
calibration_dataset: Dataset,
preset: QuantizationPreset,
preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
Expand All @@ -157,6 +157,9 @@ def quantize_impl(
if target_device == TargetDevice.CPU_SPR:
raise RuntimeError("target_device == CPU_SPR is not supported.")

if preset is None:
preset = QuantizationPreset.PERFORMANCE

nncf_config = _create_nncf_config(preset, target_device, subset_size, ignored_scope, advanced_parameters)

calibration_data_loader = CalibrationDataLoader(calibration_dataset)
Expand Down
Loading

0 comments on commit d264494

Please sign in to comment.