replied to comments

openvinotoolkit · Oct 25, 2023 · ef60e81 · ef60e81
1 parent a273d8e
commit ef60e81
Show file tree

Hide file tree

Showing 10 changed files with 63 additions and 43 deletions.
diff --git a/nncf/experimental/torch/quantization/quantize_model.py b/nncf/experimental/torch/quantization/quantize_model.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, Optional, Tuple, Union
 
 import torch
 
@@ -87,7 +87,7 @@ def send_to_device(tensor):
 def quantize_impl(
     model: torch.nn.Module,
     calibration_dataset: Dataset,
-    preset: Optional[QuantizationPreset],
+    preset: Union[QuantizationPreset, None],
     target_device: TargetDevice,
     subset_size: int,
     fast_bias_correction: bool,

diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional
+from typing import Optional, Union
 
 import onnx
 
@@ -31,7 +31,7 @@
 def quantize_impl(
     model: onnx.ModelProto,
     calibration_dataset: Dataset,
-    preset: Optional[QuantizationPreset],
+    preset: Union[QuantizationPreset, None],
     target_device: TargetDevice,
     subset_size: int,
     fast_bias_correction: bool,

diff --git a/nncf/openvino/pot/quantization/quantize_model.py b/nncf/openvino/pot/quantization/quantize_model.py
@@ -12,7 +12,7 @@
 import logging
 import tempfile
 from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, Optional
+from typing import Any, Callable, Dict, Iterable, Optional, Union
 
 import openvino.runtime as ov
 from openvino._offline_transformations import compress_quantize_weights_transformation
@@ -192,7 +192,7 @@ def _create_quantization_group_config(
 
 
 def _create_quantization_config(
-    preset: Optional[QuantizationPreset],
+    preset: Union[QuantizationPreset, None],
     target_device: TargetDevice,
     subset_size: int,
     fast_bias_correction: bool,
@@ -203,11 +203,11 @@ def _create_quantization_config(
     """
     Creates a quantization configuration.
 
-    :param preset: A preset that controls the quantization mode
-        (symmetric and asymmetric). It can take the following values:
+    :param preset: A preset that controls the quantization mode (symmetric and asymmetric).
+        It can take the following values:
         - `performance`: Symmetric quantization of weights and activations.
-        - `mixed`: Symmetric quantization of weights and asymmetric
-          quantization of activations.
+        - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+        - `None`: the `mixed` preset is used for the `transformer` model type; otherwise `performance`.
     :param target_device: A target device the specificity of which will be
         taken into account while compressing in order to obtain the best
         performance for this type of device.
@@ -225,10 +225,7 @@ def _create_quantization_config(
     :return: A POT quantization configuration as dict.
     """
     if preset is None:
-        if model_type == ModelType.TRANSFORMER:
-            preset = QuantizationPreset.MIXED
-        else:
-            preset = QuantizationPreset.PERFORMANCE
+        preset = QuantizationPreset.MIXED if model_type == ModelType.TRANSFORMER else QuantizationPreset.PERFORMANCE
 
     config = {
         "target_device": target_device.value,

diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py
@@ -96,7 +96,7 @@ def dump_parameters(model: ov.Model, parameters: Dict, path: Optional[List] = No
 def native_quantize_if_op_impl(
     model: ov.Model,
     calibration_dataset: Dataset,
-    preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+    preset: Optional[QuantizationPreset] = None,
     target_device: TargetDevice = TargetDevice.ANY,
     subset_size: int = 300,
     fast_bias_correction: bool = True,
@@ -138,7 +138,7 @@ def native_quantize_if_op_impl(
     dump_parameters(
         quantized_model,
         {
-            "preset": preset.value,
+            "preset": preset,
             "target_device": target_device.value,
             "subset_size": subset_size,
             "fast_bias_correction": fast_bias_correction,
@@ -154,7 +154,7 @@ def native_quantize_if_op_impl(
 def native_quantize_impl(
     model: ov.Model,
     calibration_dataset: Dataset,
-    preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+    preset: Optional[QuantizationPreset] = None,
     target_device: TargetDevice = TargetDevice.ANY,
     subset_size: int = 300,
     fast_bias_correction: bool = True,
@@ -184,7 +184,7 @@ def native_quantize_impl(
     dump_parameters(
         quantized_model,
         {
-            "preset": preset.value,
+            "preset": preset,
             "target_device": target_device.value,
             "subset_size": subset_size,
             "fast_bias_correction": fast_bias_correction,
@@ -206,7 +206,7 @@ def native_quantize_with_accuracy_control_impl(
     validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]],
     max_drop: float = 0.01,
     drop_type: DropType = DropType.ABSOLUTE,
-    preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+    preset: Optional[QuantizationPreset] = None,
     target_device: TargetDevice = TargetDevice.ANY,
     subset_size: int = 300,
     fast_bias_correction: bool = True,
@@ -321,7 +321,7 @@ def native_quantize_with_accuracy_control_impl(
     dump_parameters(
         quantized_model,
         {
-            "preset": preset.value,
+            "preset": preset,
             "target_device": target_device.value,
             "subset_size": subset_size,
             "fast_bias_correction": fast_bias_correction,
@@ -339,7 +339,7 @@ def native_quantize_with_accuracy_control_impl(
 def quantize_impl(
     model: ov.Model,
     calibration_dataset: Dataset,
-    preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+    preset: Optional[QuantizationPreset] = None,
     target_device: TargetDevice = TargetDevice.ANY,
     subset_size: int = 300,
     fast_bias_correction: bool = True,
@@ -396,7 +396,7 @@ def quantize_with_accuracy_control_impl(
     validation_fn: Callable[[Any, Iterable[Any]], float],
     max_drop: float = 0.01,
     drop_type: DropType = DropType.ABSOLUTE,
-    preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+    preset: Optional[QuantizationPreset] = None,
     target_device: TargetDevice = TargetDevice.ANY,
     subset_size: int = 300,
     fast_bias_correction: bool = True,

diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py
@@ -111,8 +111,12 @@ def __init__(
         backend_params: Optional[Dict[str, Any]] = None,
     ):
         """
-        :param preset: A preset that controls the quantization mode,
-            defaults to QuantizationPreset.PERFORMANCE.
+        :param preset: A preset that controls the quantization mode (symmetric and asymmetric).
+            It can take the following values:
+            - `performance`: Symmetric quantization of weights and activations.
+            - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+            Default value is None. In this case, the `mixed` preset is used for the `transformer`
+            model type; otherwise, `performance` is used.
         :param target_device: A target device the specificity of which will be taken
             into account while compressing in order to obtain the best performance
             for this type of device, defaults to TargetDevice.ANY.

diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py
@@ -47,11 +47,12 @@ def __init__(
         advanced_parameters: Optional[AdvancedQuantizationParameters] = None,
     ):
         """
-        :param preset: A preset that controls the quantization mode
-            (symmetric and asymmetric). It can take the following values:
+        :param preset: A preset that controls the quantization mode (symmetric and asymmetric).
+            It can take the following values:
             - `performance`: Symmetric quantization of weights and activations.
-            - `mixed`: Symmetric quantization of weights and asymmetric
-            quantization of activations.
+            - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+            Default value is None. In this case, the `mixed` preset is used for the `transformer`
+            model type; otherwise, `performance` is used.
         :param target_device: A target device the specificity of which will be taken
             into account while compressing in order to obtain the best performance
             for this type of device.

diff --git a/nncf/quantization/algorithms/post_training/pipeline.py b/nncf/quantization/algorithms/post_training/pipeline.py
@@ -47,11 +47,12 @@ def create_ptq_pipeline(
         3) MinMaxQuantization
         4) FastBiasCorrection or BiasCorrection
 
-    :param preset: A preset that controls the quantization mode
-        (symmetric and asymmetric). It can take the following values:
+    :param preset: A preset that controls the quantization mode (symmetric and asymmetric).
+        It can take the following values:
         - `performance`: Symmetric quantization of weights and activations.
-        - `mixed`: Symmetric quantization of weights and asymmetric
-        quantization of activations.
+        - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+        Default value is None. In this case, the `mixed` preset is used for the `transformer`
+        model type; otherwise, `performance` is used.
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
         for this type of device.

diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py
@@ -54,11 +54,12 @@ def quantize(
     :param calibration_dataset: A representative dataset for the
         calibration process.
     :type  calibration_dataset: nncf.Dataset
-    :param preset: A preset that controls the quantization mode
-        (symmetric and asymmetric). It can take the following values:
+    :param preset: A preset that controls the quantization mode (symmetric and asymmetric).
+        It can take the following values:
         - `performance`: Symmetric quantization of weights and activations.
-        - `mixed`: Symmetric quantization of weights and asymmetric
-          quantization of activations.
+        - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+        Default value is None. In this case, the `mixed` preset is used for the `transformer`
+        model type; otherwise, `performance` is used.
     :type  preset: nncf.QuantizationPreset
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
@@ -179,7 +180,12 @@ def quantize_with_accuracy_control(
     :param max_drop: The maximum accuracy drop that should be achieved after the quantization.
     :param drop_type: The accuracy drop type, which determines how the maximum accuracy
         drop between the original model and the compressed model is calculated.
-    :param preset: A preset that controls the quantization mode.
+    :param preset: A preset that controls the quantization mode (symmetric and asymmetric).
+        It can take the following values:
+        - `performance`: Symmetric quantization of weights and activations.
+        - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+        Default value is None. In this case, the `mixed` preset is used for the `transformer`
+        model type; otherwise, `performance` is used.
     :type preset: nncf.QuantizationPreset
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
@@ -306,7 +312,12 @@ def quantize_with_tune_hyperparams(
     :param initial_metric_results: Initial metric results.
     :param quantized_metric_results: Quantized metric results.
     :param tuner_subset_size: Tuner subset size.
-    :param preset: A preset that controls the quantization mode.
+    :param preset: A preset that controls the quantization mode (symmetric and asymmetric).
+        It can take the following values:
+        - `performance`: Symmetric quantization of weights and activations.
+        - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+        Default value is None. In this case, the `mixed` preset is used for the `transformer`
+        model type; otherwise, `performance` is used.
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
         for this type of device.

diff --git a/nncf/tensorflow/quantization/quantize_model.py b/nncf/tensorflow/quantization/quantize_model.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 
 import tensorflow as tf
 
@@ -133,7 +133,7 @@ def _create_nncf_config(
 def quantize_impl(
     model: tf.Module,
     calibration_dataset: Dataset,
-    preset: Optional[QuantizationPreset],
+    preset: Union[QuantizationPreset, None],
     target_device: TargetDevice,
     subset_size: int,
     fast_bias_correction: bool,

diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py
@@ -10,7 +10,7 @@
 # limitations under the License.
 
 from copy import deepcopy
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, Optional, Tuple, Union
 
 import torch
 
@@ -85,6 +85,11 @@ def _get_transformer_quantization_config(preset: QuantizationPreset, subset_size
     """
     Returns the quantization config for transformer-based models.
 
+    :param preset: A preset that controls the quantization mode
+        (symmetric and asymmetric). It can take the following values:
+        - `performance`: Symmetric quantization of weights and activations.
+        - `mixed`: Symmetric quantization of weights and asymmetric
+          quantization of activations.
     :param subset_size: Size of a subset to calculate activations
         statistics used for quantization.
     :return: The quantization config for transformer-based models.
@@ -133,7 +138,7 @@ def _get_default_quantization_config(preset: QuantizationPreset, subset_size: in
 
 
 def _create_nncf_config(
-    preset: Optional[QuantizationPreset],
+    preset: Union[QuantizationPreset, None],
     target_device: TargetDevice,
     subset_size: int,
     model_type: Optional[ModelType],
@@ -148,6 +153,7 @@ def _create_nncf_config(
         - `performance`: Symmetric quantization of weights and activations.
         - `mixed`: Symmetric quantization of weights and asymmetric
           quantization of activations.
+        - `None`: the `mixed` preset is used for the `transformer` model type; otherwise `performance`.
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
         for this type of device.
@@ -192,7 +198,7 @@ def _create_nncf_config(
 def quantize_impl(
     model: torch.nn.Module,
     calibration_dataset: Dataset,
-    preset: Optional[QuantizationPreset],
+    preset: Union[QuantizationPreset, None],
     target_device: TargetDevice,
     subset_size: int,
     fast_bias_correction: bool,