Skip to content

Commit

Permalink
replied to comments
Browse files Browse the repository at this point in the history
  • Loading branch information
alexsu52 committed Oct 25, 2023
1 parent a273d8e commit ef60e81
Show file tree
Hide file tree
Showing 10 changed files with 63 additions and 43 deletions.
4 changes: 2 additions & 2 deletions nncf/experimental/torch/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Dict, Optional, Tuple
from typing import Any, Dict, Optional, Tuple, Union

import torch

Expand Down Expand Up @@ -87,7 +87,7 @@ def send_to_device(tensor):
def quantize_impl(
model: torch.nn.Module,
calibration_dataset: Dataset,
preset: Optional[QuantizationPreset],
preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
Expand Down
4 changes: 2 additions & 2 deletions nncf/onnx/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional
from typing import Optional, Union

import onnx

Expand All @@ -31,7 +31,7 @@
def quantize_impl(
model: onnx.ModelProto,
calibration_dataset: Dataset,
preset: Optional[QuantizationPreset],
preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
Expand Down
17 changes: 7 additions & 10 deletions nncf/openvino/pot/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import logging
import tempfile
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, Optional
from typing import Any, Callable, Dict, Iterable, Optional, Union

import openvino.runtime as ov
from openvino._offline_transformations import compress_quantize_weights_transformation
Expand Down Expand Up @@ -192,7 +192,7 @@ def _create_quantization_group_config(


def _create_quantization_config(
preset: Optional[QuantizationPreset],
preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
Expand All @@ -203,11 +203,11 @@ def _create_quantization_config(
"""
Creates a quantization configuration.
:param preset: A preset that controls the quantization mode
(symmetric and asymmetric). It can take the following values:
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric
quantization of activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
- `None`: `mixed` preset is used for `transformer` model type, otherwise `performance`.
:param target_device: A target device the specificity of which will be
taken into account while compressing in order to obtain the best
performance for this type of device.
Expand All @@ -225,10 +225,7 @@ def _create_quantization_config(
:return: A POT quantization configuration as dict.
"""
if preset is None:
if model_type == ModelType.TRANSFORMER:
preset = QuantizationPreset.MIXED
else:
preset = QuantizationPreset.PERFORMANCE
preset = QuantizationPreset.MIXED if model_type == ModelType.TRANSFORMER else QuantizationPreset.PERFORMANCE

config = {
"target_device": target_device.value,
Expand Down
16 changes: 8 additions & 8 deletions nncf/openvino/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def dump_parameters(model: ov.Model, parameters: Dict, path: Optional[List] = No
def native_quantize_if_op_impl(
model: ov.Model,
calibration_dataset: Dataset,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down Expand Up @@ -138,7 +138,7 @@ def native_quantize_if_op_impl(
dump_parameters(
quantized_model,
{
"preset": preset.value,
"preset": preset,
"target_device": target_device.value,
"subset_size": subset_size,
"fast_bias_correction": fast_bias_correction,
Expand All @@ -154,7 +154,7 @@ def native_quantize_if_op_impl(
def native_quantize_impl(
model: ov.Model,
calibration_dataset: Dataset,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down Expand Up @@ -184,7 +184,7 @@ def native_quantize_impl(
dump_parameters(
quantized_model,
{
"preset": preset.value,
"preset": preset,
"target_device": target_device.value,
"subset_size": subset_size,
"fast_bias_correction": fast_bias_correction,
Expand All @@ -206,7 +206,7 @@ def native_quantize_with_accuracy_control_impl(
validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]],
max_drop: float = 0.01,
drop_type: DropType = DropType.ABSOLUTE,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down Expand Up @@ -321,7 +321,7 @@ def native_quantize_with_accuracy_control_impl(
dump_parameters(
quantized_model,
{
"preset": preset.value,
"preset": preset,
"target_device": target_device.value,
"subset_size": subset_size,
"fast_bias_correction": fast_bias_correction,
Expand All @@ -339,7 +339,7 @@ def native_quantize_with_accuracy_control_impl(
def quantize_impl(
model: ov.Model,
calibration_dataset: Dataset,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down Expand Up @@ -396,7 +396,7 @@ def quantize_with_accuracy_control_impl(
validation_fn: Callable[[Any, Iterable[Any]], float],
max_drop: float = 0.01,
drop_type: DropType = DropType.ABSOLUTE,
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
Expand Down
8 changes: 6 additions & 2 deletions nncf/quantization/algorithms/min_max/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,12 @@ def __init__(
backend_params: Optional[Dict[str, Any]] = None,
):
"""
:param preset: A preset that controls the quantization mode,
defaults to QuantizationPreset.PERFORMANCE.
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, `mixed` preset is used for `transformer`
model type, otherwise `performance`.
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
for this type of device, defaults to TargetDevice.ANY.
Expand Down
9 changes: 5 additions & 4 deletions nncf/quantization/algorithms/post_training/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,12 @@ def __init__(
advanced_parameters: Optional[AdvancedQuantizationParameters] = None,
):
"""
:param preset: A preset that controls the quantization mode
(symmetric and asymmetric). It can take the following values:
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric
quantization of activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, `mixed` preset is used for `transformer`
model type, otherwise `performance`.
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
for this type of device.
Expand Down
9 changes: 5 additions & 4 deletions nncf/quantization/algorithms/post_training/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,12 @@ def create_ptq_pipeline(
3) MinMaxQuantization
4) FastBiasCorrection or BiasCorrection
:param preset: A preset that controls the quantization mode
(symmetric and asymmetric). It can take the following values:
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric
quantization of activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, `mixed` preset is used for `transformer`
model type, otherwise `performance`.
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
for this type of device.
Expand Down
23 changes: 17 additions & 6 deletions nncf/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,12 @@ def quantize(
:param calibration_dataset: A representative dataset for the
calibration process.
:type calibration_dataset: nncf.Dataset
:param preset: A preset that controls the quantization mode
(symmetric and asymmetric). It can take the following values:
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric
quantization of activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, `mixed` preset is used for `transformer`
model type, otherwise `performance`.
:type preset: nncf.QuantizationPreset
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
Expand Down Expand Up @@ -179,7 +180,12 @@ def quantize_with_accuracy_control(
:param max_drop: The maximum accuracy drop that should be achieved after the quantization.
:param drop_type: The accuracy drop type, which determines how the maximum accuracy
drop between the original model and the compressed model is calculated.
:param preset: A preset that controls the quantization mode.
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, `mixed` preset is used for `transformer`
model type, otherwise `performance`.
:type preset: nncf.QuantizationPreset
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
Expand Down Expand Up @@ -306,7 +312,12 @@ def quantize_with_tune_hyperparams(
:param initial_metric_results: Initial metric results.
:param quantized_metric_results: Quantized metric results.
:param tuner_subset_size: Tuner subset size.
:param preset: A preset that controls the quantization mode.
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None. In this case, `mixed` preset is used for `transformer`
model type, otherwise `performance`.
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
for this type of device.
Expand Down
4 changes: 2 additions & 2 deletions nncf/tensorflow/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Union

import tensorflow as tf

Expand Down Expand Up @@ -133,7 +133,7 @@ def _create_nncf_config(
def quantize_impl(
model: tf.Module,
calibration_dataset: Dataset,
preset: Optional[QuantizationPreset],
preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
Expand Down
12 changes: 9 additions & 3 deletions nncf/torch/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# limitations under the License.

from copy import deepcopy
from typing import Any, Dict, Optional, Tuple
from typing import Any, Dict, Optional, Tuple, Union

import torch

Expand Down Expand Up @@ -85,6 +85,11 @@ def _get_transformer_quantization_config(preset: QuantizationPreset, subset_size
"""
Returns the quantization config for transformer-based models.
:param preset: A preset that controls the quantization mode
(symmetric and asymmetric). It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric
quantization of activations.
:param subset_size: Size of a subset to calculate activations
statistics used for quantization.
:return: The quantization config for transformer-based models.
Expand Down Expand Up @@ -133,7 +138,7 @@ def _get_default_quantization_config(preset: QuantizationPreset, subset_size: in


def _create_nncf_config(
preset: Optional[QuantizationPreset],
preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
model_type: Optional[ModelType],
Expand All @@ -148,6 +153,7 @@ def _create_nncf_config(
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric
quantization of activations.
- `None`: `mixed` preset is used for `transformer` model type, otherwise `performance`.
:param target_device: A target device the specificity of which will be taken
into account while compressing in order to obtain the best performance
for this type of device.
Expand Down Expand Up @@ -192,7 +198,7 @@ def _create_nncf_config(
def quantize_impl(
model: torch.nn.Module,
calibration_dataset: Dataset,
preset: Optional[QuantizationPreset],
preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
Expand Down

0 comments on commit ef60e81

Please sign in to comment.