diff --git a/examples/post_training_quantization/onnx/mobilenet_v2/main.py b/examples/post_training_quantization/onnx/mobilenet_v2/main.py index 007b537ceb6..5f9850f8cbe 100755 --- a/examples/post_training_quantization/onnx/mobilenet_v2/main.py +++ b/examples/post_training_quantization/onnx/mobilenet_v2/main.py @@ -16,7 +16,7 @@ import numpy as np import onnx -import openvino.runtime as ov +import openvino as ov import torch from fastdownload import FastDownload from fastdownload import download_url diff --git a/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/main.py b/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/main.py index daaa491e4ba..88393421801 100644 --- a/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/main.py +++ b/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/main.py @@ -19,7 +19,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple import numpy as np -import openvino.runtime as ov +import openvino as ov import torch from anomalib.data.mvtec import MVTec from anomalib.data.utils import download @@ -165,12 +165,16 @@ def transform_fn(data_item): # Benchmark performance, calculate compression rate and validate accuracy fp32_ir_path = f"{ROOT}/stfpm_fp32.xml" -ov.serialize(ov_model, fp32_ir_path) +ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False) print(f"[1/7] Save FP32 model: {fp32_ir_path}") fp32_size = get_model_size(fp32_ir_path, verbose=True) +# To avoid an accuracy drop when saving a model due to compression of unquantized +# weights to FP16, compress_to_fp16=False should be used. This is necessary because +# nncf.quantize_with_accuracy_control(...) keeps the most impactful operations within +# the model in the original precision to achieve the specified model accuracy. 
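For anyone porting their own scripts to the same API, here is a minimal hedged sketch of the save-path change these examples make (the `ov_model` variable and the output path are placeholders, not names from a specific example):

    import openvino as ov

    # ov.serialize(model, path) is superseded by ov.save_model(model, path),
    # which compresses weights to FP16 by default. Passing
    # compress_to_fp16=False keeps unquantized weights in their original
    # precision, which matters when quantize_with_accuracy_control has
    # deliberately kept some operations in FP32.
    ov.save_model(ov_model, "model_fp32.xml", compress_to_fp16=False)
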
int8_ir_path = f"{ROOT}/stfpm_int8.xml" -ov.serialize(ov_quantized_model, int8_ir_path) +ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False) print(f"[2/7] Save INT8 model: {int8_ir_path}") int8_size = get_model_size(int8_ir_path, verbose=True) diff --git a/examples/post_training_quantization/openvino/mobilenet_v2/main.py b/examples/post_training_quantization/openvino/mobilenet_v2/main.py index 2cc6ab0329f..d6533bd61d8 100644 --- a/examples/post_training_quantization/openvino/mobilenet_v2/main.py +++ b/examples/post_training_quantization/openvino/mobilenet_v2/main.py @@ -16,7 +16,7 @@ from typing import List, Optional import numpy as np -import openvino.runtime as ov +import openvino as ov import torch from fastdownload import FastDownload from sklearn.metrics import accuracy_score @@ -137,12 +137,12 @@ def transform_fn(data_item): # Benchmark performance, calculate compression rate and validate accuracy fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml" -ov.serialize(ov_model, fp32_ir_path) +ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False) print(f"[1/7] Save FP32 model: {fp32_ir_path}") fp32_model_size = get_model_size(fp32_ir_path, verbose=True) int8_ir_path = f"{ROOT}/mobilenet_v2_int8.xml" -ov.serialize(ov_quantized_model, int8_ir_path) +ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False) print(f"[2/7] Save INT8 model: {int8_ir_path}") int8_model_size = get_model_size(int8_ir_path, verbose=True) diff --git a/examples/post_training_quantization/openvino/yolov8/main.py b/examples/post_training_quantization/openvino/yolov8/main.py index f20730970f6..8280d9391b4 100644 --- a/examples/post_training_quantization/openvino/yolov8/main.py +++ b/examples/post_training_quantization/openvino/yolov8/main.py @@ -14,7 +14,7 @@ from typing import Any, Dict, Tuple import numpy as np -import openvino.runtime as ov +import openvino as ov import torch from tqdm import tqdm from ultralytics.cfg import get_cfg @@ -158,7 +158,7 @@ def main(): # Quantize mode in OpenVINO representation quantized_model = quantize(ov_model, data_loader, validator) quantized_model_path = Path(f"{ROOT}/{MODEL_NAME}_openvino_model/{MODEL_NAME}_quantized.xml") - ov.serialize(quantized_model, str(quantized_model_path)) + ov.save_model(quantized_model, str(quantized_model_path), compress_to_fp16=False) # Validate FP32 model fp_stats, total_images, total_objects = validate(ov_model, tqdm(data_loader), validator) diff --git a/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py b/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py index a6e17830289..f56bbed26bf 100644 --- a/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py +++ b/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py @@ -15,7 +15,7 @@ from typing import Any, Dict, Tuple import numpy as np -import openvino.runtime as ov +import openvino as ov import torch from tqdm import tqdm from ultralytics.cfg import get_cfg @@ -222,7 +222,7 @@ def main(): quantized_model = quantize_ac(ov_model, data_loader, validator) quantized_model_path = Path(f"{ROOT}/{MODEL_NAME}_openvino_model/{MODEL_NAME}_quantized.xml") - ov.serialize(quantized_model, str(quantized_model_path)) + ov.save_model(quantized_model, str(quantized_model_path), compress_to_fp16=False) # Validate FP32 model fp_stats, total_images, total_objects = validate(ov_model, tqdm(data_loader), validator) diff --git 
a/examples/post_training_quantization/torch/mobilenet_v2/main.py b/examples/post_training_quantization/torch/mobilenet_v2/main.py index 9297d5cf94f..35f9b35c06c 100644 --- a/examples/post_training_quantization/torch/mobilenet_v2/main.py +++ b/examples/post_training_quantization/torch/mobilenet_v2/main.py @@ -16,7 +16,7 @@ from typing import List, Optional import numpy as np -import openvino.runtime as ov +import openvino as ov import torch from fastdownload import FastDownload from openvino.tools import mo @@ -173,12 +173,12 @@ def transform_fn(data_item): ov_quantized_model = mo.convert_model(int8_onnx_path) fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml" -ov.serialize(ov_model, fp32_ir_path) +ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False) print(f"[1/7] Save FP32 model: {fp32_ir_path}") fp32_model_size = get_model_size(fp32_ir_path, verbose=True) int8_ir_path = f"{ROOT}/mobilenet_v2_int8.xml" -ov.serialize(ov_quantized_model, int8_ir_path) +ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False) print(f"[2/7] Save INT8 model: {int8_ir_path}") int8_model_size = get_model_size(int8_ir_path, verbose=True) diff --git a/examples/post_training_quantization/torch/ssd300_vgg16/main.py b/examples/post_training_quantization/torch/ssd300_vgg16/main.py index c90ee304e2c..6c495ec03ce 100644 --- a/examples/post_training_quantization/torch/ssd300_vgg16/main.py +++ b/examples/post_training_quantization/torch/ssd300_vgg16/main.py @@ -18,7 +18,7 @@ import nncf from nncf.torch import disable_tracing -import openvino.runtime as ov +import openvino as ov import torch import torchvision from fastdownload import FastDownload @@ -163,12 +163,12 @@ def main(): ov_quantized_model = mo.convert_model(int8_onnx_path) fp32_ir_path = f"{ROOT}/ssd300_vgg16_fp32.xml" - ov.serialize(ov_model, fp32_ir_path) + ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False) print(f"[1/7] Save FP32 model: {fp32_ir_path}") fp32_model_size = get_model_size(fp32_ir_path, verbose=True) int8_ir_path = f"{ROOT}/ssd300_vgg16_int8.xml" - ov.serialize(ov_quantized_model, int8_ir_path) + ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False) print(f"[2/7] Save INT8 model: {int8_ir_path}") int8_model_size = get_model_size(int8_ir_path, verbose=True) diff --git a/nncf/experimental/torch/quantization/quantize_model.py b/nncf/experimental/torch/quantization/quantize_model.py index c97ab9c9675..714cc5ed702 100644 --- a/nncf/experimental/torch/quantization/quantize_model.py +++ b/nncf/experimental/torch/quantization/quantize_model.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional, Tuple, Union import torch @@ -87,7 +87,7 @@ def send_to_device(tensor): def quantize_impl( model: torch.nn.Module, calibration_dataset: Dataset, - preset: QuantizationPreset, + preset: Union[QuantizationPreset, None], target_device: TargetDevice, subset_size: int, fast_bias_correction: bool, diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py index a88a9213f36..7be23384964 100644 --- a/nncf/onnx/quantization/quantize_model.py +++ b/nncf/onnx/quantization/quantize_model.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional +from typing import Optional, Union import onnx @@ -31,7 +31,7 @@ def quantize_impl( model: onnx.ModelProto, calibration_dataset: Dataset, - preset: QuantizationPreset, + preset: Union[QuantizationPreset, None], target_device: TargetDevice, subset_size: int, fast_bias_correction: bool, diff --git a/nncf/openvino/graph/model_utils.py b/nncf/openvino/graph/model_utils.py index ec00ee00c72..1abf79ee14d 100644 --- a/nncf/openvino/graph/model_utils.py +++ b/nncf/openvino/graph/model_utils.py @@ -16,42 +16,9 @@ from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.transformations.layout import TransformationLayout from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS -from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionBackpropDataMetatype -from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype -from nncf.openvino.graph.metatypes.openvino_metatypes import OVDepthwiseConvolutionMetatype -from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionBackpropDataMetatype -from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionMetatype -from nncf.openvino.graph.node_utils import create_bias_tensor -from nncf.openvino.graph.node_utils import is_node_with_bias from nncf.openvino.graph.transformations.command_creation import OVCommandCreator -def insert_null_biases(model: ov.Model, graph: NNCFGraph) -> ov.Model: - """ - This method finds and inserts zero biases for the layers that should have it. - - :param model: ov.Model instance. - :param graph: Model graph. - :return: Updated ov.Model instance with zero biases - """ - types_to_insert_bias = [ - OVConvolutionMetatype, - OVGroupConvolutionMetatype, - OVDepthwiseConvolutionMetatype, - OVConvolutionBackpropDataMetatype, - OVGroupConvolutionBackpropDataMetatype, - ] - nodes_without_biases = graph.get_nodes_by_metatypes(types_to_insert_bias) - nodes_without_biases = [node for node in nodes_without_biases if not is_node_with_bias(node, graph)] - transformation_layout = TransformationLayout() - model_transformer = ModelTransformerFactory.create(model) - for node_without_bias in nodes_without_biases: - const_value = create_bias_tensor(node_without_bias, graph, 0) - bias_insertion_command = OVCommandCreator.create_command_to_insert_bias(node_without_bias, const_value) - transformation_layout.register(bias_insertion_command) - return model_transformer.transform(transformation_layout) - - def remove_fq_from_inputs(model: ov.Model, graph: NNCFGraph) -> ov.Model: """ This method removes the activation Fake Quantize nodes from the model. 
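A brief typing note on the `preset` signature changes in the hunks above and below: the `Union[QuantizationPreset, None]` spelled out here and the `Optional[QuantizationPreset]` used in other files are interchangeable, as this small sketch (using only names already imported by the PR) shows:

    from typing import Optional, Union

    from nncf.common.quantization.structs import QuantizationPreset

    # typing.Optional[X] is defined as Union[X, None]; both annotations
    # admit None, which now signals "let NNCF derive the preset from model_type".
    assert Optional[QuantizationPreset] == Union[QuantizationPreset, None]
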
diff --git a/nncf/openvino/pot/quantization/quantize_model.py b/nncf/openvino/pot/quantization/quantize_model.py index 5bb985bff9e..4e1d0afbed6 100644 --- a/nncf/openvino/pot/quantization/quantize_model.py +++ b/nncf/openvino/pot/quantization/quantize_model.py @@ -12,7 +12,7 @@ import logging import tempfile from pathlib import Path -from typing import Any, Callable, Dict, Iterable, Optional +from typing import Any, Callable, Dict, Iterable, Optional, Union import openvino.runtime as ov from openvino._offline_transformations import compress_quantize_weights_transformation @@ -192,22 +192,22 @@ def _create_quantization_group_config( def _create_quantization_config( - preset: QuantizationPreset, + preset: Union[QuantizationPreset, None], target_device: TargetDevice, subset_size: int, fast_bias_correction: bool, - model_type: Optional[ModelType], - ignored_scope: Optional[IgnoredScope], - advanced_parameters: Optional[AdvancedQuantizationParameters], + model_type: Union[ModelType, None], + ignored_scope: Union[IgnoredScope, None], + advanced_parameters: Union[AdvancedQuantizationParameters, None], ) -> Dict[str, Any]: """ Creates a quantization configuration. - :param preset: A preset that controls the quantization mode - (symmetric and asymmetric). It can take the following values: + :param preset: A preset that controls the quantization mode (symmetric and asymmetric). + It can take the following values: - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric - quantization of activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + - `None`: `mixed` preset is used for `transformer` model type, otherwise `performance`. :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device. @@ -224,6 +224,9 @@ def _create_quantization_config( fine-tuning the quantization algorithm. :return: A POT quantization configuration as dict. 
""" + if preset is None: + preset = QuantizationPreset.MIXED if model_type == ModelType.TRANSFORMER else QuantizationPreset.PERFORMANCE + config = { "target_device": target_device.value, "preset": preset.value, @@ -320,7 +323,7 @@ def _create_engine_config( def quantize_impl( model: ov.Model, calibration_dataset: Dataset, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, @@ -423,7 +426,7 @@ def quantize_with_accuracy_control_impl( validation_fn: Callable[[ov.CompiledModel, Iterable[Any]], float], max_drop: float = 0.01, drop_type: DropType = DropType.ABSOLUTE, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index cbc4e03a842..2a45bd9b8f3 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -96,7 +96,7 @@ def dump_parameters(model: ov.Model, parameters: Dict, path: Optional[List] = No def native_quantize_if_op_impl( model: ov.Model, calibration_dataset: Dataset, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, @@ -138,7 +138,7 @@ def native_quantize_if_op_impl( dump_parameters( quantized_model, { - "preset": preset.value, + "preset": preset, "target_device": target_device.value, "subset_size": subset_size, "fast_bias_correction": fast_bias_correction, @@ -154,7 +154,7 @@ def native_quantize_if_op_impl( def native_quantize_impl( model: ov.Model, calibration_dataset: Dataset, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, @@ -184,7 +184,7 @@ def native_quantize_impl( dump_parameters( quantized_model, { - "preset": preset.value, + "preset": preset, "target_device": target_device.value, "subset_size": subset_size, "fast_bias_correction": fast_bias_correction, @@ -206,7 +206,7 @@ def native_quantize_with_accuracy_control_impl( validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]], max_drop: float = 0.01, drop_type: DropType = DropType.ABSOLUTE, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, @@ -321,7 +321,7 @@ def native_quantize_with_accuracy_control_impl( dump_parameters( quantized_model, { - "preset": preset.value, + "preset": preset, "target_device": target_device.value, "subset_size": subset_size, "fast_bias_correction": fast_bias_correction, @@ -339,7 +339,7 @@ def native_quantize_with_accuracy_control_impl( def quantize_impl( model: ov.Model, calibration_dataset: Dataset, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, @@ -396,7 +396,7 @@ def quantize_with_accuracy_control_impl( 
validation_fn: Callable[[Any, Iterable[Any]], float], max_drop: float = 0.01, drop_type: DropType = DropType.ABSOLUTE, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index 76f4222c9bf..e0de55f2917 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -134,7 +134,6 @@ def apply( dataset: Optional[Dataset] = None, ) -> TModel: self._set_backend_entity(model) - model = self._backend_entity.insert_null_biases(model, graph) main_transformations_layout = TransformationLayout() main_model_transformer = ModelTransformerFactory.create(model) @@ -488,8 +487,6 @@ def output_filter_func(point): def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) model_copy = self._backend_entity.remove_fq_from_inputs(copy_model(model), graph) - graph_copy = NNCFGraphFactory.create(model_copy) - model_copy = self._backend_entity.insert_null_biases(model_copy, graph_copy) nncf_graph = NNCFGraphFactory.create(model_copy) statistic_container = StatisticPointsContainer() diff --git a/nncf/quantization/algorithms/bias_correction/backend.py b/nncf/quantization/algorithms/bias_correction/backend.py index 08eb9d433c8..cc3e39ea526 100644 --- a/nncf/quantization/algorithms/bias_correction/backend.py +++ b/nncf/quantization/algorithms/bias_correction/backend.py @@ -203,14 +203,3 @@ def remove_fq_from_inputs(model: TModel, nncf_graph: NNCFGraph) -> TModel: :param nncf_graph: NNCFGraph instance. :return: TModel without activation Fake Quantize nodes (or Quantize-Dequantize pairs). """ - - @staticmethod - @abstractmethod - def insert_null_biases(model: TModel, nncf_graph: NNCFGraph) -> TModel: - """ - This method finds and inserts zero biases for the layers that should have it. - - :param model: TModel instance. - :param nncf_graph: NNCFGraph instance. 
- :return: TModel instance with zero biases - """ diff --git a/nncf/quantization/algorithms/bias_correction/onnx_backend.py b/nncf/quantization/algorithms/bias_correction/onnx_backend.py index 498f33d0aba..f65ed485248 100644 --- a/nncf/quantization/algorithms/bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/bias_correction/onnx_backend.py @@ -117,7 +117,3 @@ def is_node_with_bias(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: @staticmethod def remove_fq_from_inputs(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> onnx.ModelProto: return remove_fq_from_inputs(model, nncf_graph) - - @staticmethod - def insert_null_biases(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> onnx.ModelProto: - return model diff --git a/nncf/quantization/algorithms/bias_correction/openvino_backend.py b/nncf/quantization/algorithms/bias_correction/openvino_backend.py index 1af3bb9d4cf..f329f815fcb 100644 --- a/nncf/quantization/algorithms/bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/bias_correction/openvino_backend.py @@ -19,7 +19,6 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS -from nncf.openvino.graph.model_utils import insert_null_biases from nncf.openvino.graph.model_utils import remove_fq_from_inputs from nncf.openvino.graph.node_utils import get_bias_value from nncf.openvino.graph.node_utils import is_node_with_bias @@ -132,7 +131,3 @@ def is_node_with_bias(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: @staticmethod def remove_fq_from_inputs(model: ov.Model, nncf_graph: NNCFGraph) -> ov.Model: return remove_fq_from_inputs(model, nncf_graph) - - @staticmethod - def insert_null_biases(model: ov.Model, nncf_graph: NNCFGraph) -> ov.Model: - return insert_null_biases(model, nncf_graph) diff --git a/nncf/quantization/algorithms/hyperparameter_tuner/param_grid.py b/nncf/quantization/algorithms/hyperparameter_tuner/param_grid.py index 4874fc80b42..cd621295f2f 100644 --- a/nncf/quantization/algorithms/hyperparameter_tuner/param_grid.py +++ b/nncf/quantization/algorithms/hyperparameter_tuner/param_grid.py @@ -13,6 +13,7 @@ from typing import Any, Dict, List from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.utils.backend import BackendType from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection from nncf.quantization.algorithms.channel_alignment.algorithm import ChannelAlignment from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection @@ -89,7 +90,7 @@ def _get_bias_correction_param_grid() -> ParamGrid: return {"fast_bias_correction": [True, False]} -def get_quantization_param_grids(pipeline: Pipeline) -> List[ParamGrid]: +def get_quantization_param_grids(pipeline: Pipeline, backend: BackendType) -> List[ParamGrid]: """ Returns params grid for post-training quantization algorithm. 
""" @@ -105,7 +106,10 @@ def get_quantization_param_grids(pipeline: Pipeline) -> List[ParamGrid]: for step in pipeline.pipeline_steps: param_grid = {} for algorithm in step: + if backend not in algorithm.available_backends: + continue param_grid.update(algorithm_cls_to_param_grid[algorithm.__class__]) - param_grids.append(param_grid) + if param_grid: + param_grids.append(param_grid) return param_grids diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 55301f212d7..3a9cbdfe473 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -96,7 +96,7 @@ class MinMaxQuantization(Algorithm): def __init__( self, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, model_type: Optional[ModelType] = None, @@ -111,8 +111,12 @@ def __init__( backend_params: Optional[Dict[str, Any]] = None, ): """ - :param preset: A preset that controls the quantization mode, - defaults to QuantizationPreset.PERFORMANCE. + :param preset: A preset that controls the quantization mode (symmetric and asymmetric). + It can take the following values: + - `performance`: Symmetric quantization of weights and activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + Default value is None. In this case, `mixed` preset is used for `transformer` + model type, otherwise `performance`. :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device, defaults to TargetDevice.ANY. @@ -157,6 +161,13 @@ def __init__( QuantizerGroup.ACTIVATIONS: activations_range_estimator_params, } + # preset definition + if preset is None: + if model_type == ModelType.TRANSFORMER: + preset = QuantizationPreset.MIXED + else: + preset = QuantizationPreset.PERFORMANCE + # Calculates global quantizer constraints self._global_quantizer_constraints = {} for quantizer_group in QuantizerGroup: diff --git a/nncf/quantization/algorithms/pipeline.py b/nncf/quantization/algorithms/pipeline.py index 1b5db8a8614..2c02e3753c6 100644 --- a/nncf/quantization/algorithms/pipeline.py +++ b/nncf/quantization/algorithms/pipeline.py @@ -111,8 +111,8 @@ def run_step( current_model = model current_graph = graph - pipeline_step = self.pipeline_steps[step_index] - pipeline_step = self._remove_unsupported_algorithms(pipeline_step, get_backend(current_model)) + pipeline_steps = self._remove_unsupported_algorithms(get_backend(model)) + pipeline_step = pipeline_steps[step_index] for algorithm in pipeline_step[:-1]: current_model = algorithm.apply(current_model, current_graph, step_statistics) current_graph = NNCFGraphFactory.create(current_model) @@ -142,13 +142,14 @@ def run_from_step( :return: The updated model after executing the pipeline from the specified pipeline step to the end. 
""" + pipeline_steps = self._remove_unsupported_algorithms(get_backend(model)) if step_index_to_statistics is None: step_index_to_statistics = {} # The `step_model` and `step_graph` entities are required to execute `step_index`-th pipeline step step_model = model step_graph = graph - for step_index in range(start_step_index, len(self.pipeline_steps)): + for step_index in range(start_step_index, len(pipeline_steps)): # Create graph required to run current pipeline step if step_graph is None: step_graph = NNCFGraphFactory.create(step_model) @@ -178,8 +179,8 @@ def get_statistic_points_for_step( :return: Statistics that should be collected to execute `step_index`-th pipeline step. """ container = StatisticPointsContainer() - pipeline_step = self.pipeline_steps[step_index] - pipeline_step = self._remove_unsupported_algorithms(pipeline_step, get_backend(model)) + pipeline_steps = self._remove_unsupported_algorithms(get_backend(model)) + pipeline_step = pipeline_steps[step_index] for algorithm in pipeline_step: for statistic_points in algorithm.get_statistic_points(model, graph).values(): for statistic_point in statistic_points: @@ -187,13 +188,17 @@ def get_statistic_points_for_step( return container - @staticmethod - def _remove_unsupported_algorithms(pipeline_step: PipelineStep, backend: BackendType) -> PipelineStep: - step = [] - for algorithm in pipeline_step: - if backend not in algorithm.available_backends: - nncf_logger.debug(f"{backend.name} does not support {algorithm.__class__.__name__} algorithm yet.") - continue - step.append(algorithm) + def _remove_unsupported_algorithms(self, backend: BackendType) -> List[PipelineStep]: + pipeline_steps = [] + for pipeline_step in self._pipeline_steps: + step = [] + for algorithm in pipeline_step: + if backend not in algorithm.available_backends: + nncf_logger.debug(f"{backend.name} does not support {algorithm.__class__.__name__} algorithm yet.") + continue + step.append(algorithm) + + if step: + pipeline_steps.append(step) - return step + return pipeline_steps diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index e4993f07b1f..4db613c1bb7 100644 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ b/nncf/quantization/algorithms/post_training/algorithm.py @@ -38,7 +38,7 @@ class PostTrainingQuantization(Algorithm): def __init__( self, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, @@ -47,11 +47,12 @@ def __init__( advanced_parameters: Optional[AdvancedQuantizationParameters] = None, ): """ - :param preset: A preset that controls the quantization mode - (symmetric and asymmetric). It can take the following values: + :param preset: A preset that controls the quantization mode (symmetric and asymmetric). + It can take the following values: - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric - quantization of activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + Default value is None. In this case, `mixed` preset is used for `transformer` + model type, otherwise `performance`. :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device. 
diff --git a/nncf/quantization/algorithms/post_training/pipeline.py b/nncf/quantization/algorithms/post_training/pipeline.py index 7b522a39724..7d46828023b 100644 --- a/nncf/quantization/algorithms/post_training/pipeline.py +++ b/nncf/quantization/algorithms/post_training/pipeline.py @@ -30,7 +30,7 @@ def create_ptq_pipeline( - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, @@ -47,11 +47,12 @@ def create_ptq_pipeline( 3) MinMaxQuantization 4) FastBiasCorrection or BiasCorrection - :param preset: A preset that controls the quantization mode - (symmetric and asymmetric). It can take the following values: + :param preset: A preset that controls the quantization mode (symmetric and asymmetric). + It can take the following values: - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric - quantization of activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + Default value is None. In this case, `mixed` preset is used for `transformer` + model type, otherwise `performance`. :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device. diff --git a/nncf/quantization/passes.py b/nncf/quantization/passes.py index 881b7f0a8c9..b3eb27c18c7 100644 --- a/nncf/quantization/passes.py +++ b/nncf/quantization/passes.py @@ -14,8 +14,6 @@ from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.operator_metatypes import OperatorMetatype -from nncf.common.utils.backend import BackendType -from nncf.common.utils.backend import get_backend TModel = TypeVar("TModel") @@ -173,19 +171,3 @@ def filter_constant_nodes( constant_nodes = [node for node in nncf_graph.get_all_nodes() if node not in visited_nodes] nncf_graph.remove_nodes_from(constant_nodes) return nncf_graph - - -def insert_null_biases_pass(model: TModel, graph: NNCFGraph) -> TModel: - """ - This pass finds and inserts zero biases to the given model for the layers that should have it. - - :param model: Model instance. - :param graph: NNCFGraph instance. - :return: Updated Model instance with zero biases - """ - model_backend = get_backend(model) - if model_backend == BackendType.OPENVINO: - from nncf.openvino.graph.model_utils import insert_null_biases - - return insert_null_biases(model, graph) - return model diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index 2c64328f6cb..a1bc87caf9b 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -38,7 +38,7 @@ def quantize( model: TModel, calibration_dataset: Dataset, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, @@ -54,11 +54,12 @@ def quantize( :param calibration_dataset: A representative dataset for the calibration process. :type calibration_dataset: nncf.Dataset - :param preset: A preset that controls the quantization mode - (symmetric and asymmetric). It can take the following values: + :param preset: A preset that controls the quantization mode (symmetric and asymmetric). 
+ It can take the following values: - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric - quantization of activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + Default value is None. In this case, `mixed` preset is used for `transformer` + model type, otherwise `performance`. :type preset: nncf.QuantizationPreset :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance @@ -152,7 +153,7 @@ def quantize_with_accuracy_control( validation_fn: Callable[[Any, Iterable[Any]], float], max_drop: float = 0.01, drop_type: DropType = DropType.ABSOLUTE, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, @@ -179,7 +180,12 @@ def quantize_with_accuracy_control( :param max_drop: The maximum accuracy drop that should be achieved after the quantization. :param drop_type: The accuracy drop type, which determines how the maximum accuracy drop between the original model and the compressed model is calculated. - :param preset: A preset that controls the quantization mode. + :param preset: A preset that controls the quantization mode (symmetric and asymmetric). + It can take the following values: + - `performance`: Symmetric quantization of weights and activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + Default value is None. In this case, `mixed` preset is used for `transformer` + model type, otherwise `performance`. :type preset: nncf.QuantizationPreset :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance @@ -288,7 +294,7 @@ def quantize_with_tune_hyperparams( initial_metric_results: MetricResults, quantized_metric_results: MetricResults, tuner_subset_size: int = 300, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, fast_bias_correction: bool = True, @@ -306,7 +312,12 @@ def quantize_with_tune_hyperparams( :param initial_metric_results: Initial metric results. :param quantized_metric_results: Quantized metric results. :param tuner_subset_size: Tuner subset size. - :param preset: A preset that controls the quantization mode. + :param preset: A preset that controls the quantization mode (symmetric and asymmetric). + It can take the following values: + - `performance`: Symmetric quantization of weights and activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + Default value is None. In this case, `mixed` preset is used for `transformer` + model type, otherwise `performance`. :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device. diff --git a/nncf/tensorflow/quantization/quantize_model.py b/nncf/tensorflow/quantization/quantize_model.py index 4d726d88651..75187d321d2 100644 --- a/nncf/tensorflow/quantization/quantize_model.py +++ b/nncf/tensorflow/quantization/quantize_model.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
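Since the public `nncf.quantize` family above now defaults `preset` to None, a hedged usage sketch may be helpful (the `model` and `calibration_dataset` objects are placeholders prepared elsewhere):

    import nncf

    # With preset omitted (i.e. None), NNCF resolves it internally:
    # `mixed` when model_type is nncf.ModelType.TRANSFORMER,
    # `performance` otherwise.
    quantized_model = nncf.quantize(model, calibration_dataset, model_type=nncf.ModelType.TRANSFORMER)
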
-from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union import tensorflow as tf @@ -133,7 +133,7 @@ def _create_nncf_config( def quantize_impl( model: tf.Module, calibration_dataset: Dataset, - preset: QuantizationPreset, + preset: Union[QuantizationPreset, None], target_device: TargetDevice, subset_size: int, fast_bias_correction: bool, @@ -157,6 +157,9 @@ def quantize_impl( if target_device == TargetDevice.CPU_SPR: raise RuntimeError("target_device == CPU_SPR is not supported.") + if preset is None: + preset = QuantizationPreset.PERFORMANCE + nncf_config = _create_nncf_config(preset, target_device, subset_size, ignored_scope, advanced_parameters) calibration_data_loader = CalibrationDataLoader(calibration_dataset) diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py index 6c6e0c747c8..c37d69c3204 100644 --- a/nncf/torch/quantization/quantize_model.py +++ b/nncf/torch/quantization/quantize_model.py @@ -10,7 +10,7 @@ # limitations under the License. from copy import deepcopy -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional, Tuple, Union import torch @@ -81,17 +81,22 @@ def _get_length(iterable) -> int: return length -def _get_transformer_quantization_config(subset_size: int) -> Dict[str, Any]: +def _get_transformer_quantization_config(preset: QuantizationPreset, subset_size: int) -> Dict[str, Any]: """ Returns the quantization config for transformer-based models. + :param preset: A preset that controls the quantization mode + (symmetric and asymmetric). It can take the following values: + - `performance`: Symmetric quantization of weights and activations. + - `mixed`: Symmetric quantization of weights and asymmetric + quantization of activations. :param subset_size: Size of a subset to calculate activations statistics used for quantization. :return: The quantization config for transformer-based models. """ return { "algorithm": "quantization", - "preset": "mixed", + "preset": preset.value, "initializer": { "range": {"num_init_samples": subset_size, "type": DEFAULT_RANGE_TYPE}, "batchnorm_adaptation": {"num_bn_adaptation_samples": 0}, @@ -133,12 +138,12 @@ def _get_default_quantization_config(preset: QuantizationPreset, subset_size: in def _create_nncf_config( - preset: QuantizationPreset, + preset: Union[QuantizationPreset, None], target_device: TargetDevice, subset_size: int, - model_type: Optional[ModelType], - ignored_scope: Optional[IgnoredScope], - advanced_parameters: Optional[AdvancedQuantizationParameters], + model_type: Union[ModelType, None], + ignored_scope: Union[IgnoredScope, None], + advanced_parameters: Union[AdvancedQuantizationParameters, None], ) -> NNCFConfig: """ Creates the NNCFConfig for the quantization algorithm. @@ -148,6 +153,7 @@ def _create_nncf_config( - `performance`: Symmetric quantization of weights and activations. - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + - `None`: `mixed` preset is used for `transformer` model type, otherwise `performance`. :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device. @@ -161,10 +167,16 @@ def _create_nncf_config( fine-tuning the quantization algorithm. :return: NNCFConfig for the quantization algorithm. 
""" - if model_type is None: + if preset is None: + if model_type == ModelType.TRANSFORMER: + preset = QuantizationPreset.MIXED + else: + preset = QuantizationPreset.PERFORMANCE + + if model_type == ModelType.TRANSFORMER: + compression_config = _get_transformer_quantization_config(preset, subset_size) + else: compression_config = _get_default_quantization_config(preset, subset_size) - elif model_type == ModelType.TRANSFORMER: - compression_config = _get_transformer_quantization_config(subset_size) if ignored_scope is not None: _ignored_scope = convert_ignored_scope_to_list(ignored_scope) @@ -186,7 +198,7 @@ def _create_nncf_config( def quantize_impl( model: torch.nn.Module, calibration_dataset: Dataset, - preset: QuantizationPreset, + preset: Union[QuantizationPreset, None], target_device: TargetDevice, subset_size: int, fast_bias_correction: bool, diff --git a/tests/common/quantization/test_minmax.py b/tests/common/quantization/test_minmax.py new file mode 100644 index 00000000000..719012a5a02 --- /dev/null +++ b/tests/common/quantization/test_minmax.py @@ -0,0 +1,40 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.quantization.structs import QuantizerGroup +from nncf.parameters import ModelType +from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization + + +@pytest.mark.parametrize( + "preset,model_type,activation_mode,weights_mode", + [ + (None, None, QuantizationMode.SYMMETRIC, QuantizationMode.SYMMETRIC), + (QuantizationPreset.PERFORMANCE, None, QuantizationMode.SYMMETRIC, QuantizationMode.SYMMETRIC), + (QuantizationPreset.MIXED, None, QuantizationMode.ASYMMETRIC, QuantizationMode.SYMMETRIC), + (None, ModelType.TRANSFORMER, QuantizationMode.ASYMMETRIC, QuantizationMode.SYMMETRIC), + (QuantizationPreset.PERFORMANCE, ModelType.TRANSFORMER, QuantizationMode.SYMMETRIC, QuantizationMode.SYMMETRIC), + (QuantizationPreset.MIXED, ModelType.TRANSFORMER, QuantizationMode.ASYMMETRIC, QuantizationMode.SYMMETRIC), + ], +) +def test_quantization_preset(preset, model_type, activation_mode, weights_mode): + minmax = MinMaxQuantization(preset=preset, model_type=model_type) + + global_quantizer_constraints = getattr(minmax, "_global_quantizer_constraints") + assert ( + global_quantizer_constraints[QuantizerGroup.ACTIVATIONS].qconf_attr_vs_constraint_dict["mode"] + == activation_mode + ) + assert global_quantizer_constraints[QuantizerGroup.WEIGHTS].qconf_attr_vs_constraint_dict["mode"] == weights_mode diff --git a/tests/openvino/conftest.py b/tests/openvino/conftest.py index 738ba7efc44..b107b3db32a 100644 --- a/tests/openvino/conftest.py +++ b/tests/openvino/conftest.py @@ -32,6 +32,14 @@ def data(request): return Path(option) +@pytest.fixture(name="omz_cache_dir") +def models(request): + option = request.config.getoption("--data") + if option is None: + return Path(MODELS_PATH) + return 
Path(option) + + # Custom markers specifying tests to be run only if a specific option # is present on the pytest command line must be registered here. MARKS_VS_OPTIONS = {**COMMON_SCOPE_MARKS_VS_OPTIONS} @@ -46,4 +54,6 @@ def pytest_collection_modifyitems(config, items): OPENVINO_NATIVE_TEST_ROOT = OPENVINO_TEST_ROOT / "native" AC_CONFIGS_DIR = OPENVINO_TEST_ROOT / "data" / "ac_configs" OPENVINO_DATASET_DEFINITIONS_PATH = OPENVINO_TEST_ROOT / "data" / "ov_dataset_definitions.yml" -DATASET_PATH = "~/.cache/nncf/datasets" +NNCF_CACHE_PATH = Path("~/.cache/nncf") +DATASET_PATH = NNCF_CACHE_PATH / "datasets" +MODELS_PATH = NNCF_CACHE_PATH / "models" diff --git a/tests/openvino/native/data/2023.2/reference_scales/yolo-v4-tiny-tf_mixed.json b/tests/openvino/native/data/2023.2/reference_scales/yolo-v4-tiny-tf_mixed.json index f452fd9ab28..c589abba014 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/yolo-v4-tiny-tf_mixed.json +++ b/tests/openvino/native/data/2023.2/reference_scales/yolo-v4-tiny-tf_mixed.json @@ -7149,13 +7149,13 @@ ] ] }, - "Transpose_1760/fq_output_0": { + "Transpose_2267/fq_output_0": { "input_low": -0.3571314811706543, "input_high": 4.435948848724365, "output_low": -0.3571314811706543, "output_high": 4.435948848724365 }, - "Multiply_3985/fq_weights_1": { + "Multiply_4809/fq_weights_1": { "input_low": [ [ [ @@ -14333,13 +14333,13 @@ ] ] }, - "Transpose_1630/fq_output_0": { + "Transpose_2025/fq_output_0": { "input_low": -0.7855930924415588, "input_high": 3.5693252086639404, "output_low": -0.7855930924415588, "output_high": 3.5693252086639404 }, - "Multiply_3929/fq_weights_1": { + "Multiply_4753/fq_weights_1": { "input_low": [ [ [ @@ -21517,13 +21517,13 @@ ] ] }, - "Transpose_1566/fq_output_0": { + "Transpose_1905/fq_output_0": { "input_low": -0.5701775550842285, - "input_high": 4.62251091003418, + "input_high": 4.6225104331970215, "output_low": -0.5701775550842285, - "output_high": 4.62251091003418 + "output_high": 4.6225104331970215 }, - "Multiply_3901/fq_weights_1": { + "Multiply_4725/fq_weights_1": { "input_low": [ [ [ @@ -25117,13 +25117,13 @@ ] ] }, - "Transpose_1531/fq_output_0": { + "Transpose_1842/fq_output_0": { "input_low": -0.7855930924415588, "input_high": 3.5693252086639404, "output_low": -0.7855930924415588, "output_high": 3.5693252086639404 }, - "Multiply_3887/fq_weights_1": { + "Multiply_4711/fq_weights_1": { "input_low": [ [ [ @@ -32301,13 +32301,13 @@ ] ] }, - "Transpose_1497/fq_output_0": { - "input_low": -0.9937790632247925, + "Transpose_1780/fq_output_0": { + "input_low": -0.9937791228294373, "input_high": 5.855238914489746, - "output_low": -0.9937790632247925, + "output_low": -0.9937791228294373, "output_high": 5.855238914489746 }, - "Multiply_3873/fq_weights_1": { + "Multiply_4697/fq_weights_1": { "input_low": [ [ [ @@ -35901,13 +35901,13 @@ ] ] }, - "Transpose_1433/fq_output_0": { + "Transpose_1660/fq_output_0": { "input_low": -0.7652094960212708, "input_high": 9.504707336425781, "output_low": -0.7652094960212708, "output_high": 9.504707336425781 }, - "Multiply_3845/fq_weights_1": { + "Multiply_4669/fq_weights_1": { "input_low": [ [ [ @@ -37709,13 +37709,13 @@ ] ] }, - "Transpose_1398/fq_output_0": { - "input_low": -0.9937790632247925, + "Transpose_1597/fq_output_0": { + "input_low": -0.9937791228294373, "input_high": 5.855238914489746, - "output_low": -0.9937790632247925, + "output_low": -0.9937791228294373, "output_high": 5.855238914489746 }, - "Multiply_3831/fq_weights_1": { + "Multiply_4655/fq_weights_1": { "input_low": [ [ [ @@ 
-41309,13 +41309,13 @@ ] ] }, - "Transpose_1364/fq_output_0": { + "Transpose_1535/fq_output_0": { "input_low": -1.2554621696472168, - "input_high": 10.178211212158203, + "input_high": 10.178210258483887, "output_low": -1.2554621696472168, - "output_high": 10.178211212158203 + "output_high": 10.178210258483887 }, - "Multiply_3817/fq_weights_1": { + "Multiply_4641/fq_weights_1": { "input_low": [ [ [ @@ -43117,13 +43117,13 @@ ] ] }, - "Transpose_1300/fq_output_0": { + "Transpose_1415/fq_output_0": { "input_low": -0.791810154914856, - "input_high": 6.170658588409424, + "input_high": 6.170658111572266, "output_low": -0.791810154914856, - "output_high": 6.170658588409424 + "output_high": 6.170658111572266 }, - "Multiply_3789/fq_weights_1": { + "Multiply_4613/fq_weights_1": { "input_low": [ [ [ @@ -44029,13 +44029,13 @@ ] ] }, - "Transpose_1265/fq_output_0": { + "Transpose_1352/fq_output_0": { "input_low": -1.2554621696472168, - "input_high": 10.178211212158203, + "input_high": 10.178210258483887, "output_low": -1.2554621696472168, - "output_high": 10.178211212158203 + "output_high": 10.178210258483887 }, - "Multiply_3775/fq_weights_1": { + "Multiply_4599/fq_weights_1": { "input_low": [ [ [ @@ -45837,13 +45837,13 @@ ] ] }, - "Transpose_1235/fq_output_0": { + "Transpose_1294/fq_output_0": { "input_low": -0.5645939111709595, "input_high": 12.523719787597656, "output_low": -0.5645939111709595, "output_high": 12.523719787597656 }, - "Multiply_3761/fq_weights_1": { + "Multiply_4585/fq_weights_1": { "input_low": [ [ [ @@ -47645,13 +47645,13 @@ ] ] }, - "Transpose_1178/fq_output_0": { + "Transpose_1209/fq_output_0": { "input_low": -1.0276108980178833, "input_high": 2.375516176223755, "output_low": -1.0276108980178833, "output_high": 2.375516176223755 }, - "Multiply_3747/fq_weights_1": { + "Multiply_4571/fq_weights_1": { "input_low": [ [ [ @@ -48563,13 +48563,13 @@ "output_low": 0.0, "output_high": 0.9999967813491821 }, - "Transpose_1330/fq_output_0": { + "Transpose_1473/fq_output_0": { "input_low": -0.791810154914856, - "input_high": 6.170658588409424, + "input_high": 6.170658111572266, "output_low": -0.791810154914856, - "output_high": 6.170658588409424 + "output_high": 6.170658111572266 }, - "Multiply_3803/fq_weights_1": { + "Multiply_4627/fq_weights_1": { "input_low": [ [ [ @@ -49475,13 +49475,13 @@ ] ] }, - "Transpose_1463/fq_output_0": { + "Transpose_1718/fq_output_0": { "input_low": -0.7652094960212708, "input_high": 9.504707336425781, "output_low": -0.7652094960212708, "output_high": 9.504707336425781 }, - "Multiply_3859/fq_weights_1": { + "Multiply_4683/fq_weights_1": { "input_low": [ [ [ @@ -51283,13 +51283,13 @@ ] ] }, - "Transpose_1596/fq_output_0": { + "Transpose_1963/fq_output_0": { "input_low": -0.5701775550842285, - "input_high": 4.62251091003418, + "input_high": 4.6225104331970215, "output_low": -0.5701775550842285, - "output_high": 4.62251091003418 + "output_high": 4.6225104331970215 }, - "Multiply_3915/fq_weights_1": { + "Multiply_4739/fq_weights_1": { "input_low": [ [ [ @@ -54889,13 +54889,13 @@ "output_low": -0.7855930924415588, "output_high": 3.5693252086639404 }, - "Transpose_1724/fq_output_0": { + "Transpose_2203/fq_output_0": { "input_low": -0.4952194392681122, - "input_high": 3.578359842300415, + "input_high": 3.578359603881836, "output_low": -0.4952194392681122, - "output_high": 3.578359842300415 + "output_high": 3.578359603881836 }, - "Multiply_3971/fq_weights_1": { + "Multiply_4795/fq_weights_1": { "input_low": [ [ [ @@ -58489,13 +58489,13 @@ ] ] }, - 
"Transpose_1694/fq_output_0": { + "Transpose_2145/fq_output_0": { "input_low": -0.6254710555076599, "input_high": 2.1244447231292725, "output_low": -0.6254710555076599, "output_high": 2.1244447231292725 }, - "Multiply_3957/fq_weights_1": { + "Multiply_4781/fq_weights_1": { "input_low": [ [ [ @@ -65673,13 +65673,13 @@ ] ] }, - "Transpose_1664/fq_output_0": { + "Transpose_2087/fq_output_0": { "input_low": -0.5939716100692749, "input_high": 1.480860710144043, "output_low": -0.5939716100692749, "output_high": 1.480860710144043 }, - "Multiply_3943/fq_weights_1": { + "Multiply_4767/fq_weights_1": { "input_low": [ [ [ @@ -87175,13 +87175,13 @@ ] ] }, - "Transpose_1796/fq_output_0": { + "Transpose_2331/fq_output_0": { "input_low": -0.6007991433143616, "input_high": 4.870764255523682, "output_low": -0.6007991433143616, "output_high": 4.870764255523682 }, - "Multiply_3999/fq_weights_1": { + "Multiply_4823/fq_weights_1": { "input_low": [ [ [ diff --git a/tests/openvino/native/data/2023.2/reference_scales/yolo-v4-tiny-tf_performance.json b/tests/openvino/native/data/2023.2/reference_scales/yolo-v4-tiny-tf_performance.json index 89c067cdfb6..1a79f4cebd1 100644 --- a/tests/openvino/native/data/2023.2/reference_scales/yolo-v4-tiny-tf_performance.json +++ b/tests/openvino/native/data/2023.2/reference_scales/yolo-v4-tiny-tf_performance.json @@ -7149,13 +7149,13 @@ ] ] }, - "Transpose_1760/fq_output_0": { + "Transpose_2267/fq_output_0": { "input_low": -4.470877647399902, "input_high": 4.435948848724365, "output_low": -4.470877647399902, "output_high": 4.435948848724365 }, - "Multiply_3985/fq_weights_1": { + "Multiply_4809/fq_weights_1": { "input_low": [ [ [ @@ -14333,13 +14333,13 @@ ] ] }, - "Transpose_1630/fq_output_0": { + "Transpose_2025/fq_output_0": { "input_low": -3.5974302291870117, "input_high": 3.5693252086639404, "output_low": -3.5974302291870117, "output_high": 3.5693252086639404 }, - "Multiply_3929/fq_weights_1": { + "Multiply_4753/fq_weights_1": { "input_low": [ [ [ @@ -21517,13 +21517,13 @@ ] ] }, - "Transpose_1566/fq_output_0": { + "Transpose_1905/fq_output_0": { "input_low": -4.5784101486206055, "input_high": 4.5426411628723145, "output_low": -4.5784101486206055, "output_high": 4.5426411628723145 }, - "Multiply_3901/fq_weights_1": { + "Multiply_4725/fq_weights_1": { "input_low": [ [ [ @@ -25117,13 +25117,13 @@ ] ] }, - "Transpose_1531/fq_output_0": { + "Transpose_1842/fq_output_0": { "input_low": -3.5974302291870117, "input_high": 3.5693252086639404, "output_low": -3.5974302291870117, "output_high": 3.5693252086639404 }, - "Multiply_3887/fq_weights_1": { + "Multiply_4711/fq_weights_1": { "input_low": [ [ [ @@ -32301,13 +32301,13 @@ ] ] }, - "Transpose_1497/fq_output_0": { + "Transpose_1780/fq_output_0": { "input_low": -5.90134334564209, "input_high": 5.855238914489746, "output_low": -5.90134334564209, "output_high": 5.855238914489746 }, - "Multiply_3873/fq_weights_1": { + "Multiply_4697/fq_weights_1": { "input_low": [ [ [ @@ -35901,13 +35901,13 @@ ] ] }, - "Transpose_1433/fq_output_0": { + "Transpose_1660/fq_output_0": { "input_low": -9.338682174682617, "input_high": 9.265724182128906, "output_low": -9.338682174682617, "output_high": 9.265724182128906 }, - "Multiply_3845/fq_weights_1": { + "Multiply_4669/fq_weights_1": { "input_low": [ [ [ @@ -37709,13 +37709,13 @@ ] ] }, - "Transpose_1398/fq_output_0": { + "Transpose_1597/fq_output_0": { "input_low": -5.90134334564209, "input_high": 5.855238914489746, "output_low": -5.90134334564209, "output_high": 5.855238914489746 }, - 
"Multiply_3831/fq_weights_1": { + "Multiply_4655/fq_weights_1": { "input_low": [ [ [ @@ -41309,13 +41309,13 @@ ] ] }, - "Transpose_1364/fq_output_0": { + "Transpose_1535/fq_output_0": { "input_low": -10.120508193969727, "input_high": 10.041441917419434, "output_low": -10.120508193969727, "output_high": 10.041441917419434 }, - "Multiply_3817/fq_weights_1": { + "Multiply_4641/fq_weights_1": { "input_low": [ [ [ @@ -43117,13 +43117,13 @@ ] ] }, - "Transpose_1300/fq_output_0": { + "Transpose_1415/fq_output_0": { "input_low": -6.1309123039245605, "input_high": 6.083014488220215, "output_low": -6.1309123039245605, "output_high": 6.083014488220215 }, - "Multiply_3789/fq_weights_1": { + "Multiply_4613/fq_weights_1": { "input_low": [ [ [ @@ -44029,13 +44029,13 @@ ] ] }, - "Transpose_1265/fq_output_0": { + "Transpose_1352/fq_output_0": { "input_low": -10.120508193969727, "input_high": 10.041441917419434, "output_low": -10.120508193969727, "output_high": 10.041441917419434 }, - "Multiply_3775/fq_weights_1": { + "Multiply_4599/fq_weights_1": { "input_low": [ [ [ @@ -45837,13 +45837,13 @@ ] ] }, - "Transpose_1235/fq_output_0": { + "Transpose_1294/fq_output_0": { "input_low": -12.622331619262695, "input_high": 12.523719787597656, "output_low": -12.622331619262695, "output_high": 12.523719787597656 }, - "Multiply_3761/fq_weights_1": { + "Multiply_4585/fq_weights_1": { "input_low": [ [ [ @@ -47645,13 +47645,13 @@ ] ] }, - "Transpose_1178/fq_output_0": { + "Transpose_1209/fq_output_0": { "input_low": -2.385321617126465, "input_high": 2.3666863441467285, "output_low": -2.385321617126465, "output_high": 2.3666863441467285 }, - "Multiply_3747/fq_weights_1": { + "Multiply_4571/fq_weights_1": { "input_low": [ [ [ @@ -48563,13 +48563,13 @@ "output_low": 0.0, "output_high": 0.9999967813491821 }, - "Transpose_1330/fq_output_0": { + "Transpose_1473/fq_output_0": { "input_low": -6.1309123039245605, "input_high": 6.083014488220215, "output_low": -6.1309123039245605, "output_high": 6.083014488220215 }, - "Multiply_3803/fq_weights_1": { + "Multiply_4627/fq_weights_1": { "input_low": [ [ [ @@ -49475,13 +49475,13 @@ ] ] }, - "Transpose_1463/fq_output_0": { + "Transpose_1718/fq_output_0": { "input_low": -9.338682174682617, "input_high": 9.265724182128906, "output_low": -9.338682174682617, "output_high": 9.265724182128906 }, - "Multiply_3859/fq_weights_1": { + "Multiply_4683/fq_weights_1": { "input_low": [ [ [ @@ -51283,13 +51283,13 @@ ] ] }, - "Transpose_1596/fq_output_0": { + "Transpose_1963/fq_output_0": { "input_low": -4.5784101486206055, "input_high": 4.5426411628723145, "output_low": -4.5784101486206055, "output_high": 4.5426411628723145 }, - "Multiply_3915/fq_weights_1": { + "Multiply_4739/fq_weights_1": { "input_low": [ [ [ @@ -54889,13 +54889,13 @@ "output_low": -3.5974302291870117, "output_high": 3.5693252086639404 }, - "Transpose_1724/fq_output_0": { + "Transpose_2203/fq_output_0": { "input_low": -3.5974302291870117, "input_high": 3.5693252086639404, "output_low": -3.5974302291870117, "output_high": 3.5693252086639404 }, - "Multiply_3971/fq_weights_1": { + "Multiply_4795/fq_weights_1": { "input_low": [ [ [ @@ -58489,13 +58489,13 @@ ] ] }, - "Transpose_1694/fq_output_0": { + "Transpose_2145/fq_output_0": { "input_low": -2.1335461139678955, "input_high": 2.116877794265747, "output_low": -2.1335461139678955, "output_high": 2.116877794265747 }, - "Multiply_3957/fq_weights_1": { + "Multiply_4781/fq_weights_1": { "input_low": [ [ [ @@ -65673,13 +65673,13 @@ ] ] }, - "Transpose_1664/fq_output_0": { + 
"Transpose_2087/fq_output_0": { "input_low": -1.492521047592163, "input_high": 1.480860710144043, "output_low": -1.492521047592163, "output_high": 1.480860710144043 }, - "Multiply_3943/fq_weights_1": { + "Multiply_4767/fq_weights_1": { "input_low": [ [ [ @@ -87175,13 +87175,13 @@ ] ] }, - "Transpose_1796/fq_output_0": { + "Transpose_2331/fq_output_0": { "input_low": -4.909116744995117, "input_high": 4.870764255523682, "output_low": -4.909116744995117, "output_high": 4.870764255523682 }, - "Multiply_3999/fq_weights_1": { + "Multiply_4823/fq_weights_1": { "input_low": [ [ [ diff --git a/tests/openvino/native/quantization/test_fq_params_calculation.py b/tests/openvino/native/quantization/test_fq_params_calculation.py index 180eb7190c9..3e4ca766e41 100644 --- a/tests/openvino/native/quantization/test_fq_params_calculation.py +++ b/tests/openvino/native/quantization/test_fq_params_calculation.py @@ -133,8 +133,8 @@ def test_overflow_fix_scales(overflow_fix): ids=[QuantizationPreset.PERFORMANCE.value, QuantizationPreset.MIXED.value], ) @pytest.mark.parametrize("model_name", OMZ_MODELS) -def test_omz_models_fq_scales(model_name, preset, inplace_statistics, tmp_path): - download_model(model_name, tmp_path) +def test_omz_models_fq_scales(model_name, preset, inplace_statistics, tmp_path, omz_cache_dir): + download_model(model_name, tmp_path, omz_cache_dir) convert_model(model_name, tmp_path) model_path = tmp_path / "public" / model_name / "FP32" / f"{model_name}.xml" model = ov.Core().read_model(model_path) diff --git a/tests/openvino/native/quantization/test_graphs.py b/tests/openvino/native/quantization/test_graphs.py index 9073a4bb581..949e2b6d08c 100644 --- a/tests/openvino/native/quantization/test_graphs.py +++ b/tests/openvino/native/quantization/test_graphs.py @@ -77,11 +77,11 @@ def test_depthwise_models_fq_placement(model_creator_func): @pytest.mark.parametrize("model_name_params", OMZ_MODELS_QUANTIZE_PARAMS.items(), ids=list(OMZ_MODELS_QUANTIZE_PARAMS)) -def test_omz_models_fq_placement(model_name_params, tmp_path): +def test_omz_models_fq_placement(model_name_params, tmp_path, omz_cache_dir): model_name, q_params = model_name_params params_str = "_".join([param.value for param in q_params.values()]) q_params.update({"inplace_statistics": True}) - download_model(model_name, tmp_path) + download_model(model_name, tmp_path, omz_cache_dir) convert_model(model_name, tmp_path) model_path = tmp_path / "public" / model_name / "FP32" / f"{model_name}.xml" model = ov.Core().read_model(model_path) @@ -116,10 +116,10 @@ def test_transformer_models_fq_placement(model_creator_func, tmp_path): @pytest.mark.parametrize("model_name_params", OMZ_MODELS_SQ_PARAMS.items(), ids=list(OMZ_MODELS_SQ_PARAMS)) -def test_omz_models_sq_placement(model_name_params, tmp_path): +def test_omz_models_sq_placement(model_name_params, tmp_path, omz_cache_dir): model_name, q_params = model_name_params q_params.update({"inplace_statistics": True}) - download_model(model_name, tmp_path) + download_model(model_name, tmp_path, omz_cache_dir) convert_model(model_name, tmp_path) model_path = tmp_path / "public" / model_name / "FP32" / f"{model_name}.xml" model = ov.Core().read_model(model_path) diff --git a/tests/openvino/native/quantization/test_quantization_pipeline.py b/tests/openvino/native/quantization/test_quantization_pipeline.py index 915f15f8b4f..34cc986e048 100644 --- a/tests/openvino/native/quantization/test_quantization_pipeline.py +++ b/tests/openvino/native/quantization/test_quantization_pipeline.py @@ -98,7 +98,7 @@ 
diff --git a/tests/openvino/native/quantization/test_quantization_pipeline.py b/tests/openvino/native/quantization/test_quantization_pipeline.py
index 915f15f8b4f..34cc986e048 100644
--- a/tests/openvino/native/quantization/test_quantization_pipeline.py
+++ b/tests/openvino/native/quantization/test_quantization_pipeline.py
@@ -98,7 +98,7 @@ def test_meta_information(model_creator_func, ignored_options):
     def check_parameters(quantized_model, parameters, path):
         for key, value in parameters.items():
             rt_path = path + [key]
-            if isinstance(value, (QuantizationPreset, TargetDevice)):
+            if isinstance(value, TargetDevice):
                 value = value.value
             if isinstance(value, IgnoredScope):
                 check_parameters(quantized_model, value.__dict__, rt_path)
diff --git a/tests/openvino/native/quantization/test_sanity.py b/tests/openvino/native/quantization/test_sanity.py
index 2bbe4ad38d5..911ce9a3854 100644
--- a/tests/openvino/native/quantization/test_sanity.py
+++ b/tests/openvino/native/quantization/test_sanity.py
@@ -46,11 +46,11 @@
 @pytest.mark.parametrize(
     "model, dataset, ref_metrics, advanced_params", OMZ_MODELS, ids=[model[0] for model in OMZ_MODELS]
 )
-def test_compression(data_dir, tmp_path, model, dataset, ref_metrics, advanced_params):
+def test_compression(data_dir, tmp_path, model, dataset, ref_metrics, advanced_params, omz_cache_dir):
     extracted_data_dir = os.path.dirname(get_dataset_for_test(dataset, data_dir))
     config_path = AC_CONFIGS_DIR / f"{model}.yml"
 
-    download_model(model, tmp_path)
+    download_model(model, tmp_path, omz_cache_dir)
     convert_model(model, tmp_path)
 
     model_path = tmp_path / "public" / model / "FP32" / f"{model}.xml"
diff --git a/tests/openvino/native/test_nncf_graph_builder.py b/tests/openvino/native/test_nncf_graph_builder.py
index 8b21431aa67..1f75de1f084 100644
--- a/tests/openvino/native/test_nncf_graph_builder.py
+++ b/tests/openvino/native/test_nncf_graph_builder.py
@@ -45,8 +45,8 @@ def test_compare_nncf_graph_synthetic_models(model_cls_to_test):
 
 
 @pytest.mark.parametrize("model_name", OMZ_MODELS)
-def test_compare_nncf_graph_omz_models(tmp_path, model_name):
-    download_model(model_name, tmp_path)
+def test_compare_nncf_graph_omz_models(tmp_path, omz_cache_dir, model_name):
+    download_model(model_name, tmp_path, omz_cache_dir)
     convert_model(model_name, tmp_path)
     model_path = tmp_path / "public" / model_name / "FP32" / f"{model_name}.xml"
     model = ov.Core().read_model(model_path)
diff --git a/tests/openvino/omz_helpers.py b/tests/openvino/omz_helpers.py
index 713f84be5ba..77787fd6aa7 100644
--- a/tests/openvino/omz_helpers.py
+++ b/tests/openvino/omz_helpers.py
@@ -25,8 +25,8 @@ def run_command(command: List[str]):
     return cmd_output
 
 
-def download_model(name, path):
-    com_line = ["omz_downloader", "--name", name, "-o", str(path)]
+def download_model(name, path, omz_cache_dir):
+    com_line = ["omz_downloader", "--name", name, "-o", str(path), "--cache_dir", str(omz_cache_dir)]
     _ = run_command(com_line)
 
 
diff --git a/tests/openvino/pot/quantization/test_sanity.py b/tests/openvino/pot/quantization/test_sanity.py
index dc413236d39..36ad34d9b66 100644
--- a/tests/openvino/pot/quantization/test_sanity.py
+++ b/tests/openvino/pot/quantization/test_sanity.py
@@ -32,11 +32,11 @@
 
 
 @pytest.mark.parametrize("model, dataset, ref_metrics", OMZ_MODELS, ids=[model[0] for model in OMZ_MODELS])
-def test_compression(data_dir, tmp_path, model, dataset, ref_metrics):
+def test_compression(data_dir, tmp_path, omz_cache_dir, model, dataset, ref_metrics):
     extracted_data_dir = os.path.dirname(get_dataset_for_test(dataset, data_dir))
     config_path = AC_CONFIGS_DIR / f"{model}.yml"
 
-    download_model(model, tmp_path)
+    download_model(model, tmp_path, omz_cache_dir)
     convert_model(model, tmp_path)
 
     model_path = tmp_path / "public" / model / "FP32" / f"{model}.xml"
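With the extra parameter, `download_model` passes `--cache_dir` to `omz_downloader`, which keeps downloaded archives in that directory and skips the network fetch when a cached copy is present. A short usage sketch follows, assuming the helpers are importable as `tests.openvino.omz_helpers`; the model name and the two paths are illustrative values only.

```python
# Illustrative call of the updated helper; "resnet-18-pytorch" and both
# paths are example values, not fixtures from the test suite.
from pathlib import Path

from tests.openvino.omz_helpers import convert_model, download_model

tmp_path = Path("/tmp/omz_models")      # where the model sources are placed
omz_cache_dir = Path("/tmp/omz_cache")  # persistent cache reused across runs

download_model("resnet-18-pytorch", tmp_path, omz_cache_dir)
convert_model("resnet-18-pytorch", tmp_path)
model_xml = tmp_path / "public" / "resnet-18-pytorch" / "FP32" / "resnet-18-pytorch.xml"
```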
diff --git a/tests/torch/ptq/test_ptq_params.py b/tests/torch/ptq/test_ptq_params.py
index cdc09a7b81a..0ef53c946a1 100644
--- a/tests/torch/ptq/test_ptq_params.py
+++ b/tests/torch/ptq/test_ptq_params.py
@@ -180,6 +180,22 @@ def ignored_scopes_data(self, request):
 @pytest.mark.parametrize(
     "params",
     (
+        {
+            "preset": None,
+            "target_device": TargetDevice.ANY,
+            "subset_size": 1,
+            "model_type": ModelType.TRANSFORMER,
+            "ignored_scope": IgnoredScope(),
+            "advanced_parameters": AdvancedQuantizationParameters(),
+        },
+        {
+            "preset": None,
+            "target_device": TargetDevice.ANY,
+            "subset_size": 1,
+            "model_type": None,
+            "ignored_scope": IgnoredScope(),
+            "advanced_parameters": AdvancedQuantizationParameters(),
+        },
         {
             "preset": QuantizationPreset.MIXED,
             "target_device": TargetDevice.ANY,
@@ -234,7 +250,14 @@ def test_create_nncf_config(params):
     assert config["compression"]["overflow_fix"] == params["advanced_parameters"].overflow_fix.value
     assert config["compression"]["quantize_outputs"] == params["advanced_parameters"].quantize_outputs
 
-    assert config["compression"]["preset"] == params["preset"].value
+    preset = params["preset"]
+    if params["preset"] is None:
+        if params["model_type"] == ModelType.TRANSFORMER:
+            preset = QuantizationPreset.MIXED
+        else:
+            preset = QuantizationPreset.PERFORMANCE
+
+    assert config["compression"]["preset"] == preset.value
 
     range_config = config["compression"]["initializer"]["range"]
     if isinstance(range_config, dict):
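The two new parameter sets exercise the `preset=None` path: when no preset is given, the test expects the mixed preset for transformer models and the performance preset otherwise. The sketch below restates that defaulting rule as a standalone function; it mirrors the assertion logic in the hunk above rather than quoting NNCF's internal implementation, and it assumes the public re-exports `nncf.ModelType` and `nncf.QuantizationPreset`.

```python
# Sketch of the preset-defaulting rule encoded by test_create_nncf_config;
# a mirror of the test's expectation, not NNCF's internal code.
from typing import Optional

from nncf import ModelType, QuantizationPreset


def resolve_preset(preset: Optional[QuantizationPreset], model_type: Optional[ModelType]) -> QuantizationPreset:
    if preset is not None:
        return preset
    # Transformers are more sensitive to activation quantization, so the
    # mixed scheme (symmetric weights, asymmetric activations) is the default.
    if model_type == ModelType.TRANSFORMER:
        return QuantizationPreset.MIXED
    return QuantizationPreset.PERFORMANCE


assert resolve_preset(None, ModelType.TRANSFORMER) is QuantizationPreset.MIXED
assert resolve_preset(None, None) is QuantizationPreset.PERFORMANCE
```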