diff --git a/examples/post_training_quantization/onnx/mobilenet_v2/main.py b/examples/post_training_quantization/onnx/mobilenet_v2/main.py
index 007b537ceb6..5f9850f8cbe 100755
--- a/examples/post_training_quantization/onnx/mobilenet_v2/main.py
+++ b/examples/post_training_quantization/onnx/mobilenet_v2/main.py
@@ -16,7 +16,7 @@
 import numpy as np
 import onnx
-import openvino.runtime as ov
+import openvino as ov
 import torch
 from fastdownload import FastDownload
 from fastdownload import download_url
diff --git a/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/main.py b/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/main.py
index daaa491e4ba..88393421801 100644
--- a/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/main.py
+++ b/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/main.py
@@ -19,7 +19,7 @@
 from typing import Any, Dict, Iterable, List, Optional, Tuple
 
 import numpy as np
-import openvino.runtime as ov
+import openvino as ov
 import torch
 from anomalib.data.mvtec import MVTec
 from anomalib.data.utils import download
@@ -165,12 +165,16 @@ def transform_fn(data_item):
 
 # Benchmark performance, calculate compression rate and validate accuracy
 fp32_ir_path = f"{ROOT}/stfpm_fp32.xml"
-ov.serialize(ov_model, fp32_ir_path)
+ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
 print(f"[1/7] Save FP32 model: {fp32_ir_path}")
 fp32_size = get_model_size(fp32_ir_path, verbose=True)
 
+# To avoid an accuracy drop when saving a model due to compression of unquantized
+# weights to FP16, compress_to_fp16=False should be used. This is necessary because
+# nncf.quantize_with_accuracy_control(...) keeps the most impactful operations within
+# the model in the original precision to achieve the specified model accuracy.
int8_ir_path = f"{ROOT}/stfpm_int8.xml" -ov.serialize(ov_quantized_model, int8_ir_path) +ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False) print(f"[2/7] Save INT8 model: {int8_ir_path}") int8_size = get_model_size(int8_ir_path, verbose=True) diff --git a/examples/post_training_quantization/openvino/mobilenet_v2/main.py b/examples/post_training_quantization/openvino/mobilenet_v2/main.py index 2cc6ab0329f..d6533bd61d8 100644 --- a/examples/post_training_quantization/openvino/mobilenet_v2/main.py +++ b/examples/post_training_quantization/openvino/mobilenet_v2/main.py @@ -16,7 +16,7 @@ from typing import List, Optional import numpy as np -import openvino.runtime as ov +import openvino as ov import torch from fastdownload import FastDownload from sklearn.metrics import accuracy_score @@ -137,12 +137,12 @@ def transform_fn(data_item): # Benchmark performance, calculate compression rate and validate accuracy fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml" -ov.serialize(ov_model, fp32_ir_path) +ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False) print(f"[1/7] Save FP32 model: {fp32_ir_path}") fp32_model_size = get_model_size(fp32_ir_path, verbose=True) int8_ir_path = f"{ROOT}/mobilenet_v2_int8.xml" -ov.serialize(ov_quantized_model, int8_ir_path) +ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False) print(f"[2/7] Save INT8 model: {int8_ir_path}") int8_model_size = get_model_size(int8_ir_path, verbose=True) diff --git a/examples/post_training_quantization/openvino/yolov8/main.py b/examples/post_training_quantization/openvino/yolov8/main.py index f20730970f6..8280d9391b4 100644 --- a/examples/post_training_quantization/openvino/yolov8/main.py +++ b/examples/post_training_quantization/openvino/yolov8/main.py @@ -14,7 +14,7 @@ from typing import Any, Dict, Tuple import numpy as np -import openvino.runtime as ov +import openvino as ov import torch from tqdm import tqdm from ultralytics.cfg import get_cfg @@ -158,7 +158,7 @@ def main(): # Quantize mode in OpenVINO representation quantized_model = quantize(ov_model, data_loader, validator) quantized_model_path = Path(f"{ROOT}/{MODEL_NAME}_openvino_model/{MODEL_NAME}_quantized.xml") - ov.serialize(quantized_model, str(quantized_model_path)) + ov.save_model(quantized_model, str(quantized_model_path), compress_to_fp16=False) # Validate FP32 model fp_stats, total_images, total_objects = validate(ov_model, tqdm(data_loader), validator) diff --git a/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py b/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py index a6e17830289..f56bbed26bf 100644 --- a/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py +++ b/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py @@ -15,7 +15,7 @@ from typing import Any, Dict, Tuple import numpy as np -import openvino.runtime as ov +import openvino as ov import torch from tqdm import tqdm from ultralytics.cfg import get_cfg @@ -222,7 +222,7 @@ def main(): quantized_model = quantize_ac(ov_model, data_loader, validator) quantized_model_path = Path(f"{ROOT}/{MODEL_NAME}_openvino_model/{MODEL_NAME}_quantized.xml") - ov.serialize(quantized_model, str(quantized_model_path)) + ov.save_model(quantized_model, str(quantized_model_path), compress_to_fp16=False) # Validate FP32 model fp_stats, total_images, total_objects = validate(ov_model, tqdm(data_loader), validator) diff --git 
index 9297d5cf94f..35f9b35c06c 100644
--- a/examples/post_training_quantization/torch/mobilenet_v2/main.py
+++ b/examples/post_training_quantization/torch/mobilenet_v2/main.py
@@ -16,7 +16,7 @@
 from typing import List, Optional
 
 import numpy as np
-import openvino.runtime as ov
+import openvino as ov
 import torch
 from fastdownload import FastDownload
 from openvino.tools import mo
@@ -173,12 +173,12 @@ def transform_fn(data_item):
 ov_quantized_model = mo.convert_model(int8_onnx_path)
 
 fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml"
-ov.serialize(ov_model, fp32_ir_path)
+ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
 print(f"[1/7] Save FP32 model: {fp32_ir_path}")
 fp32_model_size = get_model_size(fp32_ir_path, verbose=True)
 
 int8_ir_path = f"{ROOT}/mobilenet_v2_int8.xml"
-ov.serialize(ov_quantized_model, int8_ir_path)
+ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False)
 print(f"[2/7] Save INT8 model: {int8_ir_path}")
 int8_model_size = get_model_size(int8_ir_path, verbose=True)
 
diff --git a/examples/post_training_quantization/torch/ssd300_vgg16/main.py b/examples/post_training_quantization/torch/ssd300_vgg16/main.py
index c90ee304e2c..6c495ec03ce 100644
--- a/examples/post_training_quantization/torch/ssd300_vgg16/main.py
+++ b/examples/post_training_quantization/torch/ssd300_vgg16/main.py
@@ -18,7 +18,7 @@
 
 import nncf
 from nncf.torch import disable_tracing
-import openvino.runtime as ov
+import openvino as ov
 import torch
 import torchvision
 from fastdownload import FastDownload
@@ -163,12 +163,12 @@ def main():
     ov_quantized_model = mo.convert_model(int8_onnx_path)
 
     fp32_ir_path = f"{ROOT}/ssd300_vgg16_fp32.xml"
-    ov.serialize(ov_model, fp32_ir_path)
+    ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
     print(f"[1/7] Save FP32 model: {fp32_ir_path}")
     fp32_model_size = get_model_size(fp32_ir_path, verbose=True)
 
     int8_ir_path = f"{ROOT}/ssd300_vgg16_int8.xml"
-    ov.serialize(ov_quantized_model, int8_ir_path)
+    ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False)
     print(f"[2/7] Save INT8 model: {int8_ir_path}")
     int8_model_size = get_model_size(int8_ir_path, verbose=True)
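
Note (not part of the diff): a minimal sketch of the save pattern these changes converge on, assuming OpenVINO 2023.1+ and NNCF are installed. The model path, input shape, and synthetic calibration data below are placeholders for illustration, not values taken from the examples above.

import numpy as np
import nncf
import openvino as ov

# Read an FP32 OpenVINO IR (placeholder path).
core = ov.Core()
ov_model = core.read_model("mobilenet_v2_fp32.xml")

# Tiny synthetic calibration set, only to illustrate the call signature.
calibration_data = [np.random.rand(1, 3, 224, 224).astype(np.float32) for _ in range(10)]
calibration_dataset = nncf.Dataset(calibration_data)

quantized_model = nncf.quantize(ov_model, calibration_dataset)

# Old API: ov.serialize(quantized_model, "mobilenet_v2_int8.xml")
# New API: write the IR without FP16 weight compression so that layers NNCF
# left in the original precision are not converted on save.
ov.save_model(quantized_model, "mobilenet_v2_int8.xml", compress_to_fp16=False)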