Merge remote-tracking branch 'origin/develop' into dl/torch/levit
daniil-lyakhov committed Oct 26, 2023
2 parents 83851ef + 76bb1f0 commit 23687f5
Showing 37 changed files with 330 additions and 276 deletions.
@@ -16,7 +16,7 @@

import numpy as np
import onnx
-import openvino.runtime as ov
+import openvino as ov
import torch
from fastdownload import FastDownload
from fastdownload import download_url
@@ -19,7 +19,7 @@
from typing import Any, Dict, Iterable, List, Optional, Tuple

import numpy as np
-import openvino.runtime as ov
+import openvino as ov
import torch
from anomalib.data.mvtec import MVTec
from anomalib.data.utils import download
@@ -165,12 +165,16 @@ def transform_fn(data_item):
# Benchmark performance, calculate compression rate and validate accuracy

fp32_ir_path = f"{ROOT}/stfpm_fp32.xml"
-ov.serialize(ov_model, fp32_ir_path)
+ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
print(f"[1/7] Save FP32 model: {fp32_ir_path}")
fp32_size = get_model_size(fp32_ir_path, verbose=True)

+# To avoid an accuracy drop when saving a model due to compression of unquantized
+# weights to FP16, compress_to_fp16=False should be used. This is necessary because
+# nncf.quantize_with_accuracy_control(...) keeps the most impactful operations within
+# the model in the original precision to achieve the specified model accuracy.
int8_ir_path = f"{ROOT}/stfpm_int8.xml"
-ov.serialize(ov_quantized_model, int8_ir_path)
+ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False)
print(f"[2/7] Save INT8 model: {int8_ir_path}")
int8_size = get_model_size(int8_ir_path, verbose=True)

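Editor's note (not part of the commit): the example scripts in this diff all migrate from the deprecated ov.serialize() to ov.save_model(). The sketch below illustrates the new pattern; the helper name save_ir and the path argument are illustrative, not taken from the diff.

import openvino as ov  # replaces the old `import openvino.runtime as ov`

def save_ir(model: ov.Model, path: str) -> None:
    # Old API: ov.serialize(model, path)
    # ov.save_model() compresses FP32 weights to FP16 by default, so the examples
    # pass compress_to_fp16=False to keep unquantized weights in their original
    # precision, which matters when quantize_with_accuracy_control() has kept some
    # operations in the original precision to satisfy the accuracy constraint.
    ov.save_model(model, path, compress_to_fp16=False)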
@@ -16,7 +16,7 @@
from typing import List, Optional

import numpy as np
-import openvino.runtime as ov
+import openvino as ov
import torch
from fastdownload import FastDownload
from sklearn.metrics import accuracy_score
@@ -137,12 +137,12 @@ def transform_fn(data_item):
# Benchmark performance, calculate compression rate and validate accuracy

fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml"
-ov.serialize(ov_model, fp32_ir_path)
+ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
print(f"[1/7] Save FP32 model: {fp32_ir_path}")
fp32_model_size = get_model_size(fp32_ir_path, verbose=True)

int8_ir_path = f"{ROOT}/mobilenet_v2_int8.xml"
-ov.serialize(ov_quantized_model, int8_ir_path)
+ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False)
print(f"[2/7] Save INT8 model: {int8_ir_path}")
int8_model_size = get_model_size(int8_ir_path, verbose=True)

4 changes: 2 additions & 2 deletions examples/post_training_quantization/openvino/yolov8/main.py
@@ -14,7 +14,7 @@
from typing import Any, Dict, Tuple

import numpy as np
-import openvino.runtime as ov
+import openvino as ov
import torch
from tqdm import tqdm
from ultralytics.cfg import get_cfg
@@ -158,7 +158,7 @@ def main():
# Quantize mode in OpenVINO representation
quantized_model = quantize(ov_model, data_loader, validator)
quantized_model_path = Path(f"{ROOT}/{MODEL_NAME}_openvino_model/{MODEL_NAME}_quantized.xml")
-ov.serialize(quantized_model, str(quantized_model_path))
+ov.save_model(quantized_model, str(quantized_model_path), compress_to_fp16=False)

# Validate FP32 model
fp_stats, total_images, total_objects = validate(ov_model, tqdm(data_loader), validator)
@@ -15,7 +15,7 @@
from typing import Any, Dict, Tuple

import numpy as np
-import openvino.runtime as ov
+import openvino as ov
import torch
from tqdm import tqdm
from ultralytics.cfg import get_cfg
@@ -222,7 +222,7 @@ def main():
quantized_model = quantize_ac(ov_model, data_loader, validator)

quantized_model_path = Path(f"{ROOT}/{MODEL_NAME}_openvino_model/{MODEL_NAME}_quantized.xml")
-ov.serialize(quantized_model, str(quantized_model_path))
+ov.save_model(quantized_model, str(quantized_model_path), compress_to_fp16=False)

# Validate FP32 model
fp_stats, total_images, total_objects = validate(ov_model, tqdm(data_loader), validator)
@@ -16,7 +16,7 @@
from typing import List, Optional

import numpy as np
-import openvino.runtime as ov
+import openvino as ov
import torch
from fastdownload import FastDownload
from openvino.tools import mo
@@ -173,12 +173,12 @@ def transform_fn(data_item):
ov_quantized_model = mo.convert_model(int8_onnx_path)

fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml"
-ov.serialize(ov_model, fp32_ir_path)
+ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
print(f"[1/7] Save FP32 model: {fp32_ir_path}")
fp32_model_size = get_model_size(fp32_ir_path, verbose=True)

int8_ir_path = f"{ROOT}/mobilenet_v2_int8.xml"
-ov.serialize(ov_quantized_model, int8_ir_path)
+ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False)
print(f"[2/7] Save INT8 model: {int8_ir_path}")
int8_model_size = get_model_size(int8_ir_path, verbose=True)

@@ -18,7 +18,7 @@
import nncf
from nncf.torch import disable_tracing

-import openvino.runtime as ov
+import openvino as ov
import torch
import torchvision
from fastdownload import FastDownload
@@ -163,12 +163,12 @@ def main():
ov_quantized_model = mo.convert_model(int8_onnx_path)

fp32_ir_path = f"{ROOT}/ssd300_vgg16_fp32.xml"
-ov.serialize(ov_model, fp32_ir_path)
+ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
print(f"[1/7] Save FP32 model: {fp32_ir_path}")
fp32_model_size = get_model_size(fp32_ir_path, verbose=True)

int8_ir_path = f"{ROOT}/ssd300_vgg16_int8.xml"
-ov.serialize(ov_quantized_model, int8_ir_path)
+ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False)
print(f"[2/7] Save INT8 model: {int8_ir_path}")
int8_model_size = get_model_size(int8_ir_path, verbose=True)

4 changes: 2 additions & 2 deletions nncf/experimental/torch/quantization/quantize_model.py
@@ -9,7 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, Optional, Tuple, Union

import torch

@@ -87,7 +87,7 @@ def send_to_device(tensor):
def quantize_impl(
model: torch.nn.Module,
calibration_dataset: Dataset,
-preset: QuantizationPreset,
+preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
4 changes: 2 additions & 2 deletions nncf/onnx/quantization/quantize_model.py
@@ -9,7 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-from typing import Optional
+from typing import Optional, Union

import onnx

@@ -31,7 +31,7 @@
def quantize_impl(
model: onnx.ModelProto,
calibration_dataset: Dataset,
-preset: QuantizationPreset,
+preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
33 changes: 0 additions & 33 deletions nncf/openvino/graph/model_utils.py
@@ -16,42 +16,9 @@
from nncf.common.graph.graph import NNCFGraph
from nncf.common.graph.transformations.layout import TransformationLayout
from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS
-from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionBackpropDataMetatype
-from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype
-from nncf.openvino.graph.metatypes.openvino_metatypes import OVDepthwiseConvolutionMetatype
-from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionBackpropDataMetatype
-from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionMetatype
from nncf.openvino.graph.node_utils import create_bias_tensor
from nncf.openvino.graph.node_utils import is_node_with_bias
from nncf.openvino.graph.transformations.command_creation import OVCommandCreator


-def insert_null_biases(model: ov.Model, graph: NNCFGraph) -> ov.Model:
-    """
-    This method finds and inserts zero biases for the layers that should have it.
-    :param model: ov.Model instance.
-    :param graph: Model graph.
-    :return: Updated ov.Model instance with zero biases
-    """
-    types_to_insert_bias = [
-        OVConvolutionMetatype,
-        OVGroupConvolutionMetatype,
-        OVDepthwiseConvolutionMetatype,
-        OVConvolutionBackpropDataMetatype,
-        OVGroupConvolutionBackpropDataMetatype,
-    ]
-    nodes_without_biases = graph.get_nodes_by_metatypes(types_to_insert_bias)
-    nodes_without_biases = [node for node in nodes_without_biases if not is_node_with_bias(node, graph)]
-    transformation_layout = TransformationLayout()
-    model_transformer = ModelTransformerFactory.create(model)
-    for node_without_bias in nodes_without_biases:
-        const_value = create_bias_tensor(node_without_bias, graph, 0)
-        bias_insertion_command = OVCommandCreator.create_command_to_insert_bias(node_without_bias, const_value)
-        transformation_layout.register(bias_insertion_command)
-    return model_transformer.transform(transformation_layout)


def remove_fq_from_inputs(model: ov.Model, graph: NNCFGraph) -> ov.Model:
"""
This method removes the activation Fake Quantize nodes from the model.
25 changes: 14 additions & 11 deletions nncf/openvino/pot/quantization/quantize_model.py
@@ -12,7 +12,7 @@
import logging
import tempfile
from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, Optional
+from typing import Any, Callable, Dict, Iterable, Optional, Union

import openvino.runtime as ov
from openvino._offline_transformations import compress_quantize_weights_transformation
@@ -192,22 +192,22 @@ def _create_quantization_group_config(


def _create_quantization_config(
-preset: QuantizationPreset,
+preset: Union[QuantizationPreset, None],
target_device: TargetDevice,
subset_size: int,
fast_bias_correction: bool,
-model_type: Optional[ModelType],
-ignored_scope: Optional[IgnoredScope],
-advanced_parameters: Optional[AdvancedQuantizationParameters],
+model_type: Union[ModelType, None],
+ignored_scope: Union[IgnoredScope, None],
+advanced_parameters: Union[AdvancedQuantizationParameters, None],
) -> Dict[str, Any]:
"""
Creates a quantization configuration.
-:param preset: A preset that controls the quantization mode
-    (symmetric and asymmetric). It can take the following values:
+:param preset: A preset that controls the quantization mode (symmetric and asymmetric).
+    It can take the following values:
- `performance`: Symmetric quantization of weights and activations.
-- `mixed`: Symmetric quantization of weights and asymmetric
-    quantization of activations.
+- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+- `None`: The `mixed` preset is used for the `transformer` model type, otherwise `performance`.
:param target_device: A target device the specificity of which will be
taken into account while compressing in order to obtain the best
performance for this type of device.
@@ -224,6 +224,9 @@ def _create_quantization_config(
fine-tuning the quantization algorithm.
:return: A POT quantization configuration as dict.
"""
+if preset is None:
+    preset = QuantizationPreset.MIXED if model_type == ModelType.TRANSFORMER else QuantizationPreset.PERFORMANCE

config = {
"target_device": target_device.value,
"preset": preset.value,
@@ -320,7 +323,7 @@ def _create_engine_config(
def quantize_impl(
model: ov.Model,
calibration_dataset: Dataset,
-preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
@@ -423,7 +426,7 @@ def quantize_with_accuracy_control_impl(
validation_fn: Callable[[ov.CompiledModel, Iterable[Any]], float],
max_drop: float = 0.01,
drop_type: DropType = DropType.ABSOLUTE,
-preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
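Editor's note (not part of the commit): across these quantization entry points, preset now defaults to None instead of QuantizationPreset.PERFORMANCE, and _create_quantization_config resolves None from the model type. A small self-contained sketch of that resolution, assuming the public nncf exports named below:

from typing import Optional

from nncf import ModelType, QuantizationPreset

def resolve_preset(preset: Optional[QuantizationPreset], model_type: Optional[ModelType]) -> QuantizationPreset:
    # Mirrors the new default handling: `mixed` for transformer models,
    # `performance` otherwise, unless the caller picked a preset explicitly.
    if preset is None:
        return QuantizationPreset.MIXED if model_type == ModelType.TRANSFORMER else QuantizationPreset.PERFORMANCE
    return preset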
16 changes: 8 additions & 8 deletions nncf/openvino/quantization/quantize_model.py
@@ -96,7 +96,7 @@ def dump_parameters(model: ov.Model, parameters: Dict, path: Optional[List] = No
def native_quantize_if_op_impl(
model: ov.Model,
calibration_dataset: Dataset,
-preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
@@ -138,7 +138,7 @@ def native_quantize_if_op_impl(
dump_parameters(
quantized_model,
{
"preset": preset.value,
"preset": preset,
"target_device": target_device.value,
"subset_size": subset_size,
"fast_bias_correction": fast_bias_correction,
@@ -154,7 +154,7 @@
def native_quantize_impl(
model: ov.Model,
calibration_dataset: Dataset,
-preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
@@ -184,7 +184,7 @@ def native_quantize_impl(
dump_parameters(
quantized_model,
{
"preset": preset.value,
"preset": preset,
"target_device": target_device.value,
"subset_size": subset_size,
"fast_bias_correction": fast_bias_correction,
@@ -206,7 +206,7 @@
validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]],
max_drop: float = 0.01,
drop_type: DropType = DropType.ABSOLUTE,
-preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
@@ -321,7 +321,7 @@ def native_quantize_with_accuracy_control_impl(
dump_parameters(
quantized_model,
{
"preset": preset.value,
"preset": preset,
"target_device": target_device.value,
"subset_size": subset_size,
"fast_bias_correction": fast_bias_correction,
@@ -339,7 +339,7 @@
def quantize_impl(
model: ov.Model,
calibration_dataset: Dataset,
-preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
@@ -396,7 +396,7 @@ def quantize_with_accuracy_control_impl(
validation_fn: Callable[[Any, Iterable[Any]], float],
max_drop: float = 0.01,
drop_type: DropType = DropType.ABSOLUTE,
-preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
+preset: Optional[QuantizationPreset] = None,
target_device: TargetDevice = TargetDevice.ANY,
subset_size: int = 300,
fast_bias_correction: bool = True,
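Editor's note (an inference, not stated in the diff): dump_parameters now records preset rather than preset.value, presumably because preset may be None under the new default and None.value would raise AttributeError. A tiny illustrative helper (hypothetical name) for code that still wants the string value:

from typing import Optional

from nncf import QuantizationPreset

def preset_for_logging(preset: Optional[QuantizationPreset]) -> Optional[str]:
    # Safe counterpart of the old unconditional `preset.value` access.
    return None if preset is None else preset.value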
3 changes: 0 additions & 3 deletions nncf/quantization/algorithms/bias_correction/algorithm.py
@@ -134,7 +134,6 @@ def apply(
dataset: Optional[Dataset] = None,
) -> TModel:
self._set_backend_entity(model)
-model = self._backend_entity.insert_null_biases(model, graph)
main_transformations_layout = TransformationLayout()
main_model_transformer = ModelTransformerFactory.create(model)

@@ -488,8 +487,6 @@ def output_filter_func(point):
def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
self._set_backend_entity(model)
model_copy = self._backend_entity.remove_fq_from_inputs(copy_model(model), graph)
-graph_copy = NNCFGraphFactory.create(model_copy)
-model_copy = self._backend_entity.insert_null_biases(model_copy, graph_copy)
nncf_graph = NNCFGraphFactory.create(model_copy)
statistic_container = StatisticPointsContainer()

11 changes: 0 additions & 11 deletions nncf/quantization/algorithms/bias_correction/backend.py
@@ -203,14 +203,3 @@ def remove_fq_from_inputs(model: TModel, nncf_graph: NNCFGraph) -> TModel:
:param nncf_graph: NNCFGraph instance.
:return: TModel without activation Fake Quantize nodes (or Quantize-Dequantize pairs).
"""

-@staticmethod
-@abstractmethod
-def insert_null_biases(model: TModel, nncf_graph: NNCFGraph) -> TModel:
-    """
-    This method finds and inserts zero biases for the layers that should have it.
-    :param model: TModel instance.
-    :param nncf_graph: NNCFGraph instance.
-    :return: TModel instance with zero biases
-    """