Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OV] Introduce support of quantization If operation #2101

Merged
merged 66 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
e7eddd0
Introduce support of quantization If op for OV
kshpv Sep 1, 2023
846f0fd
add backend entities
kshpv Sep 1, 2023
9a6fcd4
code improvement
kshpv Sep 4, 2023
704a72d
typo
kshpv Sep 4, 2023
8314131
add reinitialization of cached variables for MinMax
kshpv Sep 5, 2023
3fe3b5e
Apply comments
kshpv Sep 6, 2023
9f7400c
update logic
kshpv Sep 6, 2023
e097cd4
Implement dfs approach
kshpv Sep 6, 2023
5139a18
typo
kshpv Sep 6, 2023
3b00f73
remove torch onnx backend impl
kshpv Sep 6, 2023
9ea3ff2
code improvements
kshpv Sep 6, 2023
b168892
make collect_dataitems_for_children_models common
kshpv Sep 7, 2023
c2473a9
generalize make_dataset_for_child_models
kshpv Sep 7, 2023
9201534
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 7, 2023
c192331
unification
kshpv Sep 7, 2023
f20892d
generalize and remove set_child_model
kshpv Sep 7, 2023
6a7aeaf
update dataset calculation for submodels
kshpv Sep 8, 2023
9d0c27b
generalize logic
kshpv Sep 8, 2023
4970d9d
add extract if subgraph transform
kshpv Sep 8, 2023
589493b
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 8, 2023
538c4b8
make private
kshpv Sep 8, 2023
10de119
code improvements
kshpv Sep 8, 2023
fb4559f
code improvements
kshpv Sep 8, 2023
08776a0
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 11, 2023
24d88ca
docstrings; update method names
kshpv Sep 11, 2023
4ac706e
typo
kshpv Sep 11, 2023
09c8693
lint
kshpv Sep 11, 2023
9cd5dca
separate method for if condition input name and submodel input names
kshpv Sep 11, 2023
db79117
fix merge typos
kshpv Sep 11, 2023
68d04a8
lint
kshpv Sep 11, 2023
f374441
add minimum statistic sample
kshpv Sep 12, 2023
b7ec929
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 12, 2023
0a9696d
apply comments
kshpv Sep 12, 2023
62c1954
make IF input quantizable for OV
kshpv Sep 12, 2023
25717a9
add hw config for IF to CPU
kshpv Sep 12, 2023
e35d1a3
Update placement of IF op quantization logic
kshpv Sep 15, 2023
af127ac
Remove model_cnt and dumping model method
kshpv Sep 15, 2023
dabd0d2
Rename module
kshpv Sep 15, 2023
bab6a71
remove intermediate_model_dir param for PTQ
kshpv Sep 15, 2023
1160ac5
revert MIN_SAMPLES_NUM
kshpv Sep 15, 2023
88873c6
typo
kshpv Sep 15, 2023
443332c
revert collectors changes
kshpv Sep 15, 2023
2958405
optimize dataset collection
kshpv Sep 15, 2023
b8e1dc6
update main method
kshpv Sep 15, 2023
67032d4
Update method name
kshpv Sep 15, 2023
f13ba41
revert formatting of config
kshpv Sep 18, 2023
9cd0f78
add error if turn on BiasCorrection
kshpv Sep 18, 2023
30d80a0
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 18, 2023
3e831fb
Apply comments
kshpv Sep 18, 2023
587d5e7
update logs
kshpv Sep 19, 2023
8f34fc1
Improve logging
kshpv Sep 19, 2023
346951e
Apply comments
kshpv Sep 20, 2023
2c6f21b
typo
kshpv Sep 20, 2023
52adfd4
update logs
kshpv Sep 20, 2023
4135289
update track and revert FBC changes
kshpv Sep 20, 2023
7bd9e2e
update log msg
kshpv Sep 20, 2023
6e70357
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 20, 2023
7a32199
Remove logging from PTQ; put logging into MinMax
kshpv Sep 21, 2023
6af3309
add WA for new OV
kshpv Sep 21, 2023
e9b4131
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 21, 2023
b21cd59
lint
kshpv Sep 21, 2023
372141e
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 22, 2023
df6cfcb
add graph test
kshpv Sep 22, 2023
1255f1e
minor
kshpv Sep 22, 2023
781e26c
typehints
kshpv Sep 22, 2023
36381a7
lint
kshpv Sep 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions nncf/quantization/advanced_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ class AdvancedQuantizationParameters:
:type bias_correction_params: nncf.quantization.advanced_parameters.AdvancedBiasCorrectionParameters
:param backend_params: Backend-specific parameters.
:type backend_params: Dict[str, Any]
:param intermediate_model_dir: Path to a directory for intermediated models saving.
:type intermediate_model_dir: str
"""

# General parameters
Expand All @@ -170,6 +172,8 @@ class AdvancedQuantizationParameters:
# Backend specific parameters
backend_params: Dict[str, Any] = field(default_factory=dict)

intermediate_model_dir: Optional[str] = None
KodiaqQ marked this conversation as resolved.
Show resolved Hide resolved


@api()
@dataclass
Expand Down
7 changes: 5 additions & 2 deletions nncf/quantization/algorithms/min_max/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,16 @@ def __init__(
quantizer_group, preset, self._quantization_params[quantizer_group]
)

self._reset_cache()
self._algorithm_key = f"MMQ_{hash(self)}"

def _reset_cache(self):
# It prevents the duplicate weight quantizers from being added.
# It can happen when you have layers that share the identical weight tensor.
self._quantization_target_points_to_qconfig = (
collections.OrderedDict()
) # type: OrderedDict[TargetPoint, QuantizerConfig]
self._unified_scale_groups = []
self._algorithm_key = f"MMQ_{hash(self)}"

@property
def available_backends(self) -> Dict[str, BackendType]:
Expand Down Expand Up @@ -677,7 +680,7 @@ def filter_func(point: StatisticPoint) -> bool:

def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
self._set_backend_entity(model)

self._reset_cache()
quantization_target_points, _ = self._get_quantization_target_points(model, graph)
output = StatisticPointsContainer()
for quantization_target_point, qconfig in quantization_target_points.items():
Expand Down
128 changes: 105 additions & 23 deletions nncf/quantization/algorithms/post_training/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# limitations under the License.

from dataclasses import dataclass
from typing import Callable, Dict, List, Optional, TypeVar
from typing import Callable, Dict, List, Optional, Tuple, TypeVar

from nncf import Dataset
from nncf.common.deprecation import warning_deprecated
Expand Down Expand Up @@ -88,12 +88,14 @@ def __init__(
super().__init__()
self.algorithms = []
self.first_stage_algorithms: List[self.FirstStageAlgorithm] = []
self.subset_size = subset_size
alexsu52 marked this conversation as resolved.
Show resolved Hide resolved

if target_device is TargetDevice.VPU:
warning_deprecated("VPU device is deprecated and will no longer be supported in the future.")

if advanced_parameters is None:
advanced_parameters = AdvancedQuantizationParameters()
self.intermediate_model_dir = advanced_parameters.intermediate_model_dir

if model_type == ModelType.TRANSFORMER:
smooth_quant_algorithm = SmoothQuant(
Expand Down Expand Up @@ -163,6 +165,45 @@ def __init__(
def available_backends(self) -> Dict[str, BackendType]:
return

def _is_single_model(self, model: TModel) -> bool:
    """
    Checks whether the model is a single model without child submodels to quantize.

    :param model: Backend-specific input model.
    :return: True if the model has no child submodels, False otherwise.
    :raises RuntimeError: If the model backend is not supported.
    """
    model_backend = get_backend(model)
    # ONNX and Torch models never carry nested submodels in this pipeline.
    if model_backend in (BackendType.ONNX, BackendType.TORCH):
        return True
    if model_backend == BackendType.OPENVINO:
        # Local import to avoid a hard dependency on the OpenVINO backend.
        from nncf.quantization.algorithms.post_training.openvino_backend import OVPostTrainingBackend

        return OVPostTrainingBackend.is_single_model(model)
    raise RuntimeError("Cannot check for child submodels because {} is not supported!".format(model_backend))

def _set_backend_entity(self, model: TModel) -> None:
    """
    Creates a helper class with a backend-specific logic of the algorithm.

    Only the OpenVINO backend supports child-submodel quantization; every other
    backend (including ONNX and Torch) raises the same RuntimeError as before.

    :param model: Backend-specific input model.
    :raises RuntimeError: If the model backend is not supported.
    """
    model_backend = get_backend(model)
    if model_backend == BackendType.OPENVINO:
        # Local import to avoid a hard dependency on the OpenVINO backend.
        from nncf.quantization.algorithms.post_training.openvino_backend import OVPostTrainingBackend

        self._backend_entity = OVPostTrainingBackend()
    else:
        raise RuntimeError(
            "Cannot return backend-specific entity because {} is not supported!".format(model_backend)
        )

def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
if self.first_stage_algorithms:
raise NotImplementedError(
Expand All @@ -176,20 +217,16 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
output.add_statistic_point(statistic_point)
return output

def apply(
def _apply(
self,
model: TModel,
graph: NNCFGraph,
statistic_points: Optional[StatisticPointsContainer] = None,
dataset: Optional[Dataset] = None,
) -> TModel:
modified_model = copy_model(model)
modified_model_graph = graph
backend = get_backend(modified_model)

for first_stage_algorithm in self.first_stage_algorithms:
algorithm = first_stage_algorithm.algorithm

backend = get_backend(model)
if isinstance(algorithm, SmoothQuant) and backend != BackendType.OPENVINO:
nncf_logger.debug(f"{backend.name} does not support SmoothQuant algorithm yet.")
continue
Expand All @@ -199,31 +236,76 @@ def apply(
continue

for pre_pass in first_stage_algorithm.pre_passes:
modified_model = pre_pass(modified_model, modified_model_graph)
modified_model_graph = NNCFGraphFactory.create(modified_model)
model = pre_pass(model, graph)
graph = NNCFGraphFactory.create(model)

statistics_aggregator = StatisticsAggregatorFactory.create(modified_model, dataset)
algo_statistic_points = algorithm.get_statistic_points(modified_model, modified_model_graph)
statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset)
algo_statistic_points = algorithm.get_statistic_points(model, graph)
statistics_aggregator.register_statistic_points(algo_statistic_points)
statistics_aggregator.collect_statistics(modified_model, modified_model_graph)
modified_model = algorithm.apply(
modified_model, modified_model_graph, statistics_aggregator.statistic_points
)
modified_model_graph = NNCFGraphFactory.create(modified_model)
statistics_aggregator.collect_statistics(model, graph)
model = algorithm.apply(model, graph, statistics_aggregator.statistic_points)
model = NNCFGraphFactory.create(model)

if statistic_points is None:
statistics_aggregator = StatisticsAggregatorFactory.create(modified_model, dataset)
statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset)
for algorithm in self.algorithms:
algo_statistic_points = algorithm.get_statistic_points(modified_model, modified_model_graph)
algo_statistic_points = algorithm.get_statistic_points(model, graph)
statistics_aggregator.register_statistic_points(algo_statistic_points)

statistics_aggregator.collect_statistics(modified_model, modified_model_graph)
statistics_aggregator.collect_statistics(model, graph)
statistic_points = statistics_aggregator.statistic_points

for algorithm in self.algorithms[:-1]:
modified_model = algorithm.apply(modified_model, modified_model_graph, statistic_points)
modified_model_graph = NNCFGraphFactory.create(modified_model)
model = algorithm.apply(model, graph, statistic_points)
graph = NNCFGraphFactory.create(model)
# building the model graph is not required after the last algorithm
modified_model = self.algorithms[-1].apply(modified_model, modified_model_graph, statistic_points)
model = self.algorithms[-1].apply(model, graph, statistic_points)

return model

def apply(
    self,
    model: TModel,
    graph: NNCFGraph,
    statistic_points: Optional[StatisticPointsContainer] = None,
    dataset: Optional[Dataset] = None,
) -> TModel:
    """
    Applies the post-training quantization pipeline to the model.

    If the model contains child submodels (e.g. bodies of an OpenVINO If
    operation), each submodel is quantized recursively depth-first; otherwise
    the model is quantized directly.

    :param model: Model to quantize.
    :param graph: NNCFGraph of the model.
    :param statistic_points: Optional pre-collected statistic points.
    :param dataset: Optional calibration dataset.
    :return: Quantized model.
    """
    # Work on a copy so the caller's model object is left untouched.
    model_copy = copy_model(model)
    if self._is_single_model(model_copy):
        return self._apply(model_copy, graph, statistic_points, dataset)
    self._set_backend_entity(model)
    nncf_logger.info("The model consists of child submodels. Each submodel will be quantized iteratively.")
    quantized_model, _ = self._dfs_quantize_models(model_copy, graph, dataset, statistic_points, 0)
    return quantized_model

def _dfs_quantize_models(
    self,
    parent_model: TModel,
    parent_graph: NNCFGraph,
    parent_dataset: Dataset,
    parent_statistic_points: Optional[StatisticPointsContainer],
    parent_model_cnt: int,
) -> Tuple[TModel, int]:
    """
    Quantizes the model depth-first: all child submodels are quantized (and set
    back into the parent) before the parent model itself is quantized.

    :param parent_model: Model to quantize; may contain child submodels.
    :param parent_graph: NNCFGraph of the parent model.
    :param parent_dataset: Calibration dataset for the parent model.
    :param parent_statistic_points: Optional pre-collected statistic points
        (only used for the parent model; children collect their own).
    :param parent_model_cnt: Global model number assigned to the parent model.
    :return: Tuple of the quantized model and the last model number used.
    """
    if not self._backend_entity.is_single_model(parent_model):
        # Extra outputs expose the tensors that are fed into each child submodel,
        # so calibration data for the children can be captured during inference.
        parent_model_with_additional_outputs = self._backend_entity.add_additional_outputs(parent_model)
        dataitems = self._backend_entity.collect_dataitems_for_children_models(
            parent_model_with_additional_outputs, parent_dataset, self.subset_size, parent_model_cnt
        )
        global_model_cnt = parent_model_cnt
        for child_model, backend_params in self._backend_entity.get_child_models(parent_model):
            child_dataset = self._backend_entity.make_dataset_for_child_models(dataitems, **backend_params)

            child_q_model, model_cnt = self._dfs_quantize_models(
                child_model, NNCFGraphFactory.create(child_model), child_dataset, None, global_model_cnt + 1
            )
            # Keep numbering global across the whole recursion, not per-branch.
            global_model_cnt = model_cnt

            nncf_logger.info(f"Set quantized model number {model_cnt} to the original model")
            self._backend_entity.set_child_model(child_q_model, **backend_params)
            if self.intermediate_model_dir:
                nncf_logger.info(f"Save quantized model number {model_cnt} to dir {self.intermediate_model_dir}")
                self._backend_entity.dump_model(child_q_model, self.intermediate_model_dir, **backend_params)

    nncf_logger.info(f"Quantize a model number {parent_model_cnt}")
    quantized_model = self._apply(parent_model, parent_graph, parent_statistic_points, parent_dataset)
    return quantized_model, parent_model_cnt
96 changes: 96 additions & 0 deletions nncf/quantization/algorithms/post_training/backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Copyright (c) 2023 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC
from abc import abstractmethod
from pathlib import Path
from typing import Any, Dict, Iterable, List, Tuple

from nncf import Dataset
from nncf.data.dataset import DataItem
from nncf.quantization.algorithms.post_training.algorithm import TModel


class PostTrainingBackend(ABC):
    """
    Interface for the backend-specific logic of quantizing models that contain
    child submodels (e.g. bodies of an OpenVINO If operation).
    """

    @staticmethod
    @abstractmethod
    def collect_dataitems_for_children_models(
        model: TModel, calibration_dataset: Dataset, subset_size: int, model_cnt: int
    ) -> Iterable[DataItem]:
        """
        Returns dataitems for children models of the main model.

        :param model: Model to infer to collect dataitems.
        :param calibration_dataset: Dataset is used to collect new dataitems.
        :param subset_size: Size of dataitems to collect.
        :param model_cnt: Global model number.
        :return: Collected dataitems.
        """

    @staticmethod
    @abstractmethod
    def make_dataset_for_child_models(dataitems: Iterable[DataItem], backend_params: Dict[str, Any]) -> Dataset:
        """
        Returns a dataset for child models.

        :param dataitems: Data items to collect into a dataset.
        :param backend_params: Backend-specific parameters.
        :return: Dataset for the child models.
        """

    @staticmethod
    @abstractmethod
    def is_single_model(model: TModel) -> bool:
        """
        Checks whether a model has inner subgraphs to quantize.

        :param model: Model to check.
        :return: True if the model has no inner subgraphs, otherwise - False.
        """

    @staticmethod
    @abstractmethod
    def get_child_models(model: TModel) -> List[Tuple[TModel, Dict[str, Any]]]:
        """
        Returns all child models of passed model.

        :param model: Model to seek for child models.
        :return: Models with backend-specific parameters.
        """

    @staticmethod
    @abstractmethod
    def add_additional_outputs(model: TModel) -> TModel:
        """
        Returns the model with additional outputs to collect statistics for child models.

        :param model: Model to update.
        :return: Updated model with extra outputs.
        """

    @staticmethod
    @abstractmethod
    def dump_model(model: TModel, dir: Path, backend_params: Dict[str, Any]) -> None:
        """
        Saves a model to a directory. Backend params are used to determine the model name to dump.

        :param model: Model to dump.
        :param dir: Directory path.
        :param backend_params: Backend-specific parameters.
        """

    @staticmethod
    @abstractmethod
    def set_child_model(child_model: TModel, backend_params: Dict[str, Any]) -> None:
        """
        Sets a child model back into its original parent model.

        :param child_model: Model to set.
        :param backend_params: Backend-specific parameters.
        """
Loading