Commit b49390e

Restoring state after inference only for stateful models (openvinotoolkit#2445)
### Changes

Restoring state after inference only for stateful models

### Reason for changes

Inference optimization: the inference request state is now reset after each inference only for stateful models, so stateless models avoid an unnecessary `reset_state()` call.
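A minimal sketch of the pattern (the helper name `make_infer_fn` is hypothetical; the actual change lives in `OVCompiledModelEngine` below):

```python
import openvino.runtime as ov


def make_infer_fn(model: ov.Model, device: str = "CPU"):
    """Build an inference callable that resets request state only for stateful models."""
    stateful = len(model.get_sinks()) > 0          # same check as model_has_state() below
    compiled = ov.Core().compile_model(model, device)
    request = compiled.create_infer_request()
    reset_state = stateful and hasattr(request, "reset_state")

    def infer(inputs):
        outputs = request.infer(inputs)
        if reset_state:                            # stateless models skip this call entirely
            request.reset_state()
        return outputs

    return infer
```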

### Related tickets

131141

### Tests

test_examples 230
alexsu52 authored Feb 6, 2024
1 parent edb3987 commit b49390e
Showing 8 changed files with 113 additions and 74 deletions.
8 changes: 0 additions & 8 deletions nncf/common/factory.py
@@ -18,9 +18,7 @@
from nncf.common.graph.transformations.command_creation import CommandCreator
from nncf.common.tensor_statistics import aggregator
from nncf.common.utils.backend import BackendType
from nncf.common.utils.backend import get_available_backends
from nncf.common.utils.backend import get_backend
from nncf.common.utils.backend import is_openvino_compiled_model
from nncf.data.dataset import Dataset

TModel = TypeVar("TModel")
@@ -88,12 +86,6 @@ def create(model: TModel) -> Engine:
:param model: backend-specific model instance.
:return: backend-specific Engine instance.
"""
available_backends = get_available_backends()
if BackendType.OPENVINO in available_backends and is_openvino_compiled_model(model):
from nncf.openvino.engine import OVCompiledModelEngine

return OVCompiledModelEngine(model)

model_backend = get_backend(model)
if model_backend == BackendType.ONNX:
from nncf.onnx.engine import ONNXEngine
15 changes: 8 additions & 7 deletions nncf/openvino/engine.py
@@ -16,6 +16,7 @@

import nncf
from nncf.common.engine import Engine
from nncf.openvino.graph.model_utils import model_has_state
from nncf.parameters import TargetDevice


@@ -28,13 +29,12 @@ class OVCompiledModelEngine(Engine):
to infer the compiled model.
"""

def __init__(self, model: ov.CompiledModel):
self.compiled_model = model
self.infer_request = model.create_infer_request()
self.reset_state = hasattr(self.infer_request, "reset_state")
def __init__(self, compiled_model: ov.CompiledModel, stateful: bool):
self.infer_request = compiled_model.create_infer_request()
self.reset_state = stateful and hasattr(self.infer_request, "reset_state")
self.input_tensor_names = set()
self.number_of_inputs = len(model.inputs)
for model_input in model.inputs:
self.number_of_inputs = len(compiled_model.inputs)
for model_input in compiled_model.inputs:
self.input_tensor_names.update(model_input.get_names())

def _check_input_data_format(
@@ -95,8 +95,9 @@ def __init__(self, model: ov.Model, target_device: TargetDevice = TargetDevice.C
target_device = TargetDevice.CPU

ie = ov.Core()
stateful = model_has_state(model)
compiled_model = ie.compile_model(model, target_device.value)
self.engine = OVCompiledModelEngine(compiled_model)
self.engine = OVCompiledModelEngine(compiled_model, stateful)

def infer(
self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]]
10 changes: 10 additions & 0 deletions nncf/openvino/graph/model_utils.py
@@ -60,3 +60,13 @@ def get_start_nodes_for_activation_path_tracing(nncf_graph: NNCFGraph) -> List[N
:return: Target NNCFGraph input nodes.
"""
return nncf_graph.get_input_nodes() + nncf_graph.get_nodes_by_metatypes([OVReadValueMetatype])


def model_has_state(model: ov.Model) -> bool:
"""
Returns True if the model has state, otherwise False.
:param model: OpenVINO model.
:return: True if the model has state, otherwise False.
"""
return len(model.get_sinks()) > 0
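
For illustration only, `model_has_state` relies on the fact that stateful OpenVINO models expose sink operations (typically `Assign` nodes); a hedged usage sketch with a hypothetical model path:

```python
import openvino.runtime as ov

from nncf.openvino.graph.model_utils import model_has_state

core = ov.Core()
model = core.read_model("stateful_model.xml")   # hypothetical path
if model_has_state(model):
    # engines built for this model will reset request state after each inference
    print("model is stateful")
```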
42 changes: 30 additions & 12 deletions nncf/quantization/algorithms/accuracy_control/backend.py
@@ -13,6 +13,7 @@
from abc import abstractmethod
from typing import Any, List, Optional, TypeVar

from nncf.common.engine import Engine
from nncf.common.graph.graph import NNCFGraph
from nncf.common.graph.graph import NNCFNode
from nncf.common.graph.operator_metatypes import OperatorMetatype
@@ -21,6 +22,35 @@
TPModel = TypeVar("TPModel")


class PreparedModel(ABC):
@property
@abstractmethod
def model_for_inference(self) -> TPModel:
"""
Returns prepared model for inference.
:return: Prepared model for inference.
"""

@property
@abstractmethod
def engine(self) -> Engine:
"""
Returns the engine used to infer the prepared model.
:return: The engine used to infer the prepared model.
"""

def __call__(self, input_data: Any) -> Any:
"""
Runs model on the provided input data and returns the raw model outputs.
:param input_data: inputs for the model
:return: raw model outputs
"""
return self.engine.infer(input_data)


class AccuracyControlAlgoBackend(ABC):
# Metatypes

@@ -158,15 +188,3 @@ def get_model_size(model: TModel) -> int:
:param model: A model
:return: Model size (in bytes)
"""

# Preparation of model

@staticmethod
@abstractmethod
def prepare_for_inference(model: TModel) -> TPModel:
"""
Prepares model for inference.
:param model: A model that should be prepared.
:return: Prepared model for inference.
"""
58 changes: 26 additions & 32 deletions nncf/quantization/algorithms/accuracy_control/evaluator.py
@@ -13,15 +13,14 @@
from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union

import nncf
from nncf.common.factory import EngineFactory
from nncf.common.logging import nncf_logger
from nncf.common.utils.backend import BackendType
from nncf.common.utils.backend import get_backend
from nncf.common.utils.timer import timer
from nncf.data.dataset import Dataset
from nncf.quantization.algorithms.accuracy_control.backend import PreparedModel

TModel = TypeVar("TModel")
TPModel = TypeVar("TPModel")
TTensor = TypeVar("TTensor")


@@ -112,7 +111,7 @@ def is_metric_mode(self) -> bool:
"""
return self._metric_mode

def prepare_model_for_inference(self, model: TModel) -> TPModel:
def prepare_model(self, model: TModel) -> PreparedModel:
"""
Prepares model for inference.
@@ -122,21 +121,19 @@ def prepare_model_for_inference(self, model: TModel) -> TPModel:
backend = get_backend(model)

if backend == BackendType.OPENVINO:
import openvino.runtime as ov
from nncf.quantization.algorithms.accuracy_control.openvino_backend import OVPreparedModel

return ov.compile_model(model)
return OVPreparedModel(model)

raise NotImplementedError(
f"The `prepare_model_for_inference()` method is not implemented for the {backend} backend."
)
raise NotImplementedError(f"The `prepare_model()` method is not implemented for the {backend} backend.")

def validate_model_for_inference(
self, model_for_inference: TPModel, dataset: Dataset, indices: Optional[List[int]] = None
def validate_prepared_model(
self, prepared_model: PreparedModel, dataset: Dataset, indices: Optional[List[int]] = None
):
"""
Validates prepared model for inference.
:param model: Prepared model to validate.
:param prepared_model: Prepared model to validate.
:param dataset: Dataset to validate the model.
:param indices: Zero-based indices of data items that should be selected from
the dataset.
@@ -148,7 +145,7 @@ def validate_model_for_inference(
item.
"""
if self._metric_mode is None:
self._metric_mode = Evaluator.determine_mode(model_for_inference, dataset, self._validation_fn)
self._metric_mode = Evaluator.determine_mode(prepared_model, dataset, self._validation_fn)

if not self.is_metric_mode() and indices is not None:
raise ValueError("The `indices` parameter can be used only if Evaluator.is_metric_mode() = True")
@@ -157,7 +154,7 @@ def validate_model_for_inference(
if self._enable_iteration_count:
validation_dataset = IterationCounter(validation_dataset)

metric, values_for_each_item = self._validation_fn(model_for_inference, validation_dataset)
metric, values_for_each_item = self._validation_fn(prepared_model.model_for_inference, validation_dataset)

self._num_passed_iterations = validation_dataset.num_iterations if self._enable_iteration_count else 0

@@ -190,20 +187,20 @@ def validate(
Otherwise, if the condition is false, it represents list of logits for each
item.
"""
model_for_inference = self.prepare_model_for_inference(model)
return self.validate_model_for_inference(model_for_inference, dataset, indices)
prepared_model = self.prepare_model(model)
return self.validate_prepared_model(prepared_model, dataset, indices)

@staticmethod
def determine_mode(
model_for_inference: TPModel,
prepared_model: PreparedModel,
dataset: Dataset,
validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]],
) -> bool:
"""
Determines mode based on the type of returned value from the
validation function.
:param model_for_inference: Model to validate.
:param prepared_model: Model to validate.
:param dataset: Dataset to validate the model.
:param validation_fn: Validation function to validate model.
:return: A boolean indicator where `True` means that the `Evaluator` collects
@@ -215,7 +212,7 @@ def determine_mode(
data_item = dataset.get_data([0])

try:
metric_value, values_for_each_item = validation_fn(model_for_inference, data_item)
metric_value, values_for_each_item = validation_fn(prepared_model.model_for_inference, data_item)
except Exception:
metric_mode = False

@@ -262,15 +259,15 @@ def determine_mode(

return metric_mode

def collect_values_for_each_item_using_model_for_inference(
self, model_for_inference: TPModel, dataset: Dataset, indices: Optional[List[int]] = None
def collect_values_for_each_item_using_prepared_model(
self, prepared_model: PreparedModel, dataset: Dataset, indices: Optional[List[int]] = None
) -> Union[List[float], List[List[TTensor]]]:
"""
Collects a value for each item from the dataset using the prepared model for inference.
If `is_metric_mode()` returns `True` then i-th value is a metric for i-th data item.
It is an output of the model for i-th data item otherwise.
:param model: Model to infer.
:param prepared_model: Model to infer.
:param dataset: Dataset to collect values.
:param indices: The zero-based indices of data items that should be selected from
the dataset.
@@ -279,15 +276,14 @@ def collect_values_for_each_item_using_model_for_inference(
if self._metric_mode:
# Collect metrics for each item
values_for_each_item = [
self._validation_fn(model_for_inference, [data_item])[0] for data_item in dataset.get_data(indices)
self._validation_fn(prepared_model.model_for_inference, [data_item])[0]
for data_item in dataset.get_data(indices)
]
else:
# Collect outputs for each item
engine = EngineFactory.create(model_for_inference)

values_for_each_item = []
for data_item in dataset.get_inference_data(indices):
logits = engine.infer(data_item)
logits = prepared_model(data_item)
values_for_each_item.append(list(logits.values()))

self._num_passed_iterations = len(values_for_each_item) if self._enable_iteration_count else 0
@@ -308,8 +304,8 @@ def collect_values_for_each_item(
the dataset.
:return: Collected values.
"""
model_for_inference = self.prepare_model_for_inference(model)
return self.collect_values_for_each_item_using_model_for_inference(model_for_inference, dataset, indices)
prepared_model = self.prepare_model(model)
return self.collect_values_for_each_item_using_prepared_model(prepared_model, dataset, indices)

def collect_metric_results(self, model: TModel, dataset: Dataset, model_name: str = "") -> MetricResults:
"""
@@ -323,18 +319,16 @@ def collect_metric_results(self, model: TModel, dataset: Dataset, model_name: st
nncf_logger.info(f"Validation of {model_name} model was started")

with timer() as preparation_time:
model_for_inference = self.prepare_model_for_inference(model)
prepared_model = self.prepare_model(model)

with timer() as validation_time:
metric, values_for_each_item = self.validate_model_for_inference(model_for_inference, dataset)
metric, values_for_each_item = self.validate_prepared_model(prepared_model, dataset)

nncf_logger.info(f"Metric of {model_name} model: {metric}")

if values_for_each_item is None:
nncf_logger.info(f"Collecting values for each data item using the {model_name} model")
with timer():
values_for_each_item = self.collect_values_for_each_item_using_model_for_inference(
model_for_inference, dataset
)
values_for_each_item = self.collect_values_for_each_item_using_prepared_model(prepared_model, dataset)

return MetricResults(metric, values_for_each_item, preparation_time(), validation_time())
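
The renamed methods are meant to share a single `PreparedModel` instance, so the model is compiled once and reused for both validation and per-item collection. A hedged usage sketch, assuming an `Evaluator` instance, a model, and a dataset are constructed elsewhere (the helper name is hypothetical):

```python
from typing import Any, List, Optional

from nncf.data.dataset import Dataset
from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator


def validate_and_collect(evaluator: Evaluator, model: Any, dataset: Dataset,
                         indices: Optional[List[int]] = None):
    """Mirror of collect_metric_results: prepare once, then reuse the prepared model."""
    prepared = evaluator.prepare_model(model)                                    # compiled once
    metric, per_item = evaluator.validate_prepared_model(prepared, dataset, indices)
    if per_item is None:
        per_item = evaluator.collect_values_for_each_item_using_prepared_model(
            prepared, dataset, indices
        )
    return metric, per_item
```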
30 changes: 24 additions & 6 deletions nncf/quantization/algorithms/accuracy_control/openvino_backend.py
@@ -16,6 +16,7 @@

from nncf.common.graph import NNCFGraph
from nncf.common.graph import NNCFNode
from nncf.openvino.engine import OVCompiledModelEngine
from nncf.openvino.graph.layer_attributes import OVLayerAttributes
from nncf.openvino.graph.metatypes.groups import CONSTANT_OPERATIONS
from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS
@@ -26,10 +27,33 @@
from nncf.openvino.graph.metatypes.openvino_metatypes import OVConcatMetatype
from nncf.openvino.graph.metatypes.openvino_metatypes import OVOpMetatype
from nncf.openvino.graph.model_utils import get_start_nodes_for_activation_path_tracing
from nncf.openvino.graph.model_utils import model_has_state
from nncf.openvino.graph.node_utils import get_bias_value
from nncf.openvino.graph.node_utils import get_weight_value
from nncf.openvino.graph.node_utils import is_node_with_bias
from nncf.quantization.algorithms.accuracy_control.backend import AccuracyControlAlgoBackend
from nncf.quantization.algorithms.accuracy_control.backend import PreparedModel


class OVPreparedModel(PreparedModel):
"""
Implementation of the `PreparedModel` for OpenVINO backend.
"""

def __init__(self, model: ov.Model):
self._stateful = model_has_state(model)
self._compiled_model = ov.compile_model(model)
self._engine = None

@property
def model_for_inference(self) -> ov.CompiledModel:
return self._compiled_model

@property
def engine(self) -> OVCompiledModelEngine:
if self._engine is None:
self._engine = OVCompiledModelEngine(self._compiled_model, self._stateful)
return self._engine


class OVAccuracyControlAlgoBackend(AccuracyControlAlgoBackend):
@@ -97,9 +121,3 @@ def get_model_size(model: ov.Model) -> int:
model_size += op.data.nbytes

return model_size

# Preparation of model

@staticmethod
def prepare_for_inference(model: ov.Model) -> ov.CompiledModel:
return ov.compile_model(model)
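
A hedged usage sketch of `OVPreparedModel` (the data item and helper name are placeholders): compilation and the statefulness check happen in the constructor, while the `OVCompiledModelEngine` is only built on the first call.

```python
import openvino.runtime as ov

from nncf.quantization.algorithms.accuracy_control.openvino_backend import OVPreparedModel


def run_once(model: ov.Model, data_item) -> list:
    prepared = OVPreparedModel(model)    # compiles eagerly, detects statefulness
    outputs = prepared(data_item)        # first call lazily creates OVCompiledModelEngine
    return list(outputs.values())        # raw output tensors, as collected by the Evaluator
```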
8 changes: 4 additions & 4 deletions nncf/quantization/algorithms/accuracy_control/ranker.py
@@ -200,7 +200,7 @@ def _sequential_calculation_ranking_score(
self._algo_backend.get_op_with_weights_metatypes(),
)

prepared_model = self._algo_backend.prepare_for_inference(modified_model)
prepared_model = self._evaluator.prepare_model(modified_model)
ranking_score = self._calculate_ranking_score(
prepared_model, ranking_subset_indices, reference_values_for_each_item
)
@@ -229,7 +229,7 @@ def _multithreading_calculation_ranking_score(
self._algo_backend.get_op_with_weights_metatypes(),
)

prepared_model_queue.append(executor.submit(self._algo_backend.prepare_for_inference, modified_model))
prepared_model_queue.append(executor.submit(self._evaluator.prepare_model, modified_model))

if idx >= (self._num_workers - 1):
prepared_model = prepared_model_queue.pop(0).result()
@@ -263,12 +263,12 @@ def _calculate_ranking_score(
"""
if self._evaluator.is_metric_mode():
# Calculate ranking score based on metric
ranking_score, _ = self._evaluator.validate_model_for_inference(
ranking_score, _ = self._evaluator.validate_prepared_model(
prepared_model, self._dataset, ranking_subset_indices
)
else:
# Calculate ranking score based on differences in logits
approximate_outputs = self._evaluator.collect_values_for_each_item_using_model_for_inference(
approximate_outputs = self._evaluator.collect_values_for_each_item_using_prepared_model(
prepared_model, self._dataset, ranking_subset_indices
)
reference_outputs = [reference_values_for_each_item[i] for i in ranking_subset_indices]
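
In the multithreaded path, `evaluator.prepare_model` (which compiles the modified model) is submitted to a thread pool ahead of scoring. A simplified sketch of that producer/consumer pattern (assumed helper names, not the exact ranker code):

```python
from concurrent.futures import ThreadPoolExecutor


def pipelined_ranking_scores(evaluator, modified_models, score_fn, num_workers: int = 2):
    """Compile models in background threads while scores are computed on ready ones."""
    scores = []
    queue = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        for idx, modified_model in enumerate(modified_models):
            queue.append(executor.submit(evaluator.prepare_model, modified_model))
            if idx >= num_workers - 1:
                scores.append(score_fn(queue.pop(0).result()))   # consume oldest prepared model
        while queue:                                             # drain remaining futures
            scores.append(score_fn(queue.pop(0).result()))
    return scores
```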