diff --git a/ReleaseNotes.md b/ReleaseNotes.md index ab248964257..86b10fe2a43 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,5 +1,16 @@ # Release Notes +## New in Release 2.8.1 + +Post-training Quantization: + +- Bugfixes: + - (Common) Fixed issue with `nncf.compress_weights()` to avoid overflows on 32-bit Windows systems. + - (Common) Fixed performance issue with `nncf.compress_weights()` on LLama models. + - (Common) Fixed `nncf.quantize_with_accuracy_control` pipeline with `tune_hyperparams=True` enabled option. + - (OpenVINO) Fixed issue for stateful LLM models and added state restoring after the inference for it. + - (PyTorch) Fixed issue with `nncf.compress_weights()` for LLM models with the executing `is_floating_point` with tracing. + ## New in Release 2.8.0 Post-training Quantization: diff --git a/docs/Installation.md b/docs/Installation.md index 3ec4dc38878..9f0dbce77f9 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -70,6 +70,7 @@ as well as the supported versions of Python: | NNCF | OpenVINO | PyTorch | ONNX | TensorFlow | Python | |-----------|------------|----------|----------|------------|--------| | `develop` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` | +| `2.8.1` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` | | `2.8.0` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` | | `2.7.0` | `2023.2.0` | `2.1` | `1.13.1` | `2.12.0` | `3.8` | | `2.6.0` | `2023.1.0` | `2.0.1` | `1.13.1` | `2.12.0` | `3.8` | diff --git a/nncf/common/factory.py b/nncf/common/factory.py index 8c3a4ba2e19..f2d85ecefa9 100644 --- a/nncf/common/factory.py +++ b/nncf/common/factory.py @@ -17,9 +17,7 @@ from nncf.common.graph.transformations.command_creation import CommandCreator from nncf.common.tensor_statistics import aggregator from nncf.common.utils.backend import BackendType -from nncf.common.utils.backend import get_available_backends from nncf.common.utils.backend import get_backend -from nncf.common.utils.backend import is_openvino_compiled_model from nncf.data.dataset import Dataset TModel = TypeVar("TModel") @@ -86,12 +84,6 @@ def create(model: TModel) -> Engine: :param model: backend-specific model instance. :return: backend-specific Engine instance. """ - available_backends = get_available_backends() - if BackendType.OPENVINO in available_backends and is_openvino_compiled_model(model): - from nncf.openvino.engine import OVCompiledModelEngine - - return OVCompiledModelEngine(model) - model_backend = get_backend(model) if model_backend == BackendType.ONNX: from nncf.onnx.engine import ONNXEngine diff --git a/nncf/openvino/engine.py b/nncf/openvino/engine.py index decd31a6364..248f8912af3 100644 --- a/nncf/openvino/engine.py +++ b/nncf/openvino/engine.py @@ -15,6 +15,7 @@ import openvino.runtime as ov from nncf.common.engine import Engine +from nncf.openvino.graph.model_utils import model_has_state from nncf.parameters import TargetDevice @@ -27,11 +28,12 @@ class OVCompiledModelEngine(Engine): to infer the compiled model. 
""" - def __init__(self, model: ov.CompiledModel): - self.compiled_model = model + def __init__(self, compiled_model: ov.CompiledModel, stateful: bool): + self.infer_request = compiled_model.create_infer_request() + self.reset_state = stateful and hasattr(self.infer_request, "reset_state") self.input_tensor_names = set() - self.number_of_inputs = len(model.inputs) - for model_input in model.inputs: + self.number_of_inputs = len(compiled_model.inputs) + for model_input in compiled_model.inputs: self.input_tensor_names.update(model_input.get_names()) def _check_input_data_format( @@ -63,7 +65,11 @@ def infer( :return output_data: Model's output. """ self._check_input_data_format(input_data) - model_outputs = self.compiled_model(input_data) + + if self.reset_state: + self.infer_request.reset_state() + + model_outputs = self.infer_request.infer(input_data, share_inputs=True) output_data = {} for tensor, value in model_outputs.items(): @@ -86,8 +92,9 @@ def __init__(self, model: ov.Model, target_device: TargetDevice = TargetDevice.C target_device = TargetDevice.CPU ie = ov.Core() + stateful = model_has_state(model) compiled_model = ie.compile_model(model, target_device.value) - self.engine = OVCompiledModelEngine(compiled_model) + self.engine = OVCompiledModelEngine(compiled_model, stateful) def infer( self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]] diff --git a/nncf/openvino/graph/model_utils.py b/nncf/openvino/graph/model_utils.py index 21013bc07c7..733a61fa7be 100644 --- a/nncf/openvino/graph/model_utils.py +++ b/nncf/openvino/graph/model_utils.py @@ -60,3 +60,13 @@ def get_start_nodes_for_activation_path_tracing(nncf_graph: NNCFGraph) -> List[N :return: Target NNCFGraph input nodes. """ return nncf_graph.get_input_nodes() + nncf_graph.get_nodes_by_metatypes([OVReadValueMetatype]) + + +def model_has_state(model: ov.Model) -> bool: + """ + Returns True if model has state else False + + :param model: OpenVINO model + :return: True if model has state else False + """ + return len(model.get_sinks()) > 0 diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 8e8102eaf1c..7ed0ffb05b5 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -263,7 +263,7 @@ def native_quantize_with_accuracy_control_impl( fast_bias_correction, model_type, ignored_scope, - advanced_quantization_parameters, + copied_parameters, ) tuned_quantized_metric_results = evaluator.collect_metric_results( tuned_quantized_model, validation_dataset, model_name="tuned" diff --git a/nncf/quantization/algorithms/accuracy_control/backend.py b/nncf/quantization/algorithms/accuracy_control/backend.py index 2607aa1a7a4..de964936880 100644 --- a/nncf/quantization/algorithms/accuracy_control/backend.py +++ b/nncf/quantization/algorithms/accuracy_control/backend.py @@ -13,6 +13,7 @@ from abc import abstractmethod from typing import Any, List, Optional, TypeVar +from nncf.common.engine import Engine from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype @@ -21,6 +22,35 @@ TPModel = TypeVar("TPModel") +class PreparedModel(ABC): + @property + @abstractmethod + def model_for_inference(self) -> TPModel: + """ + Returns prepared model for inference. + + :return: Prepared model for inference. 
+ """ + + @property + @abstractmethod + def engine(self) -> Engine: + """ + Returns the engine for inference the prepared model. + + :return: The engine for inference the prepared model. + """ + + def __call__(self, input_data: Any) -> Any: + """ + Runs model on the provided input data and returns the raw model outputs. + + :param input_data: inputs for the model + :return: raw model outputs + """ + return self.engine.infer(input_data) + + class AccuracyControlAlgoBackend(ABC): # Metatypes @@ -158,15 +188,3 @@ def get_model_size(model: TModel) -> int: :param model: A model :return: Model size (in bytes) """ - - # Preparation of model - - @staticmethod - @abstractmethod - def prepare_for_inference(model: TModel) -> TPModel: - """ - Prepares model for inference. - - :param model: A model that should be prepared. - :return: Prepared model for inference. - """ diff --git a/nncf/quantization/algorithms/accuracy_control/evaluator.py b/nncf/quantization/algorithms/accuracy_control/evaluator.py index cb1e3173e6c..cda9a5e2c9b 100644 --- a/nncf/quantization/algorithms/accuracy_control/evaluator.py +++ b/nncf/quantization/algorithms/accuracy_control/evaluator.py @@ -12,15 +12,14 @@ from dataclasses import dataclass from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union -from nncf.common.factory import EngineFactory from nncf.common.logging import nncf_logger from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend from nncf.common.utils.timer import timer from nncf.data.dataset import Dataset +from nncf.quantization.algorithms.accuracy_control.backend import PreparedModel TModel = TypeVar("TModel") -TPModel = TypeVar("TPModel") TTensor = TypeVar("TTensor") @@ -111,7 +110,7 @@ def is_metric_mode(self) -> bool: """ return self._metric_mode - def prepare_model_for_inference(self, model: TModel) -> TPModel: + def prepare_model(self, model: TModel) -> PreparedModel: """ Prepares model for inference. @@ -121,21 +120,19 @@ def prepare_model_for_inference(self, model: TModel) -> TPModel: backend = get_backend(model) if backend == BackendType.OPENVINO: - import openvino.runtime as ov + from nncf.quantization.algorithms.accuracy_control.openvino_backend import OVPreparedModel - return ov.compile_model(model) + return OVPreparedModel(model) - raise NotImplementedError( - f"The `prepare_model_for_inference()` method is not implemented for the {backend} backend." - ) + raise NotImplementedError(f"The `prepare_model()` method is not implemented for the {backend} backend.") - def validate_model_for_inference( - self, model_for_inference: TPModel, dataset: Dataset, indices: Optional[List[int]] = None + def validate_prepared_model( + self, prepared_model: PreparedModel, dataset: Dataset, indices: Optional[List[int]] = None ): """ Validates prepared model for inference. - :param model: Prepared model to validate. + :param prepared_model: Prepared model to validate. :param dataset: Dataset to validate the model. :param indices: Zero-based indices of data items that should be selected from the dataset. @@ -147,7 +144,7 @@ def validate_model_for_inference( item. 
""" if self._metric_mode is None: - self._metric_mode = Evaluator.determine_mode(model_for_inference, dataset, self._validation_fn) + self._metric_mode = Evaluator.determine_mode(prepared_model, dataset, self._validation_fn) if not self.is_metric_mode() and indices is not None: raise ValueError("The `indices` parameter can be used only if Evaluator.is_metric_mode() = True") @@ -156,7 +153,7 @@ def validate_model_for_inference( if self._enable_iteration_count: validation_dataset = IterationCounter(validation_dataset) - metric, values_for_each_item = self._validation_fn(model_for_inference, validation_dataset) + metric, values_for_each_item = self._validation_fn(prepared_model.model_for_inference, validation_dataset) self._num_passed_iterations = validation_dataset.num_iterations if self._enable_iteration_count else 0 @@ -189,12 +186,12 @@ def validate( Otherwise, if the condition is false, it represents list of logits for each item. """ - model_for_inference = self.prepare_model_for_inference(model) - return self.validate_model_for_inference(model_for_inference, dataset, indices) + prepared_model = self.prepare_model(model) + return self.validate_prepared_model(prepared_model, dataset, indices) @staticmethod def determine_mode( - model_for_inference: TPModel, + prepared_model: PreparedModel, dataset: Dataset, validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]], ) -> bool: @@ -202,7 +199,7 @@ def determine_mode( Determines mode based on the type of returned value from the validation function. - :param model_for_inference: Model to validate. + :param prepared_model: Model to validate. :param dataset: Dataset to validate the model. :param validation_fn: Validation function to validate model. :return: A boolean indicator where `True` means that the `Evaluator` collects @@ -214,7 +211,7 @@ def determine_mode( data_item = dataset.get_data([0]) try: - metric_value, values_for_each_item = validation_fn(model_for_inference, data_item) + metric_value, values_for_each_item = validation_fn(prepared_model.model_for_inference, data_item) except Exception: metric_mode = False @@ -261,15 +258,15 @@ def determine_mode( return metric_mode - def collect_values_for_each_item_using_model_for_inference( - self, model_for_inference: TPModel, dataset: Dataset, indices: Optional[List[int]] = None + def collect_values_for_each_item_using_prepared_model( + self, prepared_model: PreparedModel, dataset: Dataset, indices: Optional[List[int]] = None ) -> Union[List[float], List[List[TTensor]]]: """ Collects value for each item from the dataset using prepared model for inference. If `is_metric_mode()` returns `True` then i-th value is a metric for i-th data item. It is an output of the model for i-th data item otherwise. - :param model: Model to infer. + :param prepared_model: Model to infer. :param dataset: Dataset to collect values. :param indices: The zero-based indices of data items that should be selected from the dataset. 
@@ -278,15 +275,14 @@ def collect_values_for_each_item_using_model_for_inference( if self._metric_mode: # Collect metrics for each item values_for_each_item = [ - self._validation_fn(model_for_inference, [data_item])[0] for data_item in dataset.get_data(indices) + self._validation_fn(prepared_model.model_for_inference, [data_item])[0] + for data_item in dataset.get_data(indices) ] else: # Collect outputs for each item - engine = EngineFactory.create(model_for_inference) - values_for_each_item = [] for data_item in dataset.get_inference_data(indices): - logits = engine.infer(data_item) + logits = prepared_model(data_item) values_for_each_item.append(list(logits.values())) self._num_passed_iterations = len(values_for_each_item) if self._enable_iteration_count else 0 @@ -307,8 +303,8 @@ def collect_values_for_each_item( the dataset. :return: Collected values. """ - model_for_inference = self.prepare_model_for_inference(model) - return self.collect_values_for_each_item_using_model_for_inference(model_for_inference, dataset, indices) + prepared_model = self.prepare_model(model) + return self.collect_values_for_each_item_using_prepared_model(prepared_model, dataset, indices) def collect_metric_results(self, model: TModel, dataset: Dataset, model_name: str = "") -> MetricResults: """ @@ -322,18 +318,16 @@ def collect_metric_results(self, model: TModel, dataset: Dataset, model_name: st nncf_logger.info(f"Validation of {model_name} model was started") with timer() as preparation_time: - model_for_inference = self.prepare_model_for_inference(model) + prepared_model = self.prepare_model(model) with timer() as validation_time: - metric, values_for_each_item = self.validate_model_for_inference(model_for_inference, dataset) + metric, values_for_each_item = self.validate_prepared_model(prepared_model, dataset) nncf_logger.info(f"Metric of {model_name} model: {metric}") if values_for_each_item is None: nncf_logger.info(f"Collecting values for each data item using the {model_name} model") with timer(): - values_for_each_item = self.collect_values_for_each_item_using_model_for_inference( - model_for_inference, dataset - ) + values_for_each_item = self.collect_values_for_each_item_using_prepared_model(prepared_model, dataset) return MetricResults(metric, values_for_each_item, preparation_time(), validation_time()) diff --git a/nncf/quantization/algorithms/accuracy_control/openvino_backend.py b/nncf/quantization/algorithms/accuracy_control/openvino_backend.py index acf41100e8e..b330704d54b 100644 --- a/nncf/quantization/algorithms/accuracy_control/openvino_backend.py +++ b/nncf/quantization/algorithms/accuracy_control/openvino_backend.py @@ -16,6 +16,7 @@ from nncf.common.graph import NNCFGraph from nncf.common.graph import NNCFNode +from nncf.openvino.engine import OVCompiledModelEngine from nncf.openvino.graph.layer_attributes import OVLayerAttributes from nncf.openvino.graph.metatypes.groups import CONSTANT_OPERATIONS from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS @@ -26,10 +27,33 @@ from nncf.openvino.graph.metatypes.openvino_metatypes import OVConcatMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVOpMetatype from nncf.openvino.graph.model_utils import get_start_nodes_for_activation_path_tracing +from nncf.openvino.graph.model_utils import model_has_state from nncf.openvino.graph.node_utils import get_bias_value from nncf.openvino.graph.node_utils import get_weight_value from nncf.openvino.graph.node_utils import is_node_with_bias from 
nncf.quantization.algorithms.accuracy_control.backend import AccuracyControlAlgoBackend +from nncf.quantization.algorithms.accuracy_control.backend import PreparedModel + + +class OVPreparedModel(PreparedModel): + """ + Implementation of the `PreparedModel` for OpenVINO backend. + """ + + def __init__(self, model: ov.Model): + self._stateful = model_has_state(model) + self._compiled_model = ov.compile_model(model) + self._engine = None + + @property + def model_for_inference(self) -> ov.CompiledModel: + return self._compiled_model + + @property + def engine(self) -> OVCompiledModelEngine: + if self._engine is None: + self._engine = OVCompiledModelEngine(self._compiled_model, self._stateful) + return self._engine class OVAccuracyControlAlgoBackend(AccuracyControlAlgoBackend): @@ -97,9 +121,3 @@ def get_model_size(model: ov.Model) -> int: model_size += op.data.nbytes return model_size - - # Preparation of model - - @staticmethod - def prepare_for_inference(model: ov.Model) -> ov.CompiledModel: - return ov.compile_model(model) diff --git a/nncf/quantization/algorithms/accuracy_control/ranker.py b/nncf/quantization/algorithms/accuracy_control/ranker.py index 933e472c06f..c0627a5d912 100644 --- a/nncf/quantization/algorithms/accuracy_control/ranker.py +++ b/nncf/quantization/algorithms/accuracy_control/ranker.py @@ -200,7 +200,7 @@ def _sequential_calculation_ranking_score( self._algo_backend.get_op_with_weights_metatypes(), ) - prepared_model = self._algo_backend.prepare_for_inference(modified_model) + prepared_model = self._evaluator.prepare_model(modified_model) ranking_score = self._calculate_ranking_score( prepared_model, ranking_subset_indices, reference_values_for_each_item ) @@ -229,7 +229,7 @@ def _multithreading_calculation_ranking_score( self._algo_backend.get_op_with_weights_metatypes(), ) - prepared_model_queue.append(executor.submit(self._algo_backend.prepare_for_inference, modified_model)) + prepared_model_queue.append(executor.submit(self._evaluator.prepare_model, modified_model)) if idx >= (self._num_workers - 1): prepared_model = prepared_model_queue.pop(0).result() @@ -263,12 +263,12 @@ def _calculate_ranking_score( """ if self._evaluator.is_metric_mode(): # Calculate ranking score based on metric - ranking_score, _ = self._evaluator.validate_model_for_inference( + ranking_score, _ = self._evaluator.validate_prepared_model( prepared_model, self._dataset, ranking_subset_indices ) else: # Calculate ranking score based on differences in logits - approximate_outputs = self._evaluator.collect_values_for_each_item_using_model_for_inference( + approximate_outputs = self._evaluator.collect_values_for_each_item_using_prepared_model( prepared_model, self._dataset, ranking_subset_indices ) reference_outputs = [reference_values_for_each_item[i] for i in ranking_subset_indices] diff --git a/nncf/quantization/algorithms/weight_compression/config.py b/nncf/quantization/algorithms/weight_compression/config.py index d0ecb1bcab1..915b9ba23a7 100644 --- a/nncf/quantization/algorithms/weight_compression/config.py +++ b/nncf/quantization/algorithms/weight_compression/config.py @@ -11,6 +11,8 @@ from dataclasses import dataclass from typing import Optional, TypeVar +import numpy as np + from nncf.common.graph.graph import NNCFNode from nncf.parameters import CompressWeightsMode @@ -54,6 +56,11 @@ class WeightCompressionParameters: weight_name: str node_with_weight: NNCFNode weight_port_id: int - num_weights: int + num_weights: np.uint64 reduction_axis: int compression_config = 
WeightCompressionConfig() + + def __post_init__(self): + # Explicitly cast num_weights to avoid overflow on finding total number of weights. + # The issue happens on Windows, because np.ndarray.size() returns np.int32 and sum of weights is more than 2^32. + self.num_weights = np.uint64(self.num_weights) diff --git a/nncf/quantization/algorithms/weight_compression/mixed_precision.py b/nncf/quantization/algorithms/weight_compression/mixed_precision.py index 120a736bb53..aac509bfe7c 100644 --- a/nncf/quantization/algorithms/weight_compression/mixed_precision.py +++ b/nncf/quantization/algorithms/weight_compression/mixed_precision.py @@ -17,6 +17,7 @@ from nncf.common.utils.registry import Registry from nncf.experimental.tensor import Tensor from nncf.experimental.tensor import functions as fns +from nncf.experimental.tensor.definitions import TensorDataType from nncf.parameters import SensitivityMetric from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig @@ -170,9 +171,12 @@ def _calc_weight_sensitivity(self, weight_param: WeightCompressionParameters) -> reduction_axis = weight_param.reduction_axis orig_shape = weight.shape + + if weight.dtype != TensorDataType.float32: + weight = weight.astype(TensorDataType.float32) + compressed_weights, scale, zero_point = do_integer_quantization(weight, reduction_axis, backup_config) - decompressed_weight = compressed_weights.astype(dtype=scale.dtype) - decompressed_weight = (compressed_weights - zero_point) * scale + decompressed_weight = (compressed_weights - zero_point).astype(weight.dtype) * scale decompressed_weight = decompressed_weight.reshape(orig_shape) return fns.linalg.norm(decompressed_weight - weight, ord="fro").item() diff --git a/nncf/quantization/algorithms/weight_compression/torch_backend.py b/nncf/quantization/algorithms/weight_compression/torch_backend.py index 3cbe72f4d20..89484afa54e 100644 --- a/nncf/quantization/algorithms/weight_compression/torch_backend.py +++ b/nncf/quantization/algorithms/weight_compression/torch_backend.py @@ -21,6 +21,7 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout from nncf.experimental.common.tensor_statistics.collectors import TensorCollector +from nncf.experimental.tensor.definitions import TensorDataType from nncf.experimental.tensor.tensor import Tensor from nncf.parameters import CompressWeightsMode from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend @@ -137,13 +138,11 @@ def get_channel_agnostic_reduction_axes( reduction_axis = [ndims - 1] elif weight_port_id == 1: reduction_axis = [max(0, ndims - 2)] - reduction_axis = [max(0, reduction_axis)] elif node_with_weight.metatype == om.PTAddmmMetatype: if weight_port_id == 1: reduction_axis = [ndims - 1] elif weight_port_id == 2: reduction_axis = [max(0, ndims - 2)] - reduction_axis = [max(0, reduction_axis)] return reduction_axis @staticmethod @@ -207,8 +206,11 @@ def transform_model( # calculates compressed weights and decompression parameters compressed_weight = compress_weight(Tensor(weight), wc_params.reduction_axis, compression_config) + # pack compressed tensor + packed_tensor = compressed_weight.tensor.astype(TensorDataType.uint8) + # sets compressed tensor - compressed_parameter = torch.nn.Parameter(compressed_weight.tensor.data, requires_grad=False) + compressed_parameter = 
torch.nn.Parameter(packed_tensor.data, requires_grad=False) setattr(module, weight_attr_name, compressed_parameter) consumer_nodes = graph.get_next_nodes(weight_node) @@ -219,8 +221,11 @@ def transform_model( if id(param) == id(weight): setattr(c_module, name, compressed_parameter) + # pack zero point tensor + packed_zero_point = compressed_weight.zero_point.astype(TensorDataType.uint8) + # creates weight decompressor - decompressor = WeightsDecompressor(compressed_weight.scale.data, compressed_weight.zero_point.data) + decompressor = WeightsDecompressor(compressed_weight.scale.data, packed_zero_point.data) # registry weight decompression module in the model decompressor_name = f"weights_decompressor_{weight_node.node_name.replace('.', '_')}" diff --git a/nncf/quantization/algorithms/weight_compression/weight_lowering.py b/nncf/quantization/algorithms/weight_compression/weight_lowering.py index 492a744cf6d..d9358d5a7f7 100644 --- a/nncf/quantization/algorithms/weight_compression/weight_lowering.py +++ b/nncf/quantization/algorithms/weight_compression/weight_lowering.py @@ -72,8 +72,11 @@ def calculate_normalized_weight_and_nf4_scale( :param reduction_axis: Axis, along which to reduce (collect) different statistics (e.g. min, max). :param group_size: Number of weights (e.g. 128) in the channel dimension that share quantization parameters (scale). The value -1 means no grouping. Defaults to -1. - :return: Normalized weights and nf4 scale. + :return: Normalized weight tensor of float32 type and nf4 scale tensor of float32 type. """ + if weight.dtype != TensorDataType.float32: + weight = weight.astype(TensorDataType.float32) + if group_size != -1: # weights are reshaped: [a1, r, a2] -> [a1, r//gs, gs, a2] weight, reduction_axis = reshape_weight_for_grouped_quantization(weight, reduction_axis, group_size) @@ -109,7 +112,8 @@ def do_integer_quantization( :param weight: Weight array to compress. :param reduction_axis: Axis, along which to reduce (collect) different statistics (e.g. min, max). :param config: Information on how to compress (quantize) a specific weight. - :return: The compressed weights, scale and zero point that was used for its quantization. + :return: The compressed weights tensor of uint8 type, scale tensor of float32 type and + zero point tensor of int32 type that was used for its quantization. 
""" mode = config.mode assert mode != CompressWeightsMode.NF4, "The function supports integer quantization only" @@ -119,6 +123,9 @@ def do_integer_quantization( level_low = 0 level_high = 2**num_bits - 1 + if weight.dtype != TensorDataType.float32: + weight = weight.astype(TensorDataType.float32) + if group_size != -1: # weights are reshaped from [a1, r, a2] to [a1, r//gs, gs, a2] weight, reduction_axis = reshape_weight_for_grouped_quantization(weight, reduction_axis, group_size) @@ -135,14 +142,11 @@ def do_integer_quantization( level_high_sym = 2 ** (num_bits - 1) - 1 scale = scale / level_high_sym zero_point = fns.as_tensor_like(scale, [-level_low_sym]) + eps = fns.finfo(scale).eps + # NOTE: adding machine epsilon to avoid division by zero + scale = fns.where(fns.abs(scale) < eps, eps, scale) - scale = scale.astype(weight.dtype) - zero_point = zero_point.astype(TensorDataType.uint8) - - eps = fns.finfo(weight).eps - # NOTE: adding machine epsilon to avoid division by zero - scale = fns.where(fns.abs(scale) < eps, eps, scale) - compressed_weights = fns.round(weight / scale + zero_point) + compressed_weights = fns.round(weight / scale + zero_point.astype(weight.dtype)) compressed_weights = fns.clip(compressed_weights, level_low, level_high).astype(TensorDataType.uint8) return compressed_weights, scale, zero_point @@ -158,10 +162,13 @@ def get_integer_quantization_error(weight: Tensor, reduction_axis: int, config: :return: The quantity characterizing the error of integer quantization. """ orig_shape = weight.shape + + if weight.dtype != TensorDataType.float32: + weight = weight.astype(TensorDataType.float32) + compressed_weights, scale, zero_point = do_integer_quantization(weight, reduction_axis, config) - compressed_weights = compressed_weights.astype(dtype=weight.dtype) - decompressed_weight = (compressed_weights - zero_point) * scale + decompressed_weight = (compressed_weights - zero_point).astype(weight.dtype) * scale decompressed_weight = decompressed_weight.reshape(orig_shape) diff = (decompressed_weight - weight) ** 2 diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index dcbcfc79ba7..2648ae908c8 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -423,7 +423,8 @@ def quantize_with_tune_hyperparams( "advanced_parameters": advanced_quantization_parameters, } - param_grids = get_quantization_param_grids(create_ptq_pipeline(**init_quantization_params)) + backend = get_backend(model) + param_grids = get_quantization_param_grids(create_ptq_pipeline(**init_quantization_params), backend) hyperparameter_tuner = HyperparameterTuner( create_ptq_pipeline, diff --git a/nncf/torch/dynamic_graph/graph_tracer.py b/nncf/torch/dynamic_graph/graph_tracer.py index dc7d10ea5b4..c9cf108dec1 100644 --- a/nncf/torch/dynamic_graph/graph_tracer.py +++ b/nncf/torch/dynamic_graph/graph_tracer.py @@ -8,7 +8,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from copy import deepcopy from typing import Any, Callable, Dict, Optional, Tuple, TypeVar import torch @@ -35,8 +34,6 @@ def trace_graph( as_eval: bool = False, trace_parameters: bool = False, ) -> DynamicGraph: - sd = deepcopy(model.state_dict()) - if context_to_use is None: context_to_use = TracingContext() @@ -54,7 +51,6 @@ def trace_graph( self.custom_forward_fn(model) else: self.custom_forward_fn(model) - model.load_state_dict(sd) context_to_use.disable_trace_dynamic_graph() return context_to_use.graph diff --git a/nncf/torch/dynamic_graph/patch_pytorch.py b/nncf/torch/dynamic_graph/patch_pytorch.py index 8c3acea9eec..499eb2c4d1d 100644 --- a/nncf/torch/dynamic_graph/patch_pytorch.py +++ b/nncf/torch/dynamic_graph/patch_pytorch.py @@ -112,6 +112,7 @@ class FunctionsToPatchWithoutTracing: "storage_offset", "stride", "get_device", + "is_floating_point", ] FUNCTIONS_TO_PATCH_WITHOUT_TRACING = TENSOR_CREATING_FUNCTIONS + TENSOR_UTILITY_FUNCTIONS diff --git a/nncf/torch/graph/operator_metatypes.py b/nncf/torch/graph/operator_metatypes.py index 816f9e6ee2f..3e287114faa 100644 --- a/nncf/torch/graph/operator_metatypes.py +++ b/nncf/torch/graph/operator_metatypes.py @@ -580,7 +580,7 @@ class PTMatMulMetatype(PTOperatorMetatype): @PT_OPERATOR_METATYPES.register() class PTAddmmMetatype(PTOperatorMetatype): name = "MatMulOp" - module_to_function_names = {NamespaceTarget.TORCH: ["addmm"], NamespaceTarget.TORCH: ["baddbmm"]} + module_to_function_names = {NamespaceTarget.TORCH: ["addmm", "baddbmm"]} hw_config_names = [HWConfigOpName.MATMUL] # 0-th arg to the baddbmm is basically a (b)ias to be (add)ed to the (bmm) operation, # presuming that most runtime implementations will fuse the bias addition into the matrix multiplication diff --git a/nncf/torch/nncf_network.py b/nncf/torch/nncf_network.py index a8298b3340b..0954843ce72 100644 --- a/nncf/torch/nncf_network.py +++ b/nncf/torch/nncf_network.py @@ -383,6 +383,8 @@ def get_clean_shallow_copy(self) -> "NNCFNetwork": self._ignored_scopes, self._target_scopes, wrap_outputs_fn=self._wrap_outputs_fn, + replace_modules=self.replace_modules, + trace_parameters=self.trace_parameters, ) self._model_ref._nncf = new_interface self._model_ref.nncf.reset_nncf_modules() diff --git a/nncf/version.py b/nncf/version.py index 6165667a875..c92bad2f07f 100644 --- a/nncf/version.py +++ b/nncf/version.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.8.0" +__version__ = "2.8.1" BKC_TORCH_VERSION = "2.1.2" BKC_TORCHVISION_VERSION = "0.16.2" diff --git a/tests/common/accuracy_control/test_evaluator.py b/tests/common/accuracy_control/test_evaluator.py index 07085b2966c..c7c6c5250e8 100644 --- a/tests/common/accuracy_control/test_evaluator.py +++ b/tests/common/accuracy_control/test_evaluator.py @@ -79,21 +79,27 @@ class TestCase: TestCase(metric_value=[0.1], values_for_each_item=None, expected_is_metric_mode=None, raise_exception=True), ], ) -def test_determine_mode(ts: TestCase): +def test_determine_mode(ts: TestCase, mocker): def _validation_fn(dummy_model, dummy_dataset): return (ts.metric_value, ts.values_for_each_item) + prepared_model = mocker.Mock() + prepared_model.model_for_inference = None + if ts.raise_exception: with pytest.raises(RuntimeError): - _ = Evaluator.determine_mode(None, Dataset([None]), _validation_fn) + _ = Evaluator.determine_mode(prepared_model, Dataset([None]), _validation_fn) else: - is_metric_mode = Evaluator.determine_mode(None, Dataset([None]), _validation_fn) + is_metric_mode = Evaluator.determine_mode(prepared_model, Dataset([None]), _validation_fn) assert is_metric_mode == ts.expected_is_metric_mode -def test_determine_mode_2(): +def test_determine_mode_2(mocker): def _validation_fn_with_error(dummy_model, dummy_dataset): raise RuntimeError - is_metric_mode = Evaluator.determine_mode(None, Dataset([None]), _validation_fn_with_error) + prepared_model = mocker.Mock() + prepared_model.model_for_inference = None + + is_metric_mode = Evaluator.determine_mode(prepared_model, Dataset([None]), _validation_fn_with_error) assert not is_metric_mode diff --git a/tests/common/requirements.txt b/tests/common/requirements.txt index 1388c4ee806..1acffe3481f 100644 --- a/tests/common/requirements.txt +++ b/tests/common/requirements.txt @@ -1,3 +1,3 @@ -pytest +pytest==7.4.4 pytest-cov pytest-mock>=3.3.1 diff --git a/tests/cross_fw/examples/requirements.txt b/tests/cross_fw/examples/requirements.txt index 9955deccd94..cdbff4d6aca 100644 --- a/tests/cross_fw/examples/requirements.txt +++ b/tests/cross_fw/examples/requirements.txt @@ -1,2 +1,2 @@ -pytest +pytest==7.4.4 pytest-cov diff --git a/tests/cross_fw/install/requirements.txt b/tests/cross_fw/install/requirements.txt index 8af77860ebe..0dcd9c22373 100644 --- a/tests/cross_fw/install/requirements.txt +++ b/tests/cross_fw/install/requirements.txt @@ -1,3 +1,3 @@ -pytest +pytest==7.4.4 pytest-cov virtualenv diff --git a/tests/openvino/native/models.py b/tests/openvino/native/models.py index 8a7f89f6970..a88ea72bf4a 100644 --- a/tests/openvino/native/models.py +++ b/tests/openvino/native/models.py @@ -885,3 +885,34 @@ def _create_ov_model(self): model = ov.Model([x], [inputs]) return model + + +class StatefulModel(OVReferenceModel): + """ + Stateful model for testing. 
+ Borrowed from https://github.com/openvinotoolkit/openvino/blob/0c552b7b152c341b5e545d131bd032fcb3cb6b86/src/bindings/python/tests/utils/helpers.py#L212 + """ + + def __init__(self, stateful=True): + super().__init__(stateful=stateful) + + def _create_ov_model(self, stateful=True): + input_shape = [1, 8] + data_type = np.float32 + input_data = opset.parameter(input_shape, name="input_data", dtype=data_type) + init_val = opset.constant(np.zeros(input_shape), data_type) + if stateful: + rv = opset.read_value(init_val, "var_id_667", data_type, input_shape) + add = opset.add(rv, input_data, name="MemoryAdd") + node = opset.assign(add, "var_id_667") + result = opset.result(add, name="Result") + result.get_output_tensor(0).set_names(set(["Result"])) + model = ov.Model(results=[result], sinks=[node], parameters=[input_data], name="TestModel") + else: + bias = opset.constant(init_val, data_type) + add = opset.add(input_data, bias, name="Add") + result = opset.result(add, name="Result") + result.get_output_tensor(0).set_names(set(["Result"])) + model = ov.Model(results=[result], parameters=[input_data], name="TestModel") + + return model diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py index ac958fa9b79..1a599c222f7 100644 --- a/tests/openvino/native/quantization/test_weights_compression.py +++ b/tests/openvino/native/quantization/test_weights_compression.py @@ -24,6 +24,7 @@ from nncf.openvino.graph.node_utils import get_const_value from nncf.quantization import compress_weights from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig +from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters from nncf.quantization.algorithms.weight_compression.mixed_precision import MIXED_PRECISION_CRITERIA from nncf.quantization.algorithms.weight_compression.weight_lowering import get_integer_quantization_error from nncf.quantization.algorithms.weight_compression.weight_lowering import reshape_weight_for_grouped_quantization @@ -598,3 +599,9 @@ def test_call_max_var_criterion_with_dataset_by_default(mocker, mode): compress_weights(model, mode=mode, ratio=0.8, group_size=-1, dataset=dataset) scores_spy.assert_called() + + +def test_data_type_for_num_weights(mocker): + stub = mocker.stub() + params = WeightCompressionParameters(stub, stub, stub, np.int32(1), stub) + assert isinstance(params.num_weights, np.uint64) diff --git a/tests/openvino/native/test_engine.py b/tests/openvino/native/test_engine.py index 85951117c94..cd1ba092124 100644 --- a/tests/openvino/native/test_engine.py +++ b/tests/openvino/native/test_engine.py @@ -16,6 +16,7 @@ from tests.openvino.native.models import ConvModel from tests.openvino.native.models import LinearModel from tests.openvino.native.models import QuantizedModel +from tests.openvino.native.models import StatefulModel def check_engine_creation_and_inference(model, input_data): @@ -58,3 +59,21 @@ def test_infer_quantized_model_list(): model = QuantizedModel().ov_model input_data = [np.random.rand(*inp.shape) for inp in model.get_parameters()] check_engine_creation_and_inference(model, input_data) + + +@pytest.mark.parametrize("stateful", [True, False]) +def test_compiled_model_engine_inference_stateful(stateful): + model = StatefulModel(stateful).ov_model + input_data = [np.ones(inp.shape) for inp in model.get_parameters()] + + engine = OVNativeEngine(model) + + for _ in range(10): + engine.infer(input_data) + + out = 
engine.infer(input_data) + + input_data = input_data[0] + out = out["Result"] + + assert np.array_equal(out[0], input_data[0]) diff --git a/tests/openvino/requirements.txt b/tests/openvino/requirements.txt index 82ac48f5735..214b0366430 100644 --- a/tests/openvino/requirements.txt +++ b/tests/openvino/requirements.txt @@ -1,4 +1,4 @@ -pytest +pytest==7.4.4 virtualenv pytest-cov pytest-mock>=3.3.1 diff --git a/tests/post_training/requirements.txt b/tests/post_training/requirements.txt index 77abed1ca82..c9bff02f625 100644 --- a/tests/post_training/requirements.txt +++ b/tests/post_training/requirements.txt @@ -5,7 +5,7 @@ torchvision==0.16.0 transformers==4.36.0 onnx==1.13.1 onnxruntime==1.14.1 -pytest +pytest==7.4.4 pytest-cov openvino-dev==2023.3.0 optimum[onnxruntime,openvino]==1.16.0 diff --git a/tests/tensorflow/requirements.txt b/tests/tensorflow/requirements.txt index 3d113a8c59c..11c0a8932f0 100644 --- a/tests/tensorflow/requirements.txt +++ b/tests/tensorflow/requirements.txt @@ -1,6 +1,6 @@ PyYAML tensorflow_addons~=0.20.0 -pytest +pytest==7.4.4 pytest-cov pytest-mock pytest-dependency diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__multi_input_branch.dot b/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__multi_input_branch.dot index 079b8f9afe8..3b5e80a9758 100644 --- a/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__multi_input_branch.dot +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__multi_input_branch.dot @@ -1,27 +1,25 @@ strict digraph { "0 /nncf_model_input_0" [id=0, type=nncf_model_input]; -"1 ShiftScaleParametrized/is_floating_point_0" [id=1, type=is_floating_point]; -"2 ShiftScaleParametrized/clone_0" [id=2, type=clone]; -"3 ShiftScaleParametrized/sub__0" [id=3, type=sub_]; -"4 ShiftScaleParametrized/div__0" [id=4, type=div_]; -"5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; -"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=6, type=symmetric_quantize]; -"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=7, type=conv2d]; -"8 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" [id=8, type=symmetric_quantize]; -"9 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0" [id=9, type=symmetric_quantize]; -"10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" [id=10, type=conv2d]; -"11 /nncf_model_output_0" [id=11, type=nncf_model_output]; -"12 /nncf_model_output_1" [id=12, type=nncf_model_output]; -"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/is_floating_point_0"; -"0 /nncf_model_input_0" -> "2 ShiftScaleParametrized/clone_0"; -"0 /nncf_model_input_0" -> "9 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0"; -"2 ShiftScaleParametrized/clone_0" -> "3 ShiftScaleParametrized/sub__0"; -"3 ShiftScaleParametrized/sub__0" -> "4 ShiftScaleParametrized/div__0"; -"4 ShiftScaleParametrized/div__0" -> "5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0"; -"5 
ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; -"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; -"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "11 /nncf_model_output_0"; -"8 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" -> "10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; -"9 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0" -> "10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; -"10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" -> "12 /nncf_model_output_1"; +"1 ShiftScaleParametrized/clone_0" [id=1, type=clone]; +"2 ShiftScaleParametrized/sub__0" [id=2, type=sub_]; +"3 ShiftScaleParametrized/div__0" [id=3, type=div_]; +"4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" [id=4, type=symmetric_quantize]; +"5 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; +"6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=6, type=conv2d]; +"7 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" [id=7, type=symmetric_quantize]; +"8 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0" [id=8, type=symmetric_quantize]; +"9 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" [id=9, type=conv2d]; +"10 /nncf_model_output_0" [id=10, type=nncf_model_output]; +"11 /nncf_model_output_1" [id=11, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/clone_0"; +"0 /nncf_model_input_0" -> "8 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0"; +"1 ShiftScaleParametrized/clone_0" -> "2 ShiftScaleParametrized/sub__0"; +"2 ShiftScaleParametrized/sub__0" -> "3 ShiftScaleParametrized/div__0"; +"3 ShiftScaleParametrized/div__0" -> "4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0"; +"4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" -> "6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; +"5 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; +"6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "10 /nncf_model_output_0"; +"7 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" -> "9 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; +"8 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0" -> "9 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; +"9 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" -> "11 /nncf_model_output_1"; } diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__single_input_branch.dot 
b/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__single_input_branch.dot index 4d067597486..9eab740c541 100644 --- a/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__single_input_branch.dot +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__single_input_branch.dot @@ -1,19 +1,17 @@ strict digraph { "0 /nncf_model_input_0" [id=0, type=nncf_model_input]; -"1 ShiftScaleParametrized/is_floating_point_0" [id=1, type=is_floating_point]; -"2 ShiftScaleParametrized/clone_0" [id=2, type=clone]; -"3 ShiftScaleParametrized/sub__0" [id=3, type=sub_]; -"4 ShiftScaleParametrized/div__0" [id=4, type=div_]; -"5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; -"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=6, type=symmetric_quantize]; -"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=7, type=conv2d]; -"8 /nncf_model_output_0" [id=8, type=nncf_model_output]; -"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/is_floating_point_0"; -"0 /nncf_model_input_0" -> "2 ShiftScaleParametrized/clone_0"; -"2 ShiftScaleParametrized/clone_0" -> "3 ShiftScaleParametrized/sub__0"; -"3 ShiftScaleParametrized/sub__0" -> "4 ShiftScaleParametrized/div__0"; -"4 ShiftScaleParametrized/div__0" -> "5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0"; -"5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; -"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; -"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "8 /nncf_model_output_0"; +"1 ShiftScaleParametrized/clone_0" [id=1, type=clone]; +"2 ShiftScaleParametrized/sub__0" [id=2, type=sub_]; +"3 ShiftScaleParametrized/div__0" [id=3, type=div_]; +"4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" [id=4, type=symmetric_quantize]; +"5 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; +"6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=6, type=conv2d]; +"7 /nncf_model_output_0" [id=7, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/clone_0"; +"1 ShiftScaleParametrized/clone_0" -> "2 ShiftScaleParametrized/sub__0"; +"2 ShiftScaleParametrized/sub__0" -> "3 ShiftScaleParametrized/div__0"; +"3 ShiftScaleParametrized/div__0" -> "4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0"; +"4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" -> "6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; +"5 
ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; +"6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "7 /nncf_model_output_0"; } diff --git a/tests/torch/models_hub_test/requirements.txt b/tests/torch/models_hub_test/requirements.txt index ec4f8b0e29e..9e1ccf3e367 100644 --- a/tests/torch/models_hub_test/requirements.txt +++ b/tests/torch/models_hub_test/requirements.txt @@ -3,7 +3,7 @@ torch==2.1.0 --extra-index-url https://download.pytorch.org/whl/cpu torchvision==0.16.0 transformers==4.36.0 -pytest +pytest==7.4.4 timm==0.9.2 scikit-learn==1.2.2 av==11.0.0
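
Illustration (not part of the patch): a minimal sketch of how the stateful-model handling added above is expected to behave. It mirrors `model_has_state()` (a model is treated as stateful when it has sink/Assign nodes) and the new `OVCompiledModelEngine` behavior (a single infer request whose state is reset before every inference). The tiny ReadValue/Assign model, the variable id, and the opset13 import are assumptions patterned after the StatefulModel helper added in tests/openvino/native/models.py; adjust the opset import to match the installed OpenVINO version.

import numpy as np
import openvino.runtime as ov
from openvino.runtime import opset13 as opset  # assumed opset; mirror the test helper's import


def build_tiny_stateful_model() -> ov.Model:
    # Hypothetical 1x8 model with a ReadValue/Assign pair, patterned after the
    # StatefulModel test helper introduced in this patch.
    shape = [1, 8]
    param = opset.parameter(shape, name="input_data", dtype=np.float32)
    init_val = opset.constant(np.zeros(shape), np.float32)
    read = opset.read_value(init_val, "var_id_667", np.float32, shape)
    add = opset.add(read, param, name="MemoryAdd")
    assign = opset.assign(add, "var_id_667")
    result = opset.result(add, name="Result")
    return ov.Model(results=[result], sinks=[assign], parameters=[param], name="TinyStateful")


model = build_tiny_stateful_model()

# model_has_state(): statefulness is detected through the presence of sink nodes.
stateful = len(model.get_sinks()) > 0  # True for the model above

compiled = ov.Core().compile_model(model, "CPU")
infer_request = compiled.create_infer_request()

input_data = [np.ones((1, 8), dtype=np.float32)]
for _ in range(3):
    # OVCompiledModelEngine resets the request state for stateful models before each
    # inference, so repeated calibration/validation calls do not accumulate state.
    if stateful and hasattr(infer_request, "reset_state"):
        infer_request.reset_state()
    outputs = infer_request.infer(input_data, share_inputs=True)

# With the state reset on every call, the ReadValue buffer stays zero-initialized and
# the output equals the input on each iteration (as the new engine test asserts).
assert np.array_equal(next(iter(outputs.values()))[0], input_data[0][0])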