diff --git a/ReleaseNotes.md b/ReleaseNotes.md index ab248964257..86b10fe2a43 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,5 +1,16 @@ # Release Notes +## New in Release 2.8.1 + +Post-training Quantization: + +- Bugfixes: + - (Common) Fixed issue with `nncf.compress_weights()` to avoid overflows on 32-bit Windows systems. + - (Common) Fixed performance issue with `nncf.compress_weights()` on LLama models. + - (Common) Fixed `nncf.quantize_with_accuracy_control` pipeline with `tune_hyperparams=True` enabled option. + - (OpenVINO) Fixed issue for stateful LLM models and added state restoring after the inference for it. + - (PyTorch) Fixed issue with `nncf.compress_weights()` for LLM models with the executing `is_floating_point` with tracing. + ## New in Release 2.8.0 Post-training Quantization: diff --git a/docs/Installation.md b/docs/Installation.md index 3ec4dc38878..9f0dbce77f9 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -70,6 +70,7 @@ as well as the supported versions of Python: | NNCF | OpenVINO | PyTorch | ONNX | TensorFlow | Python | |-----------|------------|----------|----------|------------|--------| | `develop` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` | +| `2.8.1` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` | | `2.8.0` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` | | `2.7.0` | `2023.2.0` | `2.1` | `1.13.1` | `2.12.0` | `3.8` | | `2.6.0` | `2023.1.0` | `2.0.1` | `1.13.1` | `2.12.0` | `3.8` | diff --git a/nncf/common/factory.py b/nncf/common/factory.py index 8c3a4ba2e19..f2d85ecefa9 100644 --- a/nncf/common/factory.py +++ b/nncf/common/factory.py @@ -17,9 +17,7 @@ from nncf.common.graph.transformations.command_creation import CommandCreator from nncf.common.tensor_statistics import aggregator from nncf.common.utils.backend import BackendType -from nncf.common.utils.backend import get_available_backends from nncf.common.utils.backend import get_backend -from nncf.common.utils.backend import is_openvino_compiled_model from nncf.data.dataset import Dataset TModel = TypeVar("TModel") @@ -86,12 +84,6 @@ def create(model: TModel) -> Engine: :param model: backend-specific model instance. :return: backend-specific Engine instance. """ - available_backends = get_available_backends() - if BackendType.OPENVINO in available_backends and is_openvino_compiled_model(model): - from nncf.openvino.engine import OVCompiledModelEngine - - return OVCompiledModelEngine(model) - model_backend = get_backend(model) if model_backend == BackendType.ONNX: from nncf.onnx.engine import ONNXEngine diff --git a/nncf/openvino/engine.py b/nncf/openvino/engine.py index decd31a6364..248f8912af3 100644 --- a/nncf/openvino/engine.py +++ b/nncf/openvino/engine.py @@ -15,6 +15,7 @@ import openvino.runtime as ov from nncf.common.engine import Engine +from nncf.openvino.graph.model_utils import model_has_state from nncf.parameters import TargetDevice @@ -27,11 +28,12 @@ class OVCompiledModelEngine(Engine): to infer the compiled model. 
""" - def __init__(self, model: ov.CompiledModel): - self.compiled_model = model + def __init__(self, compiled_model: ov.CompiledModel, stateful: bool): + self.infer_request = compiled_model.create_infer_request() + self.reset_state = stateful and hasattr(self.infer_request, "reset_state") self.input_tensor_names = set() - self.number_of_inputs = len(model.inputs) - for model_input in model.inputs: + self.number_of_inputs = len(compiled_model.inputs) + for model_input in compiled_model.inputs: self.input_tensor_names.update(model_input.get_names()) def _check_input_data_format( @@ -63,7 +65,11 @@ def infer( :return output_data: Model's output. """ self._check_input_data_format(input_data) - model_outputs = self.compiled_model(input_data) + + if self.reset_state: + self.infer_request.reset_state() + + model_outputs = self.infer_request.infer(input_data, share_inputs=True) output_data = {} for tensor, value in model_outputs.items(): @@ -86,8 +92,9 @@ def __init__(self, model: ov.Model, target_device: TargetDevice = TargetDevice.C target_device = TargetDevice.CPU ie = ov.Core() + stateful = model_has_state(model) compiled_model = ie.compile_model(model, target_device.value) - self.engine = OVCompiledModelEngine(compiled_model) + self.engine = OVCompiledModelEngine(compiled_model, stateful) def infer( self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]] diff --git a/nncf/openvino/graph/model_utils.py b/nncf/openvino/graph/model_utils.py index 21013bc07c7..733a61fa7be 100644 --- a/nncf/openvino/graph/model_utils.py +++ b/nncf/openvino/graph/model_utils.py @@ -60,3 +60,13 @@ def get_start_nodes_for_activation_path_tracing(nncf_graph: NNCFGraph) -> List[N :return: Target NNCFGraph input nodes. """ return nncf_graph.get_input_nodes() + nncf_graph.get_nodes_by_metatypes([OVReadValueMetatype]) + + +def model_has_state(model: ov.Model) -> bool: + """ + Returns True if model has state else False + + :param model: OpenVINO model + :return: True if model has state else False + """ + return len(model.get_sinks()) > 0 diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 8e8102eaf1c..7ed0ffb05b5 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -263,7 +263,7 @@ def native_quantize_with_accuracy_control_impl( fast_bias_correction, model_type, ignored_scope, - advanced_quantization_parameters, + copied_parameters, ) tuned_quantized_metric_results = evaluator.collect_metric_results( tuned_quantized_model, validation_dataset, model_name="tuned" diff --git a/nncf/quantization/algorithms/accuracy_control/backend.py b/nncf/quantization/algorithms/accuracy_control/backend.py index 2607aa1a7a4..de964936880 100644 --- a/nncf/quantization/algorithms/accuracy_control/backend.py +++ b/nncf/quantization/algorithms/accuracy_control/backend.py @@ -13,6 +13,7 @@ from abc import abstractmethod from typing import Any, List, Optional, TypeVar +from nncf.common.engine import Engine from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype @@ -21,6 +22,35 @@ TPModel = TypeVar("TPModel") +class PreparedModel(ABC): + @property + @abstractmethod + def model_for_inference(self) -> TPModel: + """ + Returns prepared model for inference. + + :return: Prepared model for inference. 
+ """ + + @property + @abstractmethod + def engine(self) -> Engine: + """ + Returns the engine for inference the prepared model. + + :return: The engine for inference the prepared model. + """ + + def __call__(self, input_data: Any) -> Any: + """ + Runs model on the provided input data and returns the raw model outputs. + + :param input_data: inputs for the model + :return: raw model outputs + """ + return self.engine.infer(input_data) + + class AccuracyControlAlgoBackend(ABC): # Metatypes @@ -158,15 +188,3 @@ def get_model_size(model: TModel) -> int: :param model: A model :return: Model size (in bytes) """ - - # Preparation of model - - @staticmethod - @abstractmethod - def prepare_for_inference(model: TModel) -> TPModel: - """ - Prepares model for inference. - - :param model: A model that should be prepared. - :return: Prepared model for inference. - """ diff --git a/nncf/quantization/algorithms/accuracy_control/evaluator.py b/nncf/quantization/algorithms/accuracy_control/evaluator.py index cb1e3173e6c..cda9a5e2c9b 100644 --- a/nncf/quantization/algorithms/accuracy_control/evaluator.py +++ b/nncf/quantization/algorithms/accuracy_control/evaluator.py @@ -12,15 +12,14 @@ from dataclasses import dataclass from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union -from nncf.common.factory import EngineFactory from nncf.common.logging import nncf_logger from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend from nncf.common.utils.timer import timer from nncf.data.dataset import Dataset +from nncf.quantization.algorithms.accuracy_control.backend import PreparedModel TModel = TypeVar("TModel") -TPModel = TypeVar("TPModel") TTensor = TypeVar("TTensor") @@ -111,7 +110,7 @@ def is_metric_mode(self) -> bool: """ return self._metric_mode - def prepare_model_for_inference(self, model: TModel) -> TPModel: + def prepare_model(self, model: TModel) -> PreparedModel: """ Prepares model for inference. @@ -121,21 +120,19 @@ def prepare_model_for_inference(self, model: TModel) -> TPModel: backend = get_backend(model) if backend == BackendType.OPENVINO: - import openvino.runtime as ov + from nncf.quantization.algorithms.accuracy_control.openvino_backend import OVPreparedModel - return ov.compile_model(model) + return OVPreparedModel(model) - raise NotImplementedError( - f"The `prepare_model_for_inference()` method is not implemented for the {backend} backend." - ) + raise NotImplementedError(f"The `prepare_model()` method is not implemented for the {backend} backend.") - def validate_model_for_inference( - self, model_for_inference: TPModel, dataset: Dataset, indices: Optional[List[int]] = None + def validate_prepared_model( + self, prepared_model: PreparedModel, dataset: Dataset, indices: Optional[List[int]] = None ): """ Validates prepared model for inference. - :param model: Prepared model to validate. + :param prepared_model: Prepared model to validate. :param dataset: Dataset to validate the model. :param indices: Zero-based indices of data items that should be selected from the dataset. @@ -147,7 +144,7 @@ def validate_model_for_inference( item. 
""" if self._metric_mode is None: - self._metric_mode = Evaluator.determine_mode(model_for_inference, dataset, self._validation_fn) + self._metric_mode = Evaluator.determine_mode(prepared_model, dataset, self._validation_fn) if not self.is_metric_mode() and indices is not None: raise ValueError("The `indices` parameter can be used only if Evaluator.is_metric_mode() = True") @@ -156,7 +153,7 @@ def validate_model_for_inference( if self._enable_iteration_count: validation_dataset = IterationCounter(validation_dataset) - metric, values_for_each_item = self._validation_fn(model_for_inference, validation_dataset) + metric, values_for_each_item = self._validation_fn(prepared_model.model_for_inference, validation_dataset) self._num_passed_iterations = validation_dataset.num_iterations if self._enable_iteration_count else 0 @@ -189,12 +186,12 @@ def validate( Otherwise, if the condition is false, it represents list of logits for each item. """ - model_for_inference = self.prepare_model_for_inference(model) - return self.validate_model_for_inference(model_for_inference, dataset, indices) + prepared_model = self.prepare_model(model) + return self.validate_prepared_model(prepared_model, dataset, indices) @staticmethod def determine_mode( - model_for_inference: TPModel, + prepared_model: PreparedModel, dataset: Dataset, validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]], ) -> bool: @@ -202,7 +199,7 @@ def determine_mode( Determines mode based on the type of returned value from the validation function. - :param model_for_inference: Model to validate. + :param prepared_model: Model to validate. :param dataset: Dataset to validate the model. :param validation_fn: Validation function to validate model. :return: A boolean indicator where `True` means that the `Evaluator` collects @@ -214,7 +211,7 @@ def determine_mode( data_item = dataset.get_data([0]) try: - metric_value, values_for_each_item = validation_fn(model_for_inference, data_item) + metric_value, values_for_each_item = validation_fn(prepared_model.model_for_inference, data_item) except Exception: metric_mode = False @@ -261,15 +258,15 @@ def determine_mode( return metric_mode - def collect_values_for_each_item_using_model_for_inference( - self, model_for_inference: TPModel, dataset: Dataset, indices: Optional[List[int]] = None + def collect_values_for_each_item_using_prepared_model( + self, prepared_model: PreparedModel, dataset: Dataset, indices: Optional[List[int]] = None ) -> Union[List[float], List[List[TTensor]]]: """ Collects value for each item from the dataset using prepared model for inference. If `is_metric_mode()` returns `True` then i-th value is a metric for i-th data item. It is an output of the model for i-th data item otherwise. - :param model: Model to infer. + :param prepared_model: Model to infer. :param dataset: Dataset to collect values. :param indices: The zero-based indices of data items that should be selected from the dataset. 
@@ -278,15 +275,14 @@ def collect_values_for_each_item_using_model_for_inference( if self._metric_mode: # Collect metrics for each item values_for_each_item = [ - self._validation_fn(model_for_inference, [data_item])[0] for data_item in dataset.get_data(indices) + self._validation_fn(prepared_model.model_for_inference, [data_item])[0] + for data_item in dataset.get_data(indices) ] else: # Collect outputs for each item - engine = EngineFactory.create(model_for_inference) - values_for_each_item = [] for data_item in dataset.get_inference_data(indices): - logits = engine.infer(data_item) + logits = prepared_model(data_item) values_for_each_item.append(list(logits.values())) self._num_passed_iterations = len(values_for_each_item) if self._enable_iteration_count else 0 @@ -307,8 +303,8 @@ def collect_values_for_each_item( the dataset. :return: Collected values. """ - model_for_inference = self.prepare_model_for_inference(model) - return self.collect_values_for_each_item_using_model_for_inference(model_for_inference, dataset, indices) + prepared_model = self.prepare_model(model) + return self.collect_values_for_each_item_using_prepared_model(prepared_model, dataset, indices) def collect_metric_results(self, model: TModel, dataset: Dataset, model_name: str = "") -> MetricResults: """ @@ -322,18 +318,16 @@ def collect_metric_results(self, model: TModel, dataset: Dataset, model_name: st nncf_logger.info(f"Validation of {model_name} model was started") with timer() as preparation_time: - model_for_inference = self.prepare_model_for_inference(model) + prepared_model = self.prepare_model(model) with timer() as validation_time: - metric, values_for_each_item = self.validate_model_for_inference(model_for_inference, dataset) + metric, values_for_each_item = self.validate_prepared_model(prepared_model, dataset) nncf_logger.info(f"Metric of {model_name} model: {metric}") if values_for_each_item is None: nncf_logger.info(f"Collecting values for each data item using the {model_name} model") with timer(): - values_for_each_item = self.collect_values_for_each_item_using_model_for_inference( - model_for_inference, dataset - ) + values_for_each_item = self.collect_values_for_each_item_using_prepared_model(prepared_model, dataset) return MetricResults(metric, values_for_each_item, preparation_time(), validation_time()) diff --git a/nncf/quantization/algorithms/accuracy_control/openvino_backend.py b/nncf/quantization/algorithms/accuracy_control/openvino_backend.py index acf41100e8e..b330704d54b 100644 --- a/nncf/quantization/algorithms/accuracy_control/openvino_backend.py +++ b/nncf/quantization/algorithms/accuracy_control/openvino_backend.py @@ -16,6 +16,7 @@ from nncf.common.graph import NNCFGraph from nncf.common.graph import NNCFNode +from nncf.openvino.engine import OVCompiledModelEngine from nncf.openvino.graph.layer_attributes import OVLayerAttributes from nncf.openvino.graph.metatypes.groups import CONSTANT_OPERATIONS from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS @@ -26,10 +27,33 @@ from nncf.openvino.graph.metatypes.openvino_metatypes import OVConcatMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVOpMetatype from nncf.openvino.graph.model_utils import get_start_nodes_for_activation_path_tracing +from nncf.openvino.graph.model_utils import model_has_state from nncf.openvino.graph.node_utils import get_bias_value from nncf.openvino.graph.node_utils import get_weight_value from nncf.openvino.graph.node_utils import is_node_with_bias from 
nncf.quantization.algorithms.accuracy_control.backend import AccuracyControlAlgoBackend +from nncf.quantization.algorithms.accuracy_control.backend import PreparedModel + + +class OVPreparedModel(PreparedModel): + """ + Implementation of the `PreparedModel` for OpenVINO backend. + """ + + def __init__(self, model: ov.Model): + self._stateful = model_has_state(model) + self._compiled_model = ov.compile_model(model) + self._engine = None + + @property + def model_for_inference(self) -> ov.CompiledModel: + return self._compiled_model + + @property + def engine(self) -> OVCompiledModelEngine: + if self._engine is None: + self._engine = OVCompiledModelEngine(self._compiled_model, self._stateful) + return self._engine class OVAccuracyControlAlgoBackend(AccuracyControlAlgoBackend): @@ -97,9 +121,3 @@ def get_model_size(model: ov.Model) -> int: model_size += op.data.nbytes return model_size - - # Preparation of model - - @staticmethod - def prepare_for_inference(model: ov.Model) -> ov.CompiledModel: - return ov.compile_model(model) diff --git a/nncf/quantization/algorithms/accuracy_control/ranker.py b/nncf/quantization/algorithms/accuracy_control/ranker.py index 933e472c06f..c0627a5d912 100644 --- a/nncf/quantization/algorithms/accuracy_control/ranker.py +++ b/nncf/quantization/algorithms/accuracy_control/ranker.py @@ -200,7 +200,7 @@ def _sequential_calculation_ranking_score( self._algo_backend.get_op_with_weights_metatypes(), ) - prepared_model = self._algo_backend.prepare_for_inference(modified_model) + prepared_model = self._evaluator.prepare_model(modified_model) ranking_score = self._calculate_ranking_score( prepared_model, ranking_subset_indices, reference_values_for_each_item ) @@ -229,7 +229,7 @@ def _multithreading_calculation_ranking_score( self._algo_backend.get_op_with_weights_metatypes(), ) - prepared_model_queue.append(executor.submit(self._algo_backend.prepare_for_inference, modified_model)) + prepared_model_queue.append(executor.submit(self._evaluator.prepare_model, modified_model)) if idx >= (self._num_workers - 1): prepared_model = prepared_model_queue.pop(0).result() @@ -263,12 +263,12 @@ def _calculate_ranking_score( """ if self._evaluator.is_metric_mode(): # Calculate ranking score based on metric - ranking_score, _ = self._evaluator.validate_model_for_inference( + ranking_score, _ = self._evaluator.validate_prepared_model( prepared_model, self._dataset, ranking_subset_indices ) else: # Calculate ranking score based on differences in logits - approximate_outputs = self._evaluator.collect_values_for_each_item_using_model_for_inference( + approximate_outputs = self._evaluator.collect_values_for_each_item_using_prepared_model( prepared_model, self._dataset, ranking_subset_indices ) reference_outputs = [reference_values_for_each_item[i] for i in ranking_subset_indices] diff --git a/nncf/quantization/algorithms/weight_compression/config.py b/nncf/quantization/algorithms/weight_compression/config.py index d0ecb1bcab1..915b9ba23a7 100644 --- a/nncf/quantization/algorithms/weight_compression/config.py +++ b/nncf/quantization/algorithms/weight_compression/config.py @@ -11,6 +11,8 @@ from dataclasses import dataclass from typing import Optional, TypeVar +import numpy as np + from nncf.common.graph.graph import NNCFNode from nncf.parameters import CompressWeightsMode @@ -54,6 +56,11 @@ class WeightCompressionParameters: weight_name: str node_with_weight: NNCFNode weight_port_id: int - num_weights: int + num_weights: np.uint64 reduction_axis: int compression_config = 
WeightCompressionConfig() + + def __post_init__(self): + # Explicitly cast num_weights to avoid overflow on finding total number of weights. + # The issue happens on Windows, because np.ndarray.size() returns np.int32 and sum of weights is more than 2^32. + self.num_weights = np.uint64(self.num_weights) diff --git a/nncf/quantization/algorithms/weight_compression/mixed_precision.py b/nncf/quantization/algorithms/weight_compression/mixed_precision.py index 120a736bb53..aac509bfe7c 100644 --- a/nncf/quantization/algorithms/weight_compression/mixed_precision.py +++ b/nncf/quantization/algorithms/weight_compression/mixed_precision.py @@ -17,6 +17,7 @@ from nncf.common.utils.registry import Registry from nncf.experimental.tensor import Tensor from nncf.experimental.tensor import functions as fns +from nncf.experimental.tensor.definitions import TensorDataType from nncf.parameters import SensitivityMetric from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig @@ -170,9 +171,12 @@ def _calc_weight_sensitivity(self, weight_param: WeightCompressionParameters) -> reduction_axis = weight_param.reduction_axis orig_shape = weight.shape + + if weight.dtype != TensorDataType.float32: + weight = weight.astype(TensorDataType.float32) + compressed_weights, scale, zero_point = do_integer_quantization(weight, reduction_axis, backup_config) - decompressed_weight = compressed_weights.astype(dtype=scale.dtype) - decompressed_weight = (compressed_weights - zero_point) * scale + decompressed_weight = (compressed_weights - zero_point).astype(weight.dtype) * scale decompressed_weight = decompressed_weight.reshape(orig_shape) return fns.linalg.norm(decompressed_weight - weight, ord="fro").item() diff --git a/nncf/quantization/algorithms/weight_compression/torch_backend.py b/nncf/quantization/algorithms/weight_compression/torch_backend.py index 3cbe72f4d20..89484afa54e 100644 --- a/nncf/quantization/algorithms/weight_compression/torch_backend.py +++ b/nncf/quantization/algorithms/weight_compression/torch_backend.py @@ -21,6 +21,7 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout from nncf.experimental.common.tensor_statistics.collectors import TensorCollector +from nncf.experimental.tensor.definitions import TensorDataType from nncf.experimental.tensor.tensor import Tensor from nncf.parameters import CompressWeightsMode from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend @@ -137,13 +138,11 @@ def get_channel_agnostic_reduction_axes( reduction_axis = [ndims - 1] elif weight_port_id == 1: reduction_axis = [max(0, ndims - 2)] - reduction_axis = [max(0, reduction_axis)] elif node_with_weight.metatype == om.PTAddmmMetatype: if weight_port_id == 1: reduction_axis = [ndims - 1] elif weight_port_id == 2: reduction_axis = [max(0, ndims - 2)] - reduction_axis = [max(0, reduction_axis)] return reduction_axis @staticmethod @@ -207,8 +206,11 @@ def transform_model( # calculates compressed weights and decompression parameters compressed_weight = compress_weight(Tensor(weight), wc_params.reduction_axis, compression_config) + # pack compressed tensor + packed_tensor = compressed_weight.tensor.astype(TensorDataType.uint8) + # sets compressed tensor - compressed_parameter = torch.nn.Parameter(compressed_weight.tensor.data, requires_grad=False) + compressed_parameter = 
torch.nn.Parameter(packed_tensor.data, requires_grad=False) setattr(module, weight_attr_name, compressed_parameter) consumer_nodes = graph.get_next_nodes(weight_node) @@ -219,8 +221,11 @@ def transform_model( if id(param) == id(weight): setattr(c_module, name, compressed_parameter) + # pack zero point tensor + packed_zero_point = compressed_weight.zero_point.astype(TensorDataType.uint8) + # creates weight decompressor - decompressor = WeightsDecompressor(compressed_weight.scale.data, compressed_weight.zero_point.data) + decompressor = WeightsDecompressor(compressed_weight.scale.data, packed_zero_point.data) # registry weight decompression module in the model decompressor_name = f"weights_decompressor_{weight_node.node_name.replace('.', '_')}" diff --git a/nncf/quantization/algorithms/weight_compression/weight_lowering.py b/nncf/quantization/algorithms/weight_compression/weight_lowering.py index 492a744cf6d..d9358d5a7f7 100644 --- a/nncf/quantization/algorithms/weight_compression/weight_lowering.py +++ b/nncf/quantization/algorithms/weight_compression/weight_lowering.py @@ -72,8 +72,11 @@ def calculate_normalized_weight_and_nf4_scale( :param reduction_axis: Axis, along which to reduce (collect) different statistics (e.g. min, max). :param group_size: Number of weights (e.g. 128) in the channel dimension that share quantization parameters (scale). The value -1 means no grouping. Defaults to -1. - :return: Normalized weights and nf4 scale. + :return: Normalized weight tensor of float32 type and nf4 scale tensor of float32 type. """ + if weight.dtype != TensorDataType.float32: + weight = weight.astype(TensorDataType.float32) + if group_size != -1: # weights are reshaped: [a1, r, a2] -> [a1, r//gs, gs, a2] weight, reduction_axis = reshape_weight_for_grouped_quantization(weight, reduction_axis, group_size) @@ -109,7 +112,8 @@ def do_integer_quantization( :param weight: Weight array to compress. :param reduction_axis: Axis, along which to reduce (collect) different statistics (e.g. min, max). :param config: Information on how to compress (quantize) a specific weight. - :return: The compressed weights, scale and zero point that was used for its quantization. + :return: The compressed weights tensor of uint8 type, scale tensor of float32 type and + zero point tensor of int32 type that was used for its quantization. 
""" mode = config.mode assert mode != CompressWeightsMode.NF4, "The function supports integer quantization only" @@ -119,6 +123,9 @@ def do_integer_quantization( level_low = 0 level_high = 2**num_bits - 1 + if weight.dtype != TensorDataType.float32: + weight = weight.astype(TensorDataType.float32) + if group_size != -1: # weights are reshaped from [a1, r, a2] to [a1, r//gs, gs, a2] weight, reduction_axis = reshape_weight_for_grouped_quantization(weight, reduction_axis, group_size) @@ -135,14 +142,11 @@ def do_integer_quantization( level_high_sym = 2 ** (num_bits - 1) - 1 scale = scale / level_high_sym zero_point = fns.as_tensor_like(scale, [-level_low_sym]) + eps = fns.finfo(scale).eps + # NOTE: adding machine epsilon to avoid division by zero + scale = fns.where(fns.abs(scale) < eps, eps, scale) - scale = scale.astype(weight.dtype) - zero_point = zero_point.astype(TensorDataType.uint8) - - eps = fns.finfo(weight).eps - # NOTE: adding machine epsilon to avoid division by zero - scale = fns.where(fns.abs(scale) < eps, eps, scale) - compressed_weights = fns.round(weight / scale + zero_point) + compressed_weights = fns.round(weight / scale + zero_point.astype(weight.dtype)) compressed_weights = fns.clip(compressed_weights, level_low, level_high).astype(TensorDataType.uint8) return compressed_weights, scale, zero_point @@ -158,10 +162,13 @@ def get_integer_quantization_error(weight: Tensor, reduction_axis: int, config: :return: The quantity characterizing the error of integer quantization. """ orig_shape = weight.shape + + if weight.dtype != TensorDataType.float32: + weight = weight.astype(TensorDataType.float32) + compressed_weights, scale, zero_point = do_integer_quantization(weight, reduction_axis, config) - compressed_weights = compressed_weights.astype(dtype=weight.dtype) - decompressed_weight = (compressed_weights - zero_point) * scale + decompressed_weight = (compressed_weights - zero_point).astype(weight.dtype) * scale decompressed_weight = decompressed_weight.reshape(orig_shape) diff = (decompressed_weight - weight) ** 2 diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index dcbcfc79ba7..2648ae908c8 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -423,7 +423,8 @@ def quantize_with_tune_hyperparams( "advanced_parameters": advanced_quantization_parameters, } - param_grids = get_quantization_param_grids(create_ptq_pipeline(**init_quantization_params)) + backend = get_backend(model) + param_grids = get_quantization_param_grids(create_ptq_pipeline(**init_quantization_params), backend) hyperparameter_tuner = HyperparameterTuner( create_ptq_pipeline, diff --git a/nncf/torch/dynamic_graph/graph_tracer.py b/nncf/torch/dynamic_graph/graph_tracer.py index dc7d10ea5b4..c9cf108dec1 100644 --- a/nncf/torch/dynamic_graph/graph_tracer.py +++ b/nncf/torch/dynamic_graph/graph_tracer.py @@ -8,7 +8,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from copy import deepcopy from typing import Any, Callable, Dict, Optional, Tuple, TypeVar import torch @@ -35,8 +34,6 @@ def trace_graph( as_eval: bool = False, trace_parameters: bool = False, ) -> DynamicGraph: - sd = deepcopy(model.state_dict()) - if context_to_use is None: context_to_use = TracingContext() @@ -54,7 +51,6 @@ def trace_graph( self.custom_forward_fn(model) else: self.custom_forward_fn(model) - model.load_state_dict(sd) context_to_use.disable_trace_dynamic_graph() return context_to_use.graph diff --git a/nncf/torch/dynamic_graph/patch_pytorch.py b/nncf/torch/dynamic_graph/patch_pytorch.py index 8c3acea9eec..499eb2c4d1d 100644 --- a/nncf/torch/dynamic_graph/patch_pytorch.py +++ b/nncf/torch/dynamic_graph/patch_pytorch.py @@ -112,6 +112,7 @@ class FunctionsToPatchWithoutTracing: "storage_offset", "stride", "get_device", + "is_floating_point", ] FUNCTIONS_TO_PATCH_WITHOUT_TRACING = TENSOR_CREATING_FUNCTIONS + TENSOR_UTILITY_FUNCTIONS diff --git a/nncf/torch/graph/operator_metatypes.py b/nncf/torch/graph/operator_metatypes.py index 816f9e6ee2f..3e287114faa 100644 --- a/nncf/torch/graph/operator_metatypes.py +++ b/nncf/torch/graph/operator_metatypes.py @@ -580,7 +580,7 @@ class PTMatMulMetatype(PTOperatorMetatype): @PT_OPERATOR_METATYPES.register() class PTAddmmMetatype(PTOperatorMetatype): name = "MatMulOp" - module_to_function_names = {NamespaceTarget.TORCH: ["addmm"], NamespaceTarget.TORCH: ["baddbmm"]} + module_to_function_names = {NamespaceTarget.TORCH: ["addmm", "baddbmm"]} hw_config_names = [HWConfigOpName.MATMUL] # 0-th arg to the baddbmm is basically a (b)ias to be (add)ed to the (bmm) operation, # presuming that most runtime implementations will fuse the bias addition into the matrix multiplication diff --git a/nncf/torch/nncf_network.py b/nncf/torch/nncf_network.py index a8298b3340b..0954843ce72 100644 --- a/nncf/torch/nncf_network.py +++ b/nncf/torch/nncf_network.py @@ -383,6 +383,8 @@ def get_clean_shallow_copy(self) -> "NNCFNetwork": self._ignored_scopes, self._target_scopes, wrap_outputs_fn=self._wrap_outputs_fn, + replace_modules=self.replace_modules, + trace_parameters=self.trace_parameters, ) self._model_ref._nncf = new_interface self._model_ref.nncf.reset_nncf_modules() diff --git a/nncf/version.py b/nncf/version.py index 6165667a875..c92bad2f07f 100644 --- a/nncf/version.py +++ b/nncf/version.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.8.0" +__version__ = "2.8.1" BKC_TORCH_VERSION = "2.1.2" BKC_TORCHVISION_VERSION = "0.16.2" diff --git a/tests/common/accuracy_control/test_evaluator.py b/tests/common/accuracy_control/test_evaluator.py index 07085b2966c..c7c6c5250e8 100644 --- a/tests/common/accuracy_control/test_evaluator.py +++ b/tests/common/accuracy_control/test_evaluator.py @@ -79,21 +79,27 @@ class TestCase: TestCase(metric_value=[0.1], values_for_each_item=None, expected_is_metric_mode=None, raise_exception=True), ], ) -def test_determine_mode(ts: TestCase): +def test_determine_mode(ts: TestCase, mocker): def _validation_fn(dummy_model, dummy_dataset): return (ts.metric_value, ts.values_for_each_item) + prepared_model = mocker.Mock() + prepared_model.model_for_inference = None + if ts.raise_exception: with pytest.raises(RuntimeError): - _ = Evaluator.determine_mode(None, Dataset([None]), _validation_fn) + _ = Evaluator.determine_mode(prepared_model, Dataset([None]), _validation_fn) else: - is_metric_mode = Evaluator.determine_mode(None, Dataset([None]), _validation_fn) + is_metric_mode = Evaluator.determine_mode(prepared_model, Dataset([None]), _validation_fn) assert is_metric_mode == ts.expected_is_metric_mode -def test_determine_mode_2(): +def test_determine_mode_2(mocker): def _validation_fn_with_error(dummy_model, dummy_dataset): raise RuntimeError - is_metric_mode = Evaluator.determine_mode(None, Dataset([None]), _validation_fn_with_error) + prepared_model = mocker.Mock() + prepared_model.model_for_inference = None + + is_metric_mode = Evaluator.determine_mode(prepared_model, Dataset([None]), _validation_fn_with_error) assert not is_metric_mode diff --git a/tests/common/requirements.txt b/tests/common/requirements.txt index 1388c4ee806..1acffe3481f 100644 --- a/tests/common/requirements.txt +++ b/tests/common/requirements.txt @@ -1,3 +1,3 @@ -pytest +pytest==7.4.4 pytest-cov pytest-mock>=3.3.1 diff --git a/tests/cross_fw/examples/requirements.txt b/tests/cross_fw/examples/requirements.txt index 9955deccd94..cdbff4d6aca 100644 --- a/tests/cross_fw/examples/requirements.txt +++ b/tests/cross_fw/examples/requirements.txt @@ -1,2 +1,2 @@ -pytest +pytest==7.4.4 pytest-cov diff --git a/tests/cross_fw/install/requirements.txt b/tests/cross_fw/install/requirements.txt index 8af77860ebe..0dcd9c22373 100644 --- a/tests/cross_fw/install/requirements.txt +++ b/tests/cross_fw/install/requirements.txt @@ -1,3 +1,3 @@ -pytest +pytest==7.4.4 pytest-cov virtualenv diff --git a/tests/openvino/native/models.py b/tests/openvino/native/models.py index 8a7f89f6970..a88ea72bf4a 100644 --- a/tests/openvino/native/models.py +++ b/tests/openvino/native/models.py @@ -885,3 +885,34 @@ def _create_ov_model(self): model = ov.Model([x], [inputs]) return model + + +class StatefulModel(OVReferenceModel): + """ + Stateful model for testing. 
+ Borrowed from https://github.com/openvinotoolkit/openvino/blob/0c552b7b152c341b5e545d131bd032fcb3cb6b86/src/bindings/python/tests/utils/helpers.py#L212 + """ + + def __init__(self, stateful=True): + super().__init__(stateful=stateful) + + def _create_ov_model(self, stateful=True): + input_shape = [1, 8] + data_type = np.float32 + input_data = opset.parameter(input_shape, name="input_data", dtype=data_type) + init_val = opset.constant(np.zeros(input_shape), data_type) + if stateful: + rv = opset.read_value(init_val, "var_id_667", data_type, input_shape) + add = opset.add(rv, input_data, name="MemoryAdd") + node = opset.assign(add, "var_id_667") + result = opset.result(add, name="Result") + result.get_output_tensor(0).set_names(set(["Result"])) + model = ov.Model(results=[result], sinks=[node], parameters=[input_data], name="TestModel") + else: + bias = opset.constant(init_val, data_type) + add = opset.add(input_data, bias, name="Add") + result = opset.result(add, name="Result") + result.get_output_tensor(0).set_names(set(["Result"])) + model = ov.Model(results=[result], parameters=[input_data], name="TestModel") + + return model diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py index ac958fa9b79..1a599c222f7 100644 --- a/tests/openvino/native/quantization/test_weights_compression.py +++ b/tests/openvino/native/quantization/test_weights_compression.py @@ -24,6 +24,7 @@ from nncf.openvino.graph.node_utils import get_const_value from nncf.quantization import compress_weights from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig +from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters from nncf.quantization.algorithms.weight_compression.mixed_precision import MIXED_PRECISION_CRITERIA from nncf.quantization.algorithms.weight_compression.weight_lowering import get_integer_quantization_error from nncf.quantization.algorithms.weight_compression.weight_lowering import reshape_weight_for_grouped_quantization @@ -598,3 +599,9 @@ def test_call_max_var_criterion_with_dataset_by_default(mocker, mode): compress_weights(model, mode=mode, ratio=0.8, group_size=-1, dataset=dataset) scores_spy.assert_called() + + +def test_data_type_for_num_weights(mocker): + stub = mocker.stub() + params = WeightCompressionParameters(stub, stub, stub, np.int32(1), stub) + assert isinstance(params.num_weights, np.uint64) diff --git a/tests/openvino/native/test_engine.py b/tests/openvino/native/test_engine.py index 85951117c94..cd1ba092124 100644 --- a/tests/openvino/native/test_engine.py +++ b/tests/openvino/native/test_engine.py @@ -16,6 +16,7 @@ from tests.openvino.native.models import ConvModel from tests.openvino.native.models import LinearModel from tests.openvino.native.models import QuantizedModel +from tests.openvino.native.models import StatefulModel def check_engine_creation_and_inference(model, input_data): @@ -58,3 +59,21 @@ def test_infer_quantized_model_list(): model = QuantizedModel().ov_model input_data = [np.random.rand(*inp.shape) for inp in model.get_parameters()] check_engine_creation_and_inference(model, input_data) + + +@pytest.mark.parametrize("stateful", [True, False]) +def test_compiled_model_engine_inference_stateful(stateful): + model = StatefulModel(stateful).ov_model + input_data = [np.ones(inp.shape) for inp in model.get_parameters()] + + engine = OVNativeEngine(model) + + for _ in range(10): + engine.infer(input_data) + + out = 
engine.infer(input_data) + + input_data = input_data[0] + out = out["Result"] + + assert np.array_equal(out[0], input_data[0]) diff --git a/tests/openvino/requirements.txt b/tests/openvino/requirements.txt index 82ac48f5735..214b0366430 100644 --- a/tests/openvino/requirements.txt +++ b/tests/openvino/requirements.txt @@ -1,4 +1,4 @@ -pytest +pytest==7.4.4 virtualenv pytest-cov pytest-mock>=3.3.1 diff --git a/tests/post_training/requirements.txt b/tests/post_training/requirements.txt index 77abed1ca82..c9bff02f625 100644 --- a/tests/post_training/requirements.txt +++ b/tests/post_training/requirements.txt @@ -5,7 +5,7 @@ torchvision==0.16.0 transformers==4.36.0 onnx==1.13.1 onnxruntime==1.14.1 -pytest +pytest==7.4.4 pytest-cov openvino-dev==2023.3.0 optimum[onnxruntime,openvino]==1.16.0 diff --git a/tests/tensorflow/requirements.txt b/tests/tensorflow/requirements.txt index 3d113a8c59c..11c0a8932f0 100644 --- a/tests/tensorflow/requirements.txt +++ b/tests/tensorflow/requirements.txt @@ -1,6 +1,6 @@ PyYAML tensorflow_addons~=0.20.0 -pytest +pytest==7.4.4 pytest-cov pytest-mock pytest-dependency diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__multi_input_branch.dot b/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__multi_input_branch.dot index 079b8f9afe8..3b5e80a9758 100644 --- a/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__multi_input_branch.dot +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__multi_input_branch.dot @@ -1,27 +1,25 @@ strict digraph { "0 /nncf_model_input_0" [id=0, type=nncf_model_input]; -"1 ShiftScaleParametrized/is_floating_point_0" [id=1, type=is_floating_point]; -"2 ShiftScaleParametrized/clone_0" [id=2, type=clone]; -"3 ShiftScaleParametrized/sub__0" [id=3, type=sub_]; -"4 ShiftScaleParametrized/div__0" [id=4, type=div_]; -"5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; -"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=6, type=symmetric_quantize]; -"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=7, type=conv2d]; -"8 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" [id=8, type=symmetric_quantize]; -"9 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0" [id=9, type=symmetric_quantize]; -"10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" [id=10, type=conv2d]; -"11 /nncf_model_output_0" [id=11, type=nncf_model_output]; -"12 /nncf_model_output_1" [id=12, type=nncf_model_output]; -"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/is_floating_point_0"; -"0 /nncf_model_input_0" -> "2 ShiftScaleParametrized/clone_0"; -"0 /nncf_model_input_0" -> "9 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0"; -"2 ShiftScaleParametrized/clone_0" -> "3 ShiftScaleParametrized/sub__0"; -"3 ShiftScaleParametrized/sub__0" -> "4 ShiftScaleParametrized/div__0"; -"4 ShiftScaleParametrized/div__0" -> "5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0"; -"5 
ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; -"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; -"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "11 /nncf_model_output_0"; -"8 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" -> "10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; -"9 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0" -> "10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; -"10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" -> "12 /nncf_model_output_1"; +"1 ShiftScaleParametrized/clone_0" [id=1, type=clone]; +"2 ShiftScaleParametrized/sub__0" [id=2, type=sub_]; +"3 ShiftScaleParametrized/div__0" [id=3, type=div_]; +"4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" [id=4, type=symmetric_quantize]; +"5 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; +"6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=6, type=conv2d]; +"7 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" [id=7, type=symmetric_quantize]; +"8 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0" [id=8, type=symmetric_quantize]; +"9 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" [id=9, type=conv2d]; +"10 /nncf_model_output_0" [id=10, type=nncf_model_output]; +"11 /nncf_model_output_1" [id=11, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/clone_0"; +"0 /nncf_model_input_0" -> "8 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0"; +"1 ShiftScaleParametrized/clone_0" -> "2 ShiftScaleParametrized/sub__0"; +"2 ShiftScaleParametrized/sub__0" -> "3 ShiftScaleParametrized/div__0"; +"3 ShiftScaleParametrized/div__0" -> "4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0"; +"4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" -> "6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; +"5 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; +"6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "10 /nncf_model_output_0"; +"7 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" -> "9 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; +"8 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0" -> "9 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; +"9 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" -> "11 /nncf_model_output_1"; } diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__single_input_branch.dot 
b/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__single_input_branch.dot index 4d067597486..9eab740c541 100644 --- a/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__single_input_branch.dot +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__normalize__single_input_branch.dot @@ -1,19 +1,17 @@ strict digraph { "0 /nncf_model_input_0" [id=0, type=nncf_model_input]; -"1 ShiftScaleParametrized/is_floating_point_0" [id=1, type=is_floating_point]; -"2 ShiftScaleParametrized/clone_0" [id=2, type=clone]; -"3 ShiftScaleParametrized/sub__0" [id=3, type=sub_]; -"4 ShiftScaleParametrized/div__0" [id=4, type=div_]; -"5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; -"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=6, type=symmetric_quantize]; -"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=7, type=conv2d]; -"8 /nncf_model_output_0" [id=8, type=nncf_model_output]; -"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/is_floating_point_0"; -"0 /nncf_model_input_0" -> "2 ShiftScaleParametrized/clone_0"; -"2 ShiftScaleParametrized/clone_0" -> "3 ShiftScaleParametrized/sub__0"; -"3 ShiftScaleParametrized/sub__0" -> "4 ShiftScaleParametrized/div__0"; -"4 ShiftScaleParametrized/div__0" -> "5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0"; -"5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; -"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; -"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "8 /nncf_model_output_0"; +"1 ShiftScaleParametrized/clone_0" [id=1, type=clone]; +"2 ShiftScaleParametrized/sub__0" [id=2, type=sub_]; +"3 ShiftScaleParametrized/div__0" [id=3, type=div_]; +"4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" [id=4, type=symmetric_quantize]; +"5 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; +"6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=6, type=conv2d]; +"7 /nncf_model_output_0" [id=7, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/clone_0"; +"1 ShiftScaleParametrized/clone_0" -> "2 ShiftScaleParametrized/sub__0"; +"2 ShiftScaleParametrized/sub__0" -> "3 ShiftScaleParametrized/div__0"; +"3 ShiftScaleParametrized/div__0" -> "4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0"; +"4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" -> "6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; +"5 
ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; +"6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "7 /nncf_model_output_0"; } diff --git a/tests/torch/models_hub_test/requirements.txt b/tests/torch/models_hub_test/requirements.txt index ec4f8b0e29e..9e1ccf3e367 100644 --- a/tests/torch/models_hub_test/requirements.txt +++ b/tests/torch/models_hub_test/requirements.txt @@ -3,7 +3,7 @@ torch==2.1.0 --extra-index-url https://download.pytorch.org/whl/cpu torchvision==0.16.0 transformers==4.36.0 -pytest +pytest==7.4.4 timm==0.9.2 scikit-learn==1.2.2 av==11.0.0
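
Illustration (not part of the patch): a minimal sketch of how the stateful-model handling added above is expected to behave. It mirrors `model_has_state()` (a model is treated as stateful when it has sink/Assign nodes) and the new `OVCompiledModelEngine` behavior (a single infer request whose state is reset before every inference). The tiny ReadValue/Assign model, the variable id, and the opset13 import are assumptions patterned after the StatefulModel helper added in tests/openvino/native/models.py; adjust the opset import to match the installed OpenVINO version.

import numpy as np
import openvino.runtime as ov
from openvino.runtime import opset13 as opset  # assumed opset; mirror the test helper's import


def build_tiny_stateful_model() -> ov.Model:
    # Hypothetical 1x8 model with a ReadValue/Assign pair, patterned after the
    # StatefulModel test helper introduced in this patch.
    shape = [1, 8]
    param = opset.parameter(shape, name="input_data", dtype=np.float32)
    init_val = opset.constant(np.zeros(shape), np.float32)
    read = opset.read_value(init_val, "var_id_667", np.float32, shape)
    add = opset.add(read, param, name="MemoryAdd")
    assign = opset.assign(add, "var_id_667")
    result = opset.result(add, name="Result")
    return ov.Model(results=[result], sinks=[assign], parameters=[param], name="TinyStateful")


model = build_tiny_stateful_model()

# model_has_state(): statefulness is detected through the presence of sink nodes.
stateful = len(model.get_sinks()) > 0  # True for the model above

compiled = ov.Core().compile_model(model, "CPU")
infer_request = compiled.create_infer_request()

input_data = [np.ones((1, 8), dtype=np.float32)]
for _ in range(3):
    # OVCompiledModelEngine resets the request state for stateful models before each
    # inference, so repeated calibration/validation calls do not accumulate state.
    if stateful and hasattr(infer_request, "reset_state"):
        infer_request.reset_state()
    outputs = infer_request.infer(input_data, share_inputs=True)

# With the state reset on every call, the ReadValue buffer stays zero-initialized and
# the output equals the input on each iteration (as the new engine test asserts).
assert np.array_equal(next(iter(outputs.values()))[0], input_data[0][0])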