Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OV] Introduce support of quantization If operation #2101

Merged
merged 66 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
e7eddd0
Introduce support of quantization If op for OV
kshpv Sep 1, 2023
846f0fd
add backend entities
kshpv Sep 1, 2023
9a6fcd4
code improvement
kshpv Sep 4, 2023
704a72d
typo
kshpv Sep 4, 2023
8314131
add reinitialization of cached variables for MinMax
kshpv Sep 5, 2023
3fe3b5e
Apply comments
kshpv Sep 6, 2023
9f7400c
update logic
kshpv Sep 6, 2023
e097cd4
Implement dfs approach
kshpv Sep 6, 2023
5139a18
typo
kshpv Sep 6, 2023
3b00f73
remove torch onnx backend impl
kshpv Sep 6, 2023
9ea3ff2
code improvements
kshpv Sep 6, 2023
b168892
make collect_dataitems_for_children_models common
kshpv Sep 7, 2023
c2473a9
generalize make_dataset_for_child_models
kshpv Sep 7, 2023
9201534
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 7, 2023
c192331
unification
kshpv Sep 7, 2023
f20892d
generalize and remove set_child_model
kshpv Sep 7, 2023
6a7aeaf
update dataset calculation for submodels
kshpv Sep 8, 2023
9d0c27b
generalize logic
kshpv Sep 8, 2023
4970d9d
add extract if subgraph transform
kshpv Sep 8, 2023
589493b
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 8, 2023
538c4b8
make private
kshpv Sep 8, 2023
10de119
code improvements
kshpv Sep 8, 2023
fb4559f
code improvements
kshpv Sep 8, 2023
08776a0
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 11, 2023
24d88ca
docstrings; update method names
kshpv Sep 11, 2023
4ac706e
typo
kshpv Sep 11, 2023
09c8693
lint
kshpv Sep 11, 2023
9cd5dca
separate method for if condition input name and submodel input names
kshpv Sep 11, 2023
db79117
fix merge typos
kshpv Sep 11, 2023
68d04a8
lint
kshpv Sep 11, 2023
f374441
add minimum statistic sample
kshpv Sep 12, 2023
b7ec929
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 12, 2023
0a9696d
apply comments
kshpv Sep 12, 2023
62c1954
make IF input quantizable for OV
kshpv Sep 12, 2023
25717a9
add hw config for IF to CPU
kshpv Sep 12, 2023
e35d1a3
Update placement of IF op quantization logic
kshpv Sep 15, 2023
af127ac
Remove model_cnt and dumping model method
kshpv Sep 15, 2023
dabd0d2
Rename module
kshpv Sep 15, 2023
bab6a71
remove intermediate_model_dir param for PTQ
kshpv Sep 15, 2023
1160ac5
revert MIN_SAMPLES_NUM
kshpv Sep 15, 2023
88873c6
typo
kshpv Sep 15, 2023
443332c
revert collectors changes
kshpv Sep 15, 2023
2958405
optimize dataset collection
kshpv Sep 15, 2023
b8e1dc6
update main method
kshpv Sep 15, 2023
67032d4
Update method name
kshpv Sep 15, 2023
f13ba41
revert formatting of config
kshpv Sep 18, 2023
9cd0f78
add error if turn on BiasCorrection
kshpv Sep 18, 2023
30d80a0
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 18, 2023
3e831fb
Apply comments
kshpv Sep 18, 2023
587d5e7
update logs
kshpv Sep 19, 2023
8f34fc1
Improve logging
kshpv Sep 19, 2023
346951e
Apply comments
kshpv Sep 20, 2023
2c6f21b
typo
kshpv Sep 20, 2023
52adfd4
update logs
kshpv Sep 20, 2023
4135289
update track and revert FBC changes
kshpv Sep 20, 2023
7bd9e2e
update log msg
kshpv Sep 20, 2023
6e70357
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 20, 2023
7a32199
Remove logging from PTQ; put logging into MinMax
kshpv Sep 21, 2023
6af3309
add WA for new OV
kshpv Sep 21, 2023
e9b4131
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 21, 2023
b21cd59
lint
kshpv Sep 21, 2023
372141e
Merge remote-tracking branch 'remote/develop' into ov_if_op_support
kshpv Sep 22, 2023
df6cfcb
add graph test
kshpv Sep 22, 2023
1255f1e
minor
kshpv Sep 22, 2023
781e26c
typehints
kshpv Sep 22, 2023
36381a7
lint
kshpv Sep 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions nncf/quantization/advanced_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ class AdvancedQuantizationParameters:
:type bias_correction_params: nncf.quantization.advanced_parameters.AdvancedBiasCorrectionParameters
:param backend_params: Backend-specific parameters.
:type backend_params: Dict[str, Any]
:param intermediate_model_dir: Path to a directory for intermediated models saving.
:type intermediate_model_dir: str
"""

# General parameters
Expand All @@ -170,6 +172,8 @@ class AdvancedQuantizationParameters:
# Backend specific parameters
backend_params: Dict[str, Any] = field(default_factory=dict)

intermediate_model_dir: Optional[str] = None
KodiaqQ marked this conversation as resolved.
Show resolved Hide resolved


@api()
@dataclass
Expand Down
7 changes: 5 additions & 2 deletions nncf/quantization/algorithms/min_max/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,16 @@ def __init__(
quantizer_group, preset, self._quantization_params[quantizer_group]
)

self._reset_cache()
self._algorithm_key = f"MMQ_{hash(self)}"

def _reset_cache(self):
# It prevents the duplicate weight quantizers from being added.
# It can happen when you have layers that share the identical weight tensor.
self._quantization_target_points_to_qconfig = (
collections.OrderedDict()
) # type: OrderedDict[TargetPoint, QuantizerConfig]
self._unified_scale_groups = []
self._algorithm_key = f"MMQ_{hash(self)}"

@property
def available_backends(self) -> Dict[str, BackendType]:
Expand Down Expand Up @@ -677,7 +680,7 @@ def filter_func(point: StatisticPoint) -> bool:

def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
self._set_backend_entity(model)

self._reset_cache()
quantization_target_points, _ = self._get_quantization_target_points(model, graph)
output = StatisticPointsContainer()
for quantization_target_point, qconfig in quantization_target_points.items():
Expand Down
128 changes: 105 additions & 23 deletions nncf/quantization/algorithms/post_training/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# limitations under the License.

from dataclasses import dataclass
from typing import Callable, Dict, List, Optional, TypeVar
from typing import Callable, Dict, List, Optional, Tuple, TypeVar

from nncf import Dataset
from nncf.common.deprecation import warning_deprecated
Expand Down Expand Up @@ -88,12 +88,14 @@ def __init__(
super().__init__()
self.algorithms = []
self.first_stage_algorithms: List[self.FirstStageAlgorithm] = []
self.subset_size = subset_size
alexsu52 marked this conversation as resolved.
Show resolved Hide resolved

if target_device is TargetDevice.VPU:
warning_deprecated("VPU device is deprecated and will no longer be supported in the future.")

if advanced_parameters is None:
advanced_parameters = AdvancedQuantizationParameters()
self.intermediate_model_dir = advanced_parameters.intermediate_model_dir

if model_type == ModelType.TRANSFORMER:
smooth_quant_algorithm = SmoothQuant(
Expand Down Expand Up @@ -163,6 +165,45 @@ def __init__(
def available_backends(self) -> Dict[str, BackendType]:
return

def _is_single_model(self, model: TModel) -> bool:
    """
    Checks whether the model is a single model without child submodels to quantize.

    :param model: Backend-specific input model.
    :return: True if the model has no child submodels, False otherwise.
    :raises RuntimeError: If the model backend is not supported.
    """
    model_backend = get_backend(model)
    # ONNX and Torch models never carry nested submodels in this pipeline.
    if model_backend in (BackendType.ONNX, BackendType.TORCH):
        return True
    if model_backend == BackendType.OPENVINO:
        # Local import to avoid a hard dependency on the OpenVINO backend.
        from nncf.quantization.algorithms.post_training.openvino_backend import OVPostTrainingBackend

        return OVPostTrainingBackend.is_single_model(model)
    raise RuntimeError("Cannot check for child submodels because {} is not supported!".format(model_backend))

def _set_backend_entity(self, model: TModel) -> None:
    """
    Creates a helper class with a backend-specific logic of the algorithm.

    Only the OpenVINO backend supports child-submodel quantization; every other
    backend (including ONNX and Torch) raises the same RuntimeError as before.

    :param model: Backend-specific input model.
    :raises RuntimeError: If the model backend is not supported.
    """
    model_backend = get_backend(model)
    if model_backend == BackendType.OPENVINO:
        # Local import to avoid a hard dependency on the OpenVINO backend.
        from nncf.quantization.algorithms.post_training.openvino_backend import OVPostTrainingBackend

        self._backend_entity = OVPostTrainingBackend()
    else:
        raise RuntimeError(
            "Cannot return backend-specific entity because {} is not supported!".format(model_backend)
        )

def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
if self.first_stage_algorithms:
raise NotImplementedError(
Expand All @@ -176,20 +217,16 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin
output.add_statistic_point(statistic_point)
return output

def apply(
def _apply(
self,
model: TModel,
graph: NNCFGraph,
statistic_points: Optional[StatisticPointsContainer] = None,
dataset: Optional[Dataset] = None,
) -> TModel:
modified_model = copy_model(model)
modified_model_graph = graph
backend = get_backend(modified_model)

for first_stage_algorithm in self.first_stage_algorithms:
algorithm = first_stage_algorithm.algorithm

backend = get_backend(model)
if isinstance(algorithm, SmoothQuant) and backend != BackendType.OPENVINO:
nncf_logger.debug(f"{backend.name} does not support SmoothQuant algorithm yet.")
continue
Expand All @@ -199,31 +236,76 @@ def apply(
continue

for pre_pass in first_stage_algorithm.pre_passes:
modified_model = pre_pass(modified_model, modified_model_graph)
modified_model_graph = NNCFGraphFactory.create(modified_model)
model = pre_pass(model, graph)
graph = NNCFGraphFactory.create(model)

statistics_aggregator = StatisticsAggregatorFactory.create(modified_model, dataset)
algo_statistic_points = algorithm.get_statistic_points(modified_model, modified_model_graph)
statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset)
algo_statistic_points = algorithm.get_statistic_points(model, graph)
statistics_aggregator.register_statistic_points(algo_statistic_points)
statistics_aggregator.collect_statistics(modified_model, modified_model_graph)
modified_model = algorithm.apply(
modified_model, modified_model_graph, statistics_aggregator.statistic_points
)
modified_model_graph = NNCFGraphFactory.create(modified_model)
statistics_aggregator.collect_statistics(model, graph)
model = algorithm.apply(model, graph, statistics_aggregator.statistic_points)
model = NNCFGraphFactory.create(model)

if statistic_points is None:
statistics_aggregator = StatisticsAggregatorFactory.create(modified_model, dataset)
statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset)
for algorithm in self.algorithms:
algo_statistic_points = algorithm.get_statistic_points(modified_model, modified_model_graph)
algo_statistic_points = algorithm.get_statistic_points(model, graph)
statistics_aggregator.register_statistic_points(algo_statistic_points)

statistics_aggregator.collect_statistics(modified_model, modified_model_graph)
statistics_aggregator.collect_statistics(model, graph)
statistic_points = statistics_aggregator.statistic_points

for algorithm in self.algorithms[:-1]:
modified_model = algorithm.apply(modified_model, modified_model_graph, statistic_points)
modified_model_graph = NNCFGraphFactory.create(modified_model)
model = algorithm.apply(model, graph, statistic_points)
graph = NNCFGraphFactory.create(model)
# building the model graph is not required after the last algorithm
modified_model = self.algorithms[-1].apply(modified_model, modified_model_graph, statistic_points)
model = self.algorithms[-1].apply(model, graph, statistic_points)

return model

def apply(
    self,
    model: TModel,
    graph: NNCFGraph,
    statistic_points: Optional[StatisticPointsContainer] = None,
    dataset: Optional[Dataset] = None,
) -> TModel:
    """
    Applies the post-training quantization pipeline to the model.

    If the model contains child submodels (e.g. bodies of an OpenVINO If
    operation), each submodel is quantized recursively depth-first; otherwise
    the model is quantized directly.

    :param model: Model to quantize.
    :param graph: NNCFGraph of the model.
    :param statistic_points: Optional pre-collected statistic points.
    :param dataset: Optional calibration dataset.
    :return: Quantized model.
    """
    # Work on a copy so the caller's model object is left untouched.
    model_copy = copy_model(model)
    if self._is_single_model(model_copy):
        return self._apply(model_copy, graph, statistic_points, dataset)
    self._set_backend_entity(model)
    nncf_logger.info("The model consists of child submodels. Each submodel will be quantized iteratively.")
    quantized_model, _ = self._dfs_quantize_models(model_copy, graph, dataset, statistic_points, 0)
    return quantized_model

def _dfs_quantize_models(
    self,
    parent_model: TModel,
    parent_graph: NNCFGraph,
    parent_dataset: Dataset,
    parent_statistic_points: Optional[StatisticPointsContainer],
    parent_model_cnt: int,
) -> Tuple[TModel, int]:
    """
    Quantizes the model depth-first: all child submodels are quantized (and set
    back into the parent) before the parent model itself is quantized.

    :param parent_model: Model to quantize; may contain child submodels.
    :param parent_graph: NNCFGraph of the parent model.
    :param parent_dataset: Calibration dataset for the parent model.
    :param parent_statistic_points: Optional pre-collected statistic points
        (only used for the parent model; children collect their own).
    :param parent_model_cnt: Global model number assigned to the parent model.
    :return: Tuple of the quantized model and the last model number used.
    """
    if not self._backend_entity.is_single_model(parent_model):
        # Extra outputs expose the tensors that are fed into each child submodel,
        # so calibration data for the children can be captured during inference.
        parent_model_with_additional_outputs = self._backend_entity.add_additional_outputs(parent_model)
        dataitems = self._backend_entity.collect_dataitems_for_children_models(
            parent_model_with_additional_outputs, parent_dataset, self.subset_size, parent_model_cnt
        )
        global_model_cnt = parent_model_cnt
        for child_model, backend_params in self._backend_entity.get_child_models(parent_model):
            child_dataset = self._backend_entity.make_dataset_for_child_models(dataitems, **backend_params)

            child_q_model, model_cnt = self._dfs_quantize_models(
                child_model, NNCFGraphFactory.create(child_model), child_dataset, None, global_model_cnt + 1
            )
            # Keep numbering global across the whole recursion, not per-branch.
            global_model_cnt = model_cnt

            nncf_logger.info(f"Set quantized model number {model_cnt} to the original model")
            self._backend_entity.set_child_model(child_q_model, **backend_params)
            if self.intermediate_model_dir:
                nncf_logger.info(f"Save quantized model number {model_cnt} to dir {self.intermediate_model_dir}")
                self._backend_entity.dump_model(child_q_model, self.intermediate_model_dir, **backend_params)

    nncf_logger.info(f"Quantize a model number {parent_model_cnt}")
    quantized_model = self._apply(parent_model, parent_graph, parent_statistic_points, parent_dataset)
    return quantized_model, parent_model_cnt
96 changes: 96 additions & 0 deletions nncf/quantization/algorithms/post_training/backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Copyright (c) 2023 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC
from abc import abstractmethod
from pathlib import Path
from typing import Any, Dict, Iterable, List, Tuple

from nncf import Dataset
from nncf.data.dataset import DataItem
from nncf.quantization.algorithms.post_training.algorithm import TModel


class PostTrainingBackend(ABC):
    """
    Interface for the backend-specific logic of quantizing models that contain
    child submodels (e.g. bodies of an OpenVINO If operation).
    """

    @staticmethod
    @abstractmethod
    def collect_dataitems_for_children_models(
        model: TModel, calibration_dataset: Dataset, subset_size: int, model_cnt: int
    ) -> Iterable[DataItem]:
        """
        Returns dataitems for children models of the main model.

        :param model: Model to infer to collect dataitems.
        :param calibration_dataset: Dataset is used to collect new dataitems.
        :param subset_size: Size of dataitems to collect.
        :param model_cnt: Global model number.
        :return: Collected dataitems.
        """

    @staticmethod
    @abstractmethod
    def make_dataset_for_child_models(dataitems: Iterable[DataItem], backend_params: Dict[str, Any]) -> Dataset:
        """
        Returns a dataset for child models.

        :param dataitems: Data items to collect into a dataset.
        :param backend_params: Backend-specific parameters.
        :return: Dataset for the child models.
        """

    @staticmethod
    @abstractmethod
    def is_single_model(model: TModel) -> bool:
        """
        Checks whether a model has inner subgraphs to quantize.

        :param model: Model to check.
        :return: True if the model has no inner subgraphs, otherwise - False.
        """

    @staticmethod
    @abstractmethod
    def get_child_models(model: TModel) -> List[Tuple[TModel, Dict[str, Any]]]:
        """
        Returns all child models of passed model.

        :param model: Model to seek for child models.
        :return: Models with backend-specific parameters.
        """

    @staticmethod
    @abstractmethod
    def add_additional_outputs(model: TModel) -> TModel:
        """
        Returns the model with additional outputs to collect statistics for child models.

        :param model: Model to update.
        :return: Updated model with extra outputs.
        """

    @staticmethod
    @abstractmethod
    def dump_model(model: TModel, dir: Path, backend_params: Dict[str, Any]) -> None:
        """
        Saves a model to a directory. Backend params are used to determine the model name to dump.

        :param model: Model to dump.
        :param dir: Directory path.
        :param backend_params: Backend-specific parameters.
        """

    @staticmethod
    @abstractmethod
    def set_child_model(child_model: TModel, backend_params: Dict[str, Any]) -> None:
        """
        Sets a child model back into its original parent model.

        :param child_model: Model to set.
        :param backend_params: Backend-specific parameters.
        """
Loading