[PTQ][OV] SQ for Convolutions (#2204)

### Changes
- Updated the SmoothQuant algorithm to work with Convolution layers.

### Reason for changes
- Better accuracy results in some cases.

### Related tickets
- 113591

### Tests

---------

Co-authored-by: Liubov Talamanova <[email protected]>
openvinotoolkit · Nov 7, 2023 · 074e749 · 074e749
1 parent e00f6b7
commit 074e749
Show file tree

Hide file tree

Showing 17 changed files with 3,011 additions and 2,811 deletions.
diff --git a/nncf/common/graph/graph.py b/nncf/common/graph/graph.py
@@ -434,7 +434,8 @@ def add_nncf_node(
         :param layer_name: The name of the framework-specific "layer" object that houses the operation represented by
             the node and associated trainable weights, if any.
         :param ignored_algorithms: A list of compression algorithm names (from the same set of strings that are
-            specified in the `"algorithm": ...` section of the .json NNCF config) which should ignore this operation.
+            specified in the `"algorithm": ...` section of the .json NNCF config or `ptq_quantization`)
+            which should ignore this operation.
         :param is_in_iteration_scope: Whether the node to be currently added corresponds to an iteration of an RNN
             cycle (where the number of iterations is determined dynamically based on the RNN input shape).
         :param is_integer_input: Only valid for input nodes - whether the input node corresponds to an integer input.

diff --git a/nncf/openvino/graph/nncf_graph_builder.py b/nncf/openvino/graph/nncf_graph_builder.py
@@ -123,7 +123,27 @@ def _add_nncf_node(node: ov.Node, graph: NNCFGraph) -> None:
         """
         node_type = node.get_type_name()
         metatype = get_node_metatype(node)
-        graph.add_nncf_node(node_name=node.get_friendly_name(), node_type=node_type, node_metatype=metatype)
+        ignored_algorithms = GraphConverter._get_ignored_algorithms(node)
+        graph.add_nncf_node(
+            node_name=node.get_friendly_name(),
+            node_type=node_type,
+            node_metatype=metatype,
+            ignored_algorithms=ignored_algorithms,
+        )
+
+    @staticmethod
+    def _get_ignored_algorithms(node: ov.Node) -> List[str]:
+        """
+        Collects the algorithm names that must skip the given node, in the form
+        expected by the ignored_algorithms option of the add_nncf_node method.
+
+        :param node: OpenVINO node.
+        :return: `["ptq_quantization"]` when the node friendly name contains
+            `nncf_smooth_quant`, an empty list otherwise.
+        """
+        if "nncf_smooth_quant" not in node.get_friendly_name():
+            return []
+        return ["ptq_quantization"]
 
     @staticmethod
     def create_nncf_graph(model: ov.Model) -> NNCFGraph:

diff --git a/nncf/openvino/pot/quantization/quantize_model.py b/nncf/openvino/pot/quantization/quantize_model.py
@@ -18,6 +18,7 @@
 from openvino._offline_transformations import compress_quantize_weights_transformation
 from openvino.tools import pot
 
+from nncf.common.deprecation import warning_deprecated
 from nncf.common.logging import nncf_logger
 from nncf.common.quantization.structs import QuantizationPreset
 from nncf.data import Dataset
@@ -339,10 +340,19 @@ def quantize_impl(
     if advanced_parameters is None:
         advanced_parameters = AdvancedQuantizationParameters()
 
-    if model_type == ModelType.TRANSFORMER and advanced_parameters.smooth_quant_alpha > 0:
+    if advanced_parameters.smooth_quant_alpha is not None:
+        warning_deprecated(
+            "`AdvancedQuantizationParameters(smooth_quant_alpha=..)` is deprecated."
+            "Please, use `AdvancedQuantizationParameters(smooth_quant_alphas)` option "
+            "with AdvancedSmoothQuantParameters(convolution=.., matmul=..) as value instead."
+        )
+
+    sq_params = advanced_parameters.smooth_quant_alphas
+
+    if model_type == ModelType.TRANSFORMER and (sq_params.convolution > 0 or sq_params.matmul > 0):
         nncf_logger.warning(
-            'IMPORTANT. The advanced parameter "smooth_quant_alpha > 0" IS NOT SUPPORTED for the POT backend!'
-            'Please, use "smooth_quant_alpha = -1".'
+            "IMPORTANT. The AdvancedSmoothQuantParameters parameter value > 0 IS NOT SUPPORTED for the POT backend!"
+            "Please, use `AdvancedSmoothQuantParameters(convolution = -1, matmul = -1)`."
         )
 
     algorithm_parameters = _create_quantization_config(
@@ -443,10 +453,19 @@ def quantize_with_accuracy_control_impl(
     if advanced_quantization_parameters is None:
         advanced_quantization_parameters = AdvancedQuantizationParameters()
 
-    if model_type == ModelType.TRANSFORMER and advanced_quantization_parameters.smooth_quant_alpha > 0:
+    if advanced_quantization_parameters.smooth_quant_alpha is not None:
+        warning_deprecated(
+            "`AdvancedQuantizationParameters(smooth_quant_alpha=..)` is deprecated."
+            "Please, use `AdvancedQuantizationParameters(smooth_quant_alphas)` option "
+            "with AdvancedSmoothQuantParameters(convolution=.., matmul=..) as value instead."
+        )
+
+    sq_params = advanced_quantization_parameters.smooth_quant_alphas
+
+    if model_type == ModelType.TRANSFORMER and (sq_params.convolution > 0 or sq_params.matmul > 0):
         nncf_logger.warning(
-            'IMPORTANT. The advanced parameter "smooth_quant_alpha > 0" IS NOT SUPPORTED for the POT backend!'
-            'Please, use "smooth_quant_alpha = -1".'
+            "IMPORTANT. The AdvancedSmoothQuantParameters parameter value > 0 IS NOT SUPPORTED for the POT backend!"
+            "Please, use `AdvancedSmoothQuantParameters(convolution = -1, matmul = -1)`."
         )
 
     if advanced_quantization_parameters.disable_bias_correction:

diff --git a/nncf/quantization/advanced_parameters.py b/nncf/quantization/advanced_parameters.py
@@ -111,6 +111,25 @@ class AdvancedBiasCorrectionParameters:
     threshold: Optional[float] = None
 
 
+@api()
+@dataclass
+class AdvancedSmoothQuantParameters:
+    """
+    Contains advanced alpha parameters for the SmoothQuant algorithm.
+    Each alpha regulates the calculation of the smooth scale for one node type.
+    A negative value switches off the algorithm for that node type. In case of inaccurate results,
+    an alpha may be adjusted in the range from 0 to 1 or set to -1 to disable smoothing for that type.
+
+    :param convolution: Alpha for Convolution layers; -1 (disabled) by default.
+    :type convolution: float
+    :param matmul: Alpha for MatMul layers; 0.95 by default.
+    :type matmul: float
+    """
+
+    convolution: float = -1
+    matmul: float = 0.95
+
+
 @api()
 @dataclass
 class AdvancedQuantizationParameters:
@@ -130,10 +149,6 @@ class AdvancedQuantizationParameters:
     :type disable_channel_alignment: bool
     :param disable_bias_correction: Whether to disable the bias correction.
     :type disable_bias_correction: bool
-    :param smooth_quant_alpha: SmoothQuant-related parameter. It regulates the calculation of the smooth scale.
-        The default value is 0.95. A negative value switches off the algorithm. In case of inaccurate results,
-        this parameter may be adjusted in the range from 0 to 1 or set -1 to disable SmoothQuant algorithm.
-    :type smooth_quant_alpha: float
     :param activations_quantization_params: Quantization parameters for activations.
     :type activations_quantization_params: nncf.quantization.advanced_parameters.QuantizationParameters
     :param weights_quantization_params: Quantization parameters for weights.
@@ -144,6 +159,13 @@ class AdvancedQuantizationParameters:
     :type weights_range_estimator_params: nncf.quantization.range_estimator.RangeEstimatorParameters
     :param bias_correction_params: Advanced bias correction parameters.
     :type bias_correction_params: nncf.quantization.advanced_parameters.AdvancedBiasCorrectionParameters
+    :param smooth_quant_alphas: SmoothQuant-related parameters, one alpha per node type.
+        They regulate the calculation of the smooth scale. The default values are stored in
+        AdvancedSmoothQuantParameters. A negative value in a field switches off smoothing for that type.
+        In case of inaccurate results, fields may be adjusted in the range from 0 to 1 or set to -1.
+    :type smooth_quant_alphas: AdvancedSmoothQuantParameters
+    :param smooth_quant_alpha: Deprecated SmoothQuant-related parameter.
+    :type smooth_quant_alpha: float
     :param backend_params: Backend-specific parameters.
     :type backend_params: Dict[str, Any]
     """
@@ -154,7 +176,6 @@ class AdvancedQuantizationParameters:
     inplace_statistics: bool = True
     disable_channel_alignment: bool = True
     disable_bias_correction: bool = False
-    smooth_quant_alpha: float = 0.95
 
     # Advanced Quantization parameters
     activations_quantization_params: QuantizationParameters = field(default_factory=QuantizationParameters)
@@ -167,6 +188,11 @@ class AdvancedQuantizationParameters:
     # Advanced BiasCorrection algorithm parameters
     bias_correction_params: AdvancedBiasCorrectionParameters = field(default_factory=AdvancedBiasCorrectionParameters)
 
+    # Advanced SmoothQuant algorithm parameters
+    smooth_quant_alphas: AdvancedSmoothQuantParameters = field(default_factory=AdvancedSmoothQuantParameters)
+    # Deprecated parameter
+    smooth_quant_alpha: float = None
+
     # Backend specific parameters
     backend_params: Dict[str, Any] = field(default_factory=dict)
 

diff --git a/nncf/quantization/algorithms/hyperparameter_tuner/param_grid.py b/nncf/quantization/algorithms/hyperparameter_tuner/param_grid.py
@@ -14,6 +14,7 @@
 
 from nncf.common.quantization.structs import QuantizationPreset
 from nncf.common.utils.backend import BackendType
+from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
 from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection
 from nncf.quantization.algorithms.channel_alignment.algorithm import ChannelAlignment
 from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection
@@ -79,7 +80,12 @@ def _get_minmax_quantization_param_grid() -> ParamGrid:
 
 
 def _get_smooth_quant_param_grid() -> ParamGrid:
-    return {"advanced_parameters:smooth_quant_alpha": [0.15, 0.25, 0.5, 0.75, 0.95]}
+    """Build the SmoothQuant grid: MatMul alpha candidates, Convolution alpha left at its default."""
+    alpha_values = [0.15, 0.25, 0.5, 0.75, 0.95]
+    # Iterate the list directly: itertools.product() over a single iterable
+    # yields 1-tuples such as (0.15,), which would make `matmul` a tuple, not a float.
+    candidates = [AdvancedSmoothQuantParameters(matmul=alpha_v) for alpha_v in alpha_values]
+    return {"advanced_parameters:smooth_quant_alphas": candidates}
 
 
 def _get_channel_alignment_param_grid() -> ParamGrid:

diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py
@@ -339,8 +339,16 @@ def _get_ignored_names(
         ignored_names_by_layer_attributes = self._backend_entity.get_ignored_names_by_layer_attributes(
             inference_nncf_graph
         )
+
+        ignored_scope_by_algorithm = self._get_ignored_scope_by_algorithm(inference_nncf_graph)
+        ignored_names_by_algorithm = get_ignored_node_names_from_ignored_scope(
+            ignored_scope_by_algorithm, nncf_graph, strict=False
+        )
+
         ignored_names.update({name: IgnoreReason.AUTOGENERATED for name in ignored_names_by_layer_attributes})
 
+        ignored_names.update({name: IgnoreReason.AUTOGENERATED for name in ignored_names_by_algorithm})
+
         # User ignored scope has higher priority
         ignored_names.update({name: IgnoreReason.USER_REQUESTED for name in user_ignored_names})
 
@@ -361,6 +369,19 @@ def _get_ignored_scope(self, inference_nncf_graph: NNCFGraph, ignored_patterns:
 
         return IgnoredScope(names=nncf_node_names)
 
+    def _get_ignored_scope_by_algorithm(self, inference_nncf_graph: NNCFGraph) -> IgnoredScope:
+        """
+        Builds an IgnoredScope from the nodes whose ignored_algorithms
+        contain `ptq_quantization`.
+
+        :param inference_nncf_graph: Inference NNCFGraph instance.
+        :return: IgnoredScope holding the names of the matching nodes.
+        """
+        # Exact membership test against each node's ignored_algorithms entries.
+        graph_nodes = inference_nncf_graph.get_all_nodes()
+        matched_names = [node.node_name for node in graph_nodes if "ptq_quantization" in node.ignored_algorithms]
+        return IgnoredScope(names=matched_names)
+
     def _get_quantizer_setup(
         self,
         nncf_graph: NNCFGraph,

diff --git a/nncf/quantization/algorithms/post_training/pipeline.py b/nncf/quantization/algorithms/post_training/pipeline.py
@@ -80,10 +80,23 @@ def create_ptq_pipeline(
 
     # Add the `SmoothQuant` algorithm as the first step of the pipeline.
     # It is added only for `ModelType.TRANSFORMER`.
-    if model_type == ModelType.TRANSFORMER and advanced_parameters.smooth_quant_alpha >= 0:
-        pipeline_steps.append(
-            [SmoothQuant(subset_size, advanced_parameters.inplace_statistics, advanced_parameters.smooth_quant_alpha)]
+    sq_params = advanced_parameters.smooth_quant_alphas
+    sq_alpha = advanced_parameters.smooth_quant_alpha
+    if sq_alpha is not None:
+        warning_deprecated(
+            "`AdvancedQuantizationParameters(smooth_quant_alpha=..)` is deprecated."
+            "Please, use `AdvancedQuantizationParameters(smooth_quant_alphas)` option "
+            "with AdvancedSmoothQuantParameters(convolution=.., matmul=..) as value instead."
         )
+        if sq_alpha < 0:
+            sq_params.convolution = -1
+            sq_params.matmul = -1
+        else:
+            sq_params.matmul = sq_alpha
+
+    if model_type == ModelType.TRANSFORMER and (sq_params.convolution >= 0 or sq_params.matmul >= 0):
+        alpha_map = {"convolution": sq_params.convolution, "matmul": sq_params.matmul}
+        pipeline_steps.append([SmoothQuant(subset_size, advanced_parameters.inplace_statistics, alpha_map=alpha_map)])
 
     # Add the `ChannelAlignment` algorithm as the second step of the pipeline.
     if not advanced_parameters.disable_channel_alignment: