Skip to content

Commit

Permalink
[PTQ][OV] SQ for Convolutions (#2204)
Browse files Browse the repository at this point in the history
### Changes

- Updated SmoothQuant algorithm to work with Convolution layers;

### Reason for changes

- Better accuracy results in some cases;

### Related tickets

- 113591

### Tests

---------

Co-authored-by: Liubov Talamanova <[email protected]>
  • Loading branch information
KodiaqQ and l-bat authored Nov 7, 2023
1 parent e00f6b7 commit 074e749
Show file tree
Hide file tree
Showing 17 changed files with 3,011 additions and 2,811 deletions.
3 changes: 2 additions & 1 deletion nncf/common/graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,8 @@ def add_nncf_node(
:param layer_name: The name of the framework-specific "layer" object that houses the operation represented by
the node and associated trainable weights, if any.
:param ignored_algorithms: A list of compression algorithm names (from the same set of strings that are
specified in the `"algorithm": ...` section of the .json NNCF config) which should ignore this operation.
specified in the `"algorithm": ...` section of the .json NNCF config or `ptq_quantization`)
which should ignore this operation.
:param is_in_iteration_scope: Whether the node to be currently added corresponds to an iteration of an RNN
cycle (where the number of iterations is determined dynamically based on the RNN input shape).
:param is_integer_input: Only valid for input nodes - whether the input node corresponds to an integer input.
Expand Down
22 changes: 21 additions & 1 deletion nncf/openvino/graph/nncf_graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,27 @@ def _add_nncf_node(node: ov.Node, graph: NNCFGraph) -> None:
"""
node_type = node.get_type_name()
metatype = get_node_metatype(node)
graph.add_nncf_node(node_name=node.get_friendly_name(), node_type=node_type, node_metatype=metatype)
ignored_algorithms = GraphConverter._get_ignored_algorithms(node)
graph.add_nncf_node(
node_name=node.get_friendly_name(),
node_type=node_type,
node_metatype=metatype,
ignored_algorithms=ignored_algorithms,
)

@staticmethod
def _get_ignored_algorithms(node: ov.Node) -> List[str]:
    """
    Builds the value for the `ignored_algorithms` option of `add_nncf_node`
    for the given OpenVINO node.

    A node whose friendly name contains `nncf_smooth_quant` is marked to be
    ignored by `ptq_quantization`; any other node gets an empty list.

    :param node: OpenVINO node.
    :return: List of the ignored algorithms.
    """
    is_smooth_quant_node = "nncf_smooth_quant" in node.get_friendly_name()
    return ["ptq_quantization"] if is_smooth_quant_node else []

@staticmethod
def create_nncf_graph(model: ov.Model) -> NNCFGraph:
Expand Down
31 changes: 25 additions & 6 deletions nncf/openvino/pot/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from openvino._offline_transformations import compress_quantize_weights_transformation
from openvino.tools import pot

from nncf.common.deprecation import warning_deprecated
from nncf.common.logging import nncf_logger
from nncf.common.quantization.structs import QuantizationPreset
from nncf.data import Dataset
Expand Down Expand Up @@ -339,10 +340,19 @@ def quantize_impl(
if advanced_parameters is None:
advanced_parameters = AdvancedQuantizationParameters()

if model_type == ModelType.TRANSFORMER and advanced_parameters.smooth_quant_alpha > 0:
if advanced_parameters.smooth_quant_alpha is not None:
warning_deprecated(
"`AdvancedQuantizationParameters(smooth_quant_alpha=..)` is deprecated."
"Please, use `AdvancedQuantizationParameters(smooth_quant_alphas)` option "
"with AdvancedSmoothQuantParameters(convolution=.., matmul=..) as value instead."
)

sq_params = advanced_parameters.smooth_quant_alphas

if model_type == ModelType.TRANSFORMER and (sq_params.convolution > 0 or sq_params.matmul > 0):
nncf_logger.warning(
'IMPORTANT. The advanced parameter "smooth_quant_alpha > 0" IS NOT SUPPORTED for the POT backend!'
'Please, use "smooth_quant_alpha = -1".'
"IMPORTANT. The AdvancedSmoothQuantParameters parameter value > 0 IS NOT SUPPORTED for the POT backend!"
"Please, use `AdvancedSmoothQuantParameters(convolution = -1, matmul = -1)`."
)

algorithm_parameters = _create_quantization_config(
Expand Down Expand Up @@ -443,10 +453,19 @@ def quantize_with_accuracy_control_impl(
if advanced_quantization_parameters is None:
advanced_quantization_parameters = AdvancedQuantizationParameters()

if model_type == ModelType.TRANSFORMER and advanced_quantization_parameters.smooth_quant_alpha > 0:
if advanced_quantization_parameters.smooth_quant_alpha is not None:
warning_deprecated(
"`AdvancedQuantizationParameters(smooth_quant_alpha=..)` is deprecated."
"Please, use `AdvancedQuantizationParameters(smooth_quant_alphas)` option "
"with AdvancedSmoothQuantParameters(convolution=.., matmul=..) as value instead."
)

sq_params = advanced_quantization_parameters.smooth_quant_alphas

if model_type == ModelType.TRANSFORMER and (sq_params.convolution > 0 or sq_params.matmul > 0):
nncf_logger.warning(
'IMPORTANT. The advanced parameter "smooth_quant_alpha > 0" IS NOT SUPPORTED for the POT backend!'
'Please, use "smooth_quant_alpha = -1".'
"IMPORTANT. The AdvancedSmoothQuantParameters parameter value > 0 IS NOT SUPPORTED for the POT backend!"
"Please, use `AdvancedSmoothQuantParameters(convolution = -1, matmul = -1)`."
)

if advanced_quantization_parameters.disable_bias_correction:
Expand Down
36 changes: 31 additions & 5 deletions nncf/quantization/advanced_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,25 @@ class AdvancedBiasCorrectionParameters:
threshold: Optional[float] = None


@api()
@dataclass
class AdvancedSmoothQuantParameters:
    """
    Contains advanced alpha parameters for SmoothQuant algorithm.
    It regulates the calculation of the smooth scale for different node types.
    A negative value switches off the algorithm for the corresponding node type. In case of inaccurate
    results, a value may be adjusted in the range from 0 to 1 or set to -1 to disable smoothing
    for that node type.

    :param convolution: Alpha value used for Convolution layers; a negative value disables their smoothing.
    :type convolution: float
    :param matmul: Alpha value used for MatMul layers; a negative value disables their smoothing.
    :type matmul: float
    """

    # Negative default: smoothing of Convolution layers is switched off unless requested.
    convolution: float = -1
    # Non-negative default alpha for MatMul layers.
    matmul: float = 0.95


@api()
@dataclass
class AdvancedQuantizationParameters:
Expand All @@ -130,10 +149,6 @@ class AdvancedQuantizationParameters:
:type disable_channel_alignment: bool
:param disable_bias_correction: Whether to disable the bias correction.
:type disable_bias_correction: bool
:param smooth_quant_alpha: SmoothQuant-related parameter. It regulates the calculation of the smooth scale.
The default value is 0.95. A negative value switches off the algorithm. In case of inaccurate results,
this parameter may be adjusted in the range from 0 to 1 or set -1 to disable SmoothQuant algorithm.
:type smooth_quant_alpha: float
:param activations_quantization_params: Quantization parameters for activations.
:type activations_quantization_params: nncf.quantization.advanced_parameters.QuantizationParameters
:param weights_quantization_params: Quantization parameters for weights.
Expand All @@ -144,6 +159,13 @@ class AdvancedQuantizationParameters:
:type weights_range_estimator_params: nncf.quantization.range_estimator.RangeEstimatorParameters
:param bias_correction_params: Advanced bias correction parameters.
:type bias_correction_params: nncf.quantization.advanced_parameters.AdvancedBiasCorrectionParameters
:param smooth_quant_alphas: SmoothQuant-related parameters mapping.
It regulates the calculation of the smooth scale. The default values are stored in AdvancedSmoothQuantParameters.
A negative value for a field switches off smoothing for that node type. In case of inaccurate results,
the fields may be adjusted in the range from 0 to 1 or set to -1 to disable smoothing for that type.
:type smooth_quant_alphas: AdvancedSmoothQuantParameters
:param smooth_quant_alpha: Deprecated SmoothQuant-related parameter.
:type smooth_quant_alpha: float
:param backend_params: Backend-specific parameters.
:type backend_params: Dict[str, Any]
"""
Expand All @@ -154,7 +176,6 @@ class AdvancedQuantizationParameters:
inplace_statistics: bool = True
disable_channel_alignment: bool = True
disable_bias_correction: bool = False
smooth_quant_alpha: float = 0.95

# Advanced Quantization parameters
activations_quantization_params: QuantizationParameters = field(default_factory=QuantizationParameters)
Expand All @@ -167,6 +188,11 @@ class AdvancedQuantizationParameters:
# Advanced BiasCorrection algorithm parameters
bias_correction_params: AdvancedBiasCorrectionParameters = field(default_factory=AdvancedBiasCorrectionParameters)

# Advanced SmoothQuant algorithm parameters
smooth_quant_alphas: AdvancedSmoothQuantParameters = field(default_factory=AdvancedSmoothQuantParameters)
# Deprecated parameter
smooth_quant_alpha: float = None

# Backend specific parameters
backend_params: Dict[str, Any] = field(default_factory=dict)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from nncf.common.quantization.structs import QuantizationPreset
from nncf.common.utils.backend import BackendType
from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection
from nncf.quantization.algorithms.channel_alignment.algorithm import ChannelAlignment
from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection
Expand Down Expand Up @@ -79,7 +80,12 @@ def _get_minmax_quantization_param_grid() -> ParamGrid:


def _get_smooth_quant_param_grid() -> ParamGrid:
    """
    Returns the parameter grid searched for the SmoothQuant algorithm.

    Only the MatMul alpha is varied; the Convolution alpha keeps the default
    of `AdvancedSmoothQuantParameters`.

    :return: Mapping from the `advanced_parameters:smooth_quant_alphas` key to
        the list of candidate `AdvancedSmoothQuantParameters` values.
    """
    alpha_values = [0.15, 0.25, 0.5, 0.75, 0.95]
    # Iterate the values directly: `itertools.product` over a single iterable
    # yields 1-tuples such as `(0.15,)`, which would put a tuple into `matmul`
    # and break later numeric comparisons against the alpha.
    return {
        "advanced_parameters:smooth_quant_alphas": [
            AdvancedSmoothQuantParameters(matmul=alpha_v) for alpha_v in alpha_values
        ]
    }


def _get_channel_alignment_param_grid() -> ParamGrid:
Expand Down
21 changes: 21 additions & 0 deletions nncf/quantization/algorithms/min_max/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,16 @@ def _get_ignored_names(
ignored_names_by_layer_attributes = self._backend_entity.get_ignored_names_by_layer_attributes(
inference_nncf_graph
)

ignored_scope_by_algorithm = self._get_ignored_scope_by_algorithm(inference_nncf_graph)
ignored_names_by_algorithm = get_ignored_node_names_from_ignored_scope(
ignored_scope_by_algorithm, nncf_graph, strict=False
)

ignored_names.update({name: IgnoreReason.AUTOGENERATED for name in ignored_names_by_layer_attributes})

ignored_names.update({name: IgnoreReason.AUTOGENERATED for name in ignored_names_by_algorithm})

# User ignored scope has higher priority
ignored_names.update({name: IgnoreReason.USER_REQUESTED for name in user_ignored_names})

Expand All @@ -361,6 +369,19 @@ def _get_ignored_scope(self, inference_nncf_graph: NNCFGraph, ignored_patterns:

return IgnoredScope(names=nncf_node_names)

def _get_ignored_scope_by_algorithm(self, inference_nncf_graph: NNCFGraph) -> IgnoredScope:
    """
    Collects the nodes whose `ignored_algorithms` contain `ptq_quantization`
    and wraps their names into an IgnoredScope.

    :param inference_nncf_graph: Inference NNCFGraph instance.
    :return: IgnoredScope with the names of the matched nodes.
    """
    skipped_names = [
        node.node_name
        for node in inference_nncf_graph.get_all_nodes()
        if "ptq_quantization" in node.ignored_algorithms
    ]
    return IgnoredScope(names=skipped_names)

def _get_quantizer_setup(
self,
nncf_graph: NNCFGraph,
Expand Down
19 changes: 16 additions & 3 deletions nncf/quantization/algorithms/post_training/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,23 @@ def create_ptq_pipeline(

# Add the `SmoothQuant` algorithm as the first step of the pipeline.
# It is added only for `ModelType.TRANSFORMER`.
if model_type == ModelType.TRANSFORMER and advanced_parameters.smooth_quant_alpha >= 0:
pipeline_steps.append(
[SmoothQuant(subset_size, advanced_parameters.inplace_statistics, advanced_parameters.smooth_quant_alpha)]
sq_params = advanced_parameters.smooth_quant_alphas
sq_alpha = advanced_parameters.smooth_quant_alpha
if sq_alpha is not None:
warning_deprecated(
"`AdvancedQuantizationParameters(smooth_quant_alpha=..)` is deprecated."
"Please, use `AdvancedQuantizationParameters(smooth_quant_alphas)` option "
"with AdvancedSmoothQuantParameters(convolution=.., matmul=..) as value instead."
)
if sq_alpha < 0:
sq_params.convolution = -1
sq_params.matmul = -1
else:
sq_params.matmul = sq_alpha

if model_type == ModelType.TRANSFORMER and (sq_params.convolution >= 0 or sq_params.matmul >= 0):
alpha_map = {"convolution": sq_params.convolution, "matmul": sq_params.matmul}
pipeline_steps.append([SmoothQuant(subset_size, advanced_parameters.inplace_statistics, alpha_map=alpha_map)])

# Add the `ChannelAlignment` algorithm as the second step of the pipeline.
if not advanced_parameters.disable_channel_alignment:
Expand Down
Loading

0 comments on commit 074e749

Please sign in to comment.