Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PTQ][OV] SQ for Convolutions #2204

Merged
merged 24 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion nncf/openvino/graph/nncf_graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,27 @@ def _add_nncf_node(node: ov.Node, graph: NNCFGraph) -> None:
"""
node_type = node.get_type_name()
metatype = get_node_metatype(node)
graph.add_nncf_node(node_name=node.get_friendly_name(), node_type=node_type, node_metatype=metatype)
ignored_algorithms = GraphConverter._get_ignored_algorithms(node)
graph.add_nncf_node(
node_name=node.get_friendly_name(),
node_type=node_type,
node_metatype=metatype,
ignored_algorithms=ignored_algorithms,
)

@staticmethod
def _get_ignored_algorithms(node: ov.Node) -> List[str]:
    """
    Builds the value passed as the ignored_algorithms option of the
    add_nncf_node method for the given node.

    The result contains "quantization" when the friendly name of the node
    includes the "nncf_smooth_quant" substring, and is empty otherwise.

    :param node: OpenVINO node.
    :return: List of the ignored algorithms.
    """
    is_smooth_quant_node = "nncf_smooth_quant" in node.get_friendly_name()
    return ["quantization"] if is_smooth_quant_node else []

@staticmethod
def create_nncf_graph(model: ov.Model) -> NNCFGraph:
Expand Down
31 changes: 25 additions & 6 deletions nncf/openvino/pot/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from openvino._offline_transformations import compress_quantize_weights_transformation
from openvino.tools import pot

from nncf.common.deprecation import warning_deprecated
from nncf.common.logging import nncf_logger
from nncf.common.quantization.structs import QuantizationPreset
from nncf.data import Dataset
Expand Down Expand Up @@ -339,10 +340,19 @@ def quantize_impl(
if advanced_parameters is None:
advanced_parameters = AdvancedQuantizationParameters()

if model_type == ModelType.TRANSFORMER and advanced_parameters.smooth_quant_alpha > 0:
if advanced_parameters.smooth_quant_alpha is not None:
warning_deprecated(
"`AdvancedQuantizationParameters(smooth_quant_alpha=..)` is deprecated."
"Please, use `AdvancedQuantizationParameters(smooth_quant_alphas)` option "
"with AdvancedSmoothQuantParameters(convolution=.., matmul=..) as value instead."
)

sq_params = advanced_parameters.smooth_quant_alphas

if model_type == ModelType.TRANSFORMER and (sq_params.convolution > 0 or sq_params.matmul > 0):
nncf_logger.warning(
'IMPORTANT. The advanced parameter "smooth_quant_alpha > 0" IS NOT SUPPORTED for the POT backend!'
'Please, use "smooth_quant_alpha = -1".'
'IMPORTANT. The AdvancedSmoothQuant parameter value > 0" IS NOT SUPPORTED for the POT backend!'
KodiaqQ marked this conversation as resolved.
Show resolved Hide resolved
'Please, use "AdvancedSmoothQuantParameters(convolution = -1, matmul = -1)".'
)

algorithm_parameters = _create_quantization_config(
Expand Down Expand Up @@ -443,10 +453,19 @@ def quantize_with_accuracy_control_impl(
if advanced_quantization_parameters is None:
advanced_quantization_parameters = AdvancedQuantizationParameters()

if model_type == ModelType.TRANSFORMER and advanced_quantization_parameters.smooth_quant_alpha > 0:
if advanced_quantization_parameters.smooth_quant_alpha is not None:
warning_deprecated(
"`AdvancedQuantizationParameters(smooth_quant_alpha=..)` is deprecated."
"Please, use `AdvancedQuantizationParameters(smooth_quant_alphas)` option "
"with AdvancedSmoothQuantParameters(convolution=.., matmul=..) as value instead."
)

sq_params = advanced_quantization_parameters.smooth_quant_alphas

if model_type == ModelType.TRANSFORMER and (sq_params.convolution > 0 or sq_params.matmul > 0):
nncf_logger.warning(
'IMPORTANT. The advanced parameter "smooth_quant_alpha > 0" IS NOT SUPPORTED for the POT backend!'
'Please, use "smooth_quant_alpha = -1".'
"IMPORTANT. The AdvancedSmoothQuantParameters parameter value > 0 IS NOT SUPPORTED for the POT backend!"
'Please, use "AdvancedSmoothQuantParameters(convolution = -1, matmul = -1)".'
)

if advanced_quantization_parameters.disable_bias_correction:
Expand Down
36 changes: 31 additions & 5 deletions nncf/quantization/advanced_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,25 @@ class AdvancedBiasCorrectionParameters:
threshold: Optional[float] = None


@api()
@dataclass
class AdvancedSmoothQuantParameters:
    """
    Contains advanced alpha parameters for SmoothQuant algorithm.
    It regulates the calculation of the smooth scale for different node types.
    A negative value switches off the algorithm for current node type. In case of inaccurate results,
    this parameter may be adjusted in the range from 0 to 1 or set -1 to disable SmoothQuant algorithm.

    :param convolution: Alpha value used to calculate the smooth scale for Convolution layers.
        A negative value disables smoothing for Convolution layers. Defaults to -1.
    :type convolution: float
    :param matmul: Alpha value used to calculate the smooth scale for MatMul layers.
        A negative value disables smoothing for MatMul layers. Defaults to 0.95.
    :type matmul: float
    """

    convolution: float = -1
    matmul: float = 0.95
alexsu52 marked this conversation as resolved.
Show resolved Hide resolved


@api()
@dataclass
class AdvancedQuantizationParameters:
Expand All @@ -130,10 +149,6 @@ class AdvancedQuantizationParameters:
:type disable_channel_alignment: bool
:param disable_bias_correction: Whether to disable the bias correction.
:type disable_bias_correction: bool
:param smooth_quant_alpha: SmoothQuant-related parameter. It regulates the calculation of the smooth scale.
The default value is 0.95. A negative value switches off the algorithm. In case of inaccurate results,
this parameter may be adjusted in the range from 0 to 1 or set -1 to disable SmoothQuant algorithm.
:type smooth_quant_alpha: float
:param activations_quantization_params: Quantization parameters for activations.
:type activations_quantization_params: nncf.quantization.advanced_parameters.QuantizationParameters
:param weights_quantization_params: Quantization parameters for weights.
Expand All @@ -144,6 +159,13 @@ class AdvancedQuantizationParameters:
:type weights_range_estimator_params: nncf.quantization.range_estimator.RangeEstimatorParameters
:param bias_correction_params: Advanced bias correction parameters.
:type bias_correction_params: nncf.quantization.advanced_parameters.AdvancedBiasCorrectionParameters
:param smooth_quant_alphas: SmoothQuant-related parameters mapping.
It regulates the calculation of the smooth scale. The default values are stored in AdvancedSmoothQuantParameters.
A negative value in a field switches off smoothing for the corresponding layer type. In case of inaccurate results,
the fields may be adjusted in the range from 0 to 1 or set to -1 to disable smoothing for that type.
:type smooth_quant_alphas: AdvancedSmoothQuantParameters
:param smooth_quant_alpha: Deprecated SmoothQuant-related parameter.
:type smooth_quant_alpha: float
:param backend_params: Backend-specific parameters.
:type backend_params: Dict[str, Any]
"""
Expand All @@ -154,7 +176,6 @@ class AdvancedQuantizationParameters:
inplace_statistics: bool = True
disable_channel_alignment: bool = True
disable_bias_correction: bool = False
smooth_quant_alpha: float = 0.95
alexsu52 marked this conversation as resolved.
Show resolved Hide resolved

# Advanced Quantization parameters
activations_quantization_params: QuantizationParameters = field(default_factory=QuantizationParameters)
Expand All @@ -167,6 +188,11 @@ class AdvancedQuantizationParameters:
# Advanced BiasCorrection algorithm parameters
bias_correction_params: AdvancedBiasCorrectionParameters = field(default_factory=AdvancedBiasCorrectionParameters)

# Advanced SmoothQuant algorithm parameters
smooth_quant_alphas: AdvancedSmoothQuantParameters = field(default_factory=AdvancedSmoothQuantParameters)
# Deprecated parameter
smooth_quant_alpha: float = None

# Backend specific parameters
backend_params: Dict[str, Any] = field(default_factory=dict)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from nncf.common.quantization.structs import QuantizationPreset
from nncf.common.utils.backend import BackendType
from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection
from nncf.quantization.algorithms.channel_alignment.algorithm import ChannelAlignment
from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection
Expand Down Expand Up @@ -79,7 +80,12 @@ def _get_minmax_quantization_param_grid() -> ParamGrid:


def _get_smooth_quant_param_grid() -> ParamGrid:
    """
    Builds the parameter grid for the SmoothQuant algorithm.

    Every candidate is an AdvancedSmoothQuantParameters instance created with only the
    `matmul` alpha passed explicitly; the `convolution` field is left at its default.

    :return: Parameter grid with the single key `advanced_parameters:smooth_quant_alphas`.
    """
    alpha_values = [0.15, 0.25, 0.5, 0.75, 0.95]
    # Iterate over the values directly: `itertools.product(alpha_values)` called with a
    # single iterable yields 1-tuples such as `(0.15,)`, so `matmul` would receive a tuple
    # instead of a float.
    return {
        "advanced_parameters:smooth_quant_alphas": [
            AdvancedSmoothQuantParameters(matmul=alpha_v) for alpha_v in alpha_values
        ]
    }


def _get_channel_alignment_param_grid() -> ParamGrid:
Expand Down
21 changes: 21 additions & 0 deletions nncf/quantization/algorithms/min_max/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,16 @@ def _get_ignored_names(
ignored_names_by_layer_attributes = self._backend_entity.get_ignored_names_by_layer_attributes(
inference_nncf_graph
)

ignored_scope_by_algorithm = self._get_ignored_scope_by_algorithm(inference_nncf_graph)
ignored_names_by_algorithm = get_ignored_node_names_from_ignored_scope(
ignored_scope_by_algorithm, nncf_graph, strict=False
)

ignored_names.update({name: IgnoreReason.AUTOGENERATED for name in ignored_names_by_layer_attributes})

ignored_names.update({name: IgnoreReason.AUTOGENERATED for name in ignored_names_by_algorithm})

# User ignored scope has higher priority
ignored_names.update({name: IgnoreReason.USER_REQUESTED for name in user_ignored_names})

Expand All @@ -361,6 +369,19 @@ def _get_ignored_scope(self, inference_nncf_graph: NNCFGraph, ignored_patterns:

return IgnoredScope(names=nncf_node_names)

def _get_ignored_scope_by_algorithm(self, inference_nncf_graph: NNCFGraph) -> IgnoredScope:
    """
    Collects the nodes whose ignored_algorithms contain `quantization` into an IgnoredScope.

    :param inference_nncf_graph: Inference NNCFGraph instance.
    :return: IgnoredScope built from the names of the matched nodes.
    """
    quantization_ignored_names = [
        node.node_name
        for node in inference_nncf_graph.get_all_nodes()
        if "quantization" in node.ignored_algorithms
    ]
    return IgnoredScope(names=quantization_ignored_names)

def _get_quantizer_setup(
self,
nncf_graph: NNCFGraph,
Expand Down
19 changes: 16 additions & 3 deletions nncf/quantization/algorithms/post_training/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,23 @@ def create_ptq_pipeline(

# Add the `SmoothQuant` algorithm as the first step of the pipeline.
# It is added only for `ModelType.TRANSFORMER`.
if model_type == ModelType.TRANSFORMER and advanced_parameters.smooth_quant_alpha >= 0:
pipeline_steps.append(
[SmoothQuant(subset_size, advanced_parameters.inplace_statistics, advanced_parameters.smooth_quant_alpha)]
sq_params = advanced_parameters.smooth_quant_alphas
sq_alpha = advanced_parameters.smooth_quant_alpha
if sq_alpha is not None:
warning_deprecated(
"`AdvancedQuantizationParameters(smooth_quant_alpha=..)` is deprecated."
"Please, use `AdvancedQuantizationParameters(smooth_quant_alphas)` option "
"with AdvancedSmoothQuantParameters(convolution=.., matmul=..) as value instead."
)
if sq_alpha < 0:
sq_params.convolution = -1
sq_params.matmul = -1
else:
sq_params.matmul = sq_alpha

if model_type == ModelType.TRANSFORMER and (sq_params.convolution >= 0 or sq_params.matmul >= 0):
alpha_map = {"convolution": sq_params.convolution, "matmul": sq_params.matmul}
pipeline_steps.append([SmoothQuant(subset_size, advanced_parameters.inplace_statistics, alpha_map=alpha_map)])

# Add the `ChannelAlignment` algorithm as the second step of the pipeline.
if not advanced_parameters.disable_channel_alignment:
Expand Down
Loading