diff --git a/nncf/experimental/common/quantization/algorithms/post_training/__init__.py b/nncf/experimental/quantization/algorithms/post_training/__init__.py similarity index 100% rename from nncf/experimental/common/quantization/algorithms/post_training/__init__.py rename to nncf/experimental/quantization/algorithms/post_training/__init__.py diff --git a/nncf/experimental/common/quantization/algorithms/post_training/algorithm.py b/nncf/experimental/quantization/algorithms/post_training/algorithm.py similarity index 95% rename from nncf/experimental/common/quantization/algorithms/post_training/algorithm.py rename to nncf/experimental/quantization/algorithms/post_training/algorithm.py index 2b61689c638..8e75037f0fb 100644 --- a/nncf/experimental/common/quantization/algorithms/post_training/algorithm.py +++ b/nncf/experimental/quantization/algorithms/post_training/algorithm.py @@ -16,8 +16,8 @@ from nncf.common.graph.graph import NNCFGraph from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType -from nncf.experimental.common.quantization.algorithms.post_training.pipeline import experimental_create_ptq_pipeline -from nncf.experimental.common.quantization.algorithms.quantizer.base_quantizer import NNCFQuantizer +from nncf.experimental.quantization.algorithms.post_training.pipeline import experimental_create_ptq_pipeline +from nncf.experimental.quantization.algorithms.quantizer.base_quantizer import Quantizer from nncf.quantization.advanced_parameters import AdvancedBiasCorrectionParameters from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters from nncf.quantization.advanced_parameters import RangeEstimatorParameters @@ -37,7 +37,7 @@ class ExperimentalPostTrainingQuantization(Algorithm): def __init__( self, - quantizer: NNCFQuantizer, + quantizer: Quantizer, subset_size: int = 300, fast_bias_correction: Optional[bool] = True, smooth_quant: bool = False, diff --git 
a/nncf/experimental/common/quantization/algorithms/post_training/pipeline.py b/nncf/experimental/quantization/algorithms/post_training/pipeline.py similarity index 95% rename from nncf/experimental/common/quantization/algorithms/post_training/pipeline.py rename to nncf/experimental/quantization/algorithms/post_training/pipeline.py index c7dbaf796c3..924f0951e1a 100644 --- a/nncf/experimental/common/quantization/algorithms/post_training/pipeline.py +++ b/nncf/experimental/quantization/algorithms/post_training/pipeline.py @@ -11,8 +11,8 @@ from typing import Optional, TypeVar -from nncf.experimental.common.quantization.algorithms.quantizer.base_quantizer import NNCFQuantizer -from nncf.experimental.common.quantization.algorithms.range_estimator.range_estimator import MinMaxRangeEstimator +from nncf.experimental.quantization.algorithms.quantizer.base_quantizer import Quantizer +from nncf.experimental.quantization.algorithms.range_estimator.algorithm import MinMaxRangeEstimator from nncf.quantization.advanced_parameters import AdvancedBiasCorrectionParameters from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters from nncf.quantization.advanced_parameters import RangeEstimatorParameters @@ -27,7 +27,7 @@ def experimental_create_ptq_pipeline( - quantizer: NNCFQuantizer, + quantizer: Quantizer, subset_size: int = 300, fast_bias_correction: Optional[bool] = True, smooth_quant: bool = False, diff --git a/nncf/experimental/quantization/algorithms/quantizer/__init__.py b/nncf/experimental/quantization/algorithms/quantizer/__init__.py new file mode 100644 index 00000000000..c647751028b --- /dev/null +++ b/nncf/experimental/quantization/algorithms/quantizer/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/nncf/experimental/common/quantization/algorithms/quantizer/base_quantizer.py b/nncf/experimental/quantization/algorithms/quantizer/base_quantizer.py similarity index 98% rename from nncf/experimental/common/quantization/algorithms/quantizer/base_quantizer.py rename to nncf/experimental/quantization/algorithms/quantizer/base_quantizer.py index f7a4d6018ba..5e12f605355 100644 --- a/nncf/experimental/common/quantization/algorithms/quantizer/base_quantizer.py +++ b/nncf/experimental/quantization/algorithms/quantizer/base_quantizer.py @@ -18,7 +18,7 @@ TModel = TypeVar("TModel") -class NNCFQuantizer: +class Quantizer: @abstractmethod def get_quantization_setup(self, model: TModel, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: """ diff --git a/nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py b/nncf/experimental/quantization/algorithms/quantizer/fx_quantizer.py similarity index 97% rename from nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py rename to nncf/experimental/quantization/algorithms/quantizer/fx_quantizer.py index 33e0ef94a79..ed3e5a0e431 100644 --- a/nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py +++ b/nncf/experimental/quantization/algorithms/quantizer/fx_quantizer.py @@ -29,12 +29,12 @@ from nncf.common.quantization.quantizer_setup import WeightQuantizationInsertionPoint from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig -from 
nncf.experimental.common.quantization.algorithms.quantizer.base_quantizer import NNCFQuantizer +from nncf.experimental.quantization.algorithms.quantizer.base_quantizer import Quantizer EdgeOrNode = Union[Tuple[torch.fx.Node, torch.fx.Node]] -class NNCFFXQuantizer(NNCFQuantizer): +class NNCFFXQuantizer(Quantizer): def __init__(self, quantizer: Quantizer): self._quantizer = quantizer diff --git a/nncf/experimental/common/quantization/algorithms/range_estimator/range_estimator.py b/nncf/experimental/quantization/algorithms/range_estimator/algorithm.py similarity index 96% rename from nncf/experimental/common/quantization/algorithms/range_estimator/range_estimator.py rename to nncf/experimental/quantization/algorithms/range_estimator/algorithm.py index 1b5ad8c5692..8a31c88d44b 100644 --- a/nncf/experimental/common/quantization/algorithms/range_estimator/range_estimator.py +++ b/nncf/experimental/quantization/algorithms/range_estimator/algorithm.py @@ -15,7 +15,7 @@ from nncf.common.graph.graph import NNCFGraph from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType -from nncf.experimental.common.quantization.algorithms.quantizer.base_quantizer import NNCFQuantizer +from nncf.experimental.quantization.algorithms.quantizer.base_quantizer import Quantizer from nncf.quantization.algorithms.algorithm import Algorithm from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization from nncf.quantization.range_estimator import RangeEstimatorParameters @@ -26,7 +26,7 @@ class MinMaxRangeEstimator(Algorithm): def __init__( self, - quantizer: NNCFQuantizer, + quantizer: Quantizer, subset_size: int = 300, inplace_statistics: bool = True, batchwise_statistics: bool = False, diff --git a/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index f6267ecfb69..64e036d8214 100644 --- 
a/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -16,7 +16,6 @@ import torch.fx from torch.ao.quantization.pt2e.duplicate_dq_pass import DuplicateDQPass from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ -from torch.ao.quantization.pt2e.qat_utils import _fold_conv_bn_qat from torch.ao.quantization.pt2e.utils import _disallow_eval_train from torch.ao.quantization.quantizer import Quantizer from torch.fx import GraphModule @@ -26,11 +25,9 @@ from nncf.common.factory import NNCFGraphFactory from nncf.common.logging import nncf_logger from nncf.data import Dataset -from nncf.experimental.common.quantization.algorithms.post_training.algorithm import ( - ExperimentalPostTrainingQuantization, -) -from nncf.experimental.common.quantization.algorithms.quantizer.base_quantizer import NNCFQuantizer -from nncf.experimental.common.quantization.algorithms.quantizer.fx_quantizer import NNCFFXQuantizer +from nncf.experimental.quantization.algorithms.post_training.algorithm import ExperimentalPostTrainingQuantization +from nncf.experimental.quantization.algorithms.quantizer.base_quantizer import Quantizer as NNCFQuantizer +from nncf.experimental.quantization.algorithms.quantizer.fx_quantizer import NNCFFXQuantizer from nncf.experimental.torch.fx.constant_folding import constant_fold from nncf.experimental.torch.fx.transformations import QUANTIZE_NODE_TARGETS from nncf.experimental.torch.fx.transformations import fuse_conv_bn @@ -50,7 +47,7 @@ def quantize_pt2e( smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None, activations_range_estimator_params: Optional[RangeEstimatorParameters] = None, weights_range_estimator_params: Optional[RangeEstimatorParameters] = None, - batchwise_statistics: bool = False, + batchwise_statistics: Optional[bool] = None, fold_quantize: bool = False, ) -> torch.fx.GraphModule: """ @@ -75,11 +72,7 @@ def quantize_pt2e( for each item of the 
batch or for the entire batch, default is None. :param fold_quantize: Boolean flag for whether to fold the quantize op or not. """ - nncf_logger.warning( - "Experimental Torch FX quantization backend is being used for the given torch.fx.GraphModule model." - " Torch FX PTQ is an experimental feature, consider using Torch or OpenVino PTQ backends" - " in case of errors or a poor model performance." - ) + nncf_logger.warning("This is an experimental feature and may change in the future without notice.") if subset_size < 1: raise nncf.ValidationError("Subset size must be positive.") @@ -121,7 +114,6 @@ def quantize_pt2e( # is not performant quantized_model = GraphModule(quantized_model, quantized_model.graph) - quantized_model = _fold_conv_bn_qat(quantized_model) if fold_quantize: constant_fold(quantized_model, _quant_node_constraint) diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index b6728b292bf..981fd7604bb 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -759,7 +759,7 @@ def fill_quantization_target_points( """ Initializes a cache, finds quantization target points and then puts them in the cache. - :param model: Backend-specific model, for which Quantization Target Points are being seek. + :param quantizer_setup: Quantization Target Points in the format of SingleConfigQuantizerSetup. :param nncf_graph: NNCFGraph instance. :return: Mapping of quantization target points with associated quantization configuration, along with target points for scale unification.