diff --git a/nncf/quantization/algorithms/weight_compression/mixed_precision.py b/nncf/quantization/algorithms/weight_compression/mixed_precision.py
index 53d44c97748..86ff7145a78 100644
--- a/nncf/quantization/algorithms/weight_compression/mixed_precision.py
+++ b/nncf/quantization/algorithms/weight_compression/mixed_precision.py
@@ -102,20 +102,13 @@ def apply(
             weight_param.compression_config = self._primary_config
             num_weights_in_4bit += weight_param.num_weights
 
-    @property
-    def available_backends(self) -> List[BackendType]:
-        return [BackendType.OPENVINO]
-
+    @abstractmethod
     def _set_backend_entity(self, model: TModel) -> None:
-        model_backend = get_backend(model)
-        if model_backend == BackendType.OPENVINO:
-            from nncf.quantization.algorithms.weight_compression.openvino_backend import OVMixedPrecisionAlgoBackend
+        """
+        Creates a helper class with backend-specific logic of the algorithm.
 
-            self._backend_entity = OVMixedPrecisionAlgoBackend(model)
-        else:
-            raise nncf.UnsupportedBackendError(
-                "Cannot return backend-specific entity because {} is not supported!".format(model_backend.value)
-            )
+        :param model: Backend-specific input model.
+        """
 
     @abstractmethod
     def get_statistic_points(
@@ -142,6 +135,29 @@ class DataFreeCriterion(MixedPrecisionCriterion):
     A baseline mixed precision criterion that is based on quantization noise of weights only.
     """
 
+    @property
+    def available_backends(self) -> List[BackendType]:
+        return [BackendType.OPENVINO, BackendType.TORCH, BackendType.TORCH_FX]
+
+    def _set_backend_entity(self, model: TModel) -> None:
+        model_backend = get_backend(model)
+        if model_backend == BackendType.OPENVINO:
+            from nncf.quantization.algorithms.weight_compression.openvino_backend import OVWeightCompressionAlgoBackend
+
+            self._backend_entity = OVWeightCompressionAlgoBackend(model)
+        elif model_backend == BackendType.TORCH:
+            from nncf.quantization.algorithms.weight_compression.torch_backend import PTWeightCompressionAlgoBackend
+
+            self._backend_entity = PTWeightCompressionAlgoBackend()
+        elif model_backend == BackendType.TORCH_FX:
+            from nncf.quantization.algorithms.weight_compression.torch_fx_backend import FXWeightCompressionAlgoBackend
+
+            self._backend_entity = FXWeightCompressionAlgoBackend()
+        else:
+            raise nncf.UnsupportedBackendError(
+                "Cannot return backend-specific entity because {} is not supported!".format(model_backend.value)
+            )
+
     def _calc_weight_sensitivity(
         self,
         weight_param: WeightCompressionParameters,
@@ -197,6 +213,21 @@ class DataBasedCriterion(DataFreeCriterion):
 
     STAT_KEY = None
 
+    @property
+    def available_backends(self) -> List[BackendType]:
+        return [BackendType.OPENVINO]
+
+    def _set_backend_entity(self, model: TModel) -> None:
+        model_backend = get_backend(model)
+        if model_backend == BackendType.OPENVINO:
+            from nncf.quantization.algorithms.weight_compression.openvino_backend import OVMixedPrecisionAlgoBackend
+
+            self._backend_entity = OVMixedPrecisionAlgoBackend(model)
+        else:
+            raise nncf.UnsupportedBackendError(
+                "Cannot return backend-specific entity because {} is not supported!".format(model_backend.value)
+            )
+
     def _calc_activation_sensitivity(
         self,
         weight_param: WeightCompressionParameters,
diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py
index 52d5bf07edb..fd0548c99a7 100644
--- a/nncf/quantization/quantize_model.py
+++ b/nncf/quantization/quantize_model.py
@@ -511,7 +511,6 @@ def compress_weights(
         )
 
         options = {
-            "sensitivity_metric": sensitivity_metric,
             "awq": awq,
             "scale_estimation": scale_estimation,
             "gptq": gptq,
@@ -523,8 +522,11 @@ def compress_weights(
                 f"Torch backend does not support {', '.join(unsupported_options)} option(s). Set them to None."
             )
 
-        if ratio is not None and ratio != 1:
-            raise nncf.ParameterNotSupportedError("Torch backend does not support ratio != 1.")
+        if sensitivity_metric not in [None, SensitivityMetric.WEIGHT_QUANTIZATION_ERROR]:
+            raise nncf.ParameterNotSupportedError(
+                "Torch backend only supports data-free sensitivity metric. "
+                "Set None or SensitivityMetric.WEIGHT_QUANTIZATION_ERROR."
+            )
 
         if is_wrapped_model(model):
             if not model.nncf.trace_parameters:
@@ -553,7 +555,6 @@ def compress_weights(
         )
 
         options = {
-            "sensitivity_metric": sensitivity_metric,
             "awq": awq,
             "scale_estimation": scale_estimation,
             "gptq": gptq,
@@ -565,8 +566,11 @@ def compress_weights(
                 f"TorchFX backend does not support {', '.join(unsupported_options)} option(s). Set them to None."
            )
 
-        if ratio is not None and ratio != 1:
-            raise nncf.ParameterNotSupportedError("TorchFX backend does not support ratio != 1.")
+        if sensitivity_metric not in [None, SensitivityMetric.WEIGHT_QUANTIZATION_ERROR]:
+            raise nncf.ParameterNotSupportedError(
+                "TorchFX backend only supports data-free sensitivity metric. "
+                "Set None or SensitivityMetric.WEIGHT_QUANTIZATION_ERROR."
+            )
 
         if dataset:
             raise nncf.ParameterNotSupportedError(
diff --git a/tests/post_training/data/wc_reference_data_2024.5.yaml b/tests/post_training/data/wc_reference_data_2024.5.yaml
index ee5b1ffdad4..bd263305a79 100644
--- a/tests/post_training/data/wc_reference_data_2024.5.yaml
+++ b/tests/post_training/data/wc_reference_data_2024.5.yaml
@@ -8,6 +8,6 @@ tinyllama_NF4_scale_estimation_stateful_per_channel_backend_OV:
   num_int4: 11
   num_int8: 290
 tinyllama_int4_data_free_backend_TORCH:
-  metric_value: 0.73541
-  num_int4: 308
-  num_int8: 4
+  metric_value: 0.73873
+  num_int4: 114
+  num_int8: 84
diff --git a/tests/post_training/model_scope.py b/tests/post_training/model_scope.py
index ed4b912a5dd..54b49d63a21 100644
--- a/tests/post_training/model_scope.py
+++ b/tests/post_training/model_scope.py
@@ -446,8 +446,10 @@
         "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b",
         "pipeline_cls": LMWeightCompression,
         "compression_params": {
-            "mode": CompressWeightsMode.INT4_ASYM,
             "group_size": 64,
+            "ratio": 0.8,
+            "mode": CompressWeightsMode.INT4_SYM,
+            "sensitivity_metric": SensitivityMetric.WEIGHT_QUANTIZATION_ERROR,
         },
         "backends": [BackendType.TORCH],
     },
diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py
index 2ea880fde31..20504a8b086 100644
--- a/tests/post_training/test_quantize_conformance.py
+++ b/tests/post_training/test_quantize_conformance.py
@@ -106,7 +106,10 @@ def ref_data_correction(data: Dict, file_name: str):
         with file_path.open() as f:
             correction_data = yaml.safe_load(f)
         for m_name, c_data in correction_data.items():
-            data[m_name].update(c_data)
+            if m_name in data:
+                data[m_name].update(c_data)
+            else:
+                data[m_name] = c_data
         print(f"Applied correction file {file_path}")
     return data
 
@@ -125,17 +128,18 @@ def fixture_wc_reference_data():
     path_reference = DATA_ROOT / "wc_reference_data.yaml"
     with path_reference.open() as f:
         data = yaml.safe_load(f)
-    fp32_test_cases = defaultdict(dict)
-    for test_case_name in data:
-        if "atol" not in data[test_case_name]:
-            data[test_case_name]["atol"] = 1e-5
-        reported_name = test_case_name.split("_backend_")[0]
-        fp32_case_name = f"{reported_name}_backend_FP32"
-        fp32_test_cases[fp32_case_name]["metric_value"] = 1
if "atol" not in fp32_test_cases[fp32_case_name]: - fp32_test_cases[fp32_case_name]["atol"] = 1e-10 - data.update(fp32_test_cases) - return ref_data_correction(data, "wc_reference_data") + data = ref_data_correction(data, "wc_reference_data") + fp32_test_cases = defaultdict(dict) + for test_case_name in data: + if "atol" not in data[test_case_name]: + data[test_case_name]["atol"] = 1e-5 + reported_name = test_case_name.split("_backend_")[0] + fp32_case_name = f"{reported_name}_backend_FP32" + fp32_test_cases[fp32_case_name]["metric_value"] = 1 + if "atol" not in fp32_test_cases[fp32_case_name]: + fp32_test_cases[fp32_case_name]["atol"] = 1e-10 + data.update(fp32_test_cases) + return data @pytest.fixture(scope="session", name="ptq_result_data") diff --git a/tests/torch/fx/test_compress_weights.py b/tests/torch/fx/test_compress_weights.py index 519fcfd654e..835398bd57e 100644 --- a/tests/torch/fx/test_compress_weights.py +++ b/tests/torch/fx/test_compress_weights.py @@ -24,6 +24,7 @@ from nncf.quantization import compress_weights from nncf.torch.dynamic_graph.patch_pytorch import disable_patching from tests.torch.ptq.test_weights_compression import ALL_SENSITIVITY_METRICS +from tests.torch.ptq.test_weights_compression import DATA_BASED_SENSITIVITY_METRICS from tests.torch.ptq.test_weights_compression import INT4_MODES from tests.torch.ptq.test_weights_compression import INT8_MODES from tests.torch.ptq.test_weights_compression import SUPPORTED_MODES @@ -240,8 +241,7 @@ def test_raise_error_with_unsupported_params_for_int8(mode, params): @pytest.mark.parametrize( "params", ( - {"ratio": 0.5}, - *({"sensitivity_metric": metric} for metric in ALL_SENSITIVITY_METRICS), + *({"sensitivity_metric": metric} for metric in DATA_BASED_SENSITIVITY_METRICS), {"gptq": True}, {"awq": True}, {"scale_estimation": True}, diff --git a/tests/torch/ptq/test_weights_compression.py b/tests/torch/ptq/test_weights_compression.py index 88373ac308f..2e902e1af50 100644 --- a/tests/torch/ptq/test_weights_compression.py +++ b/tests/torch/ptq/test_weights_compression.py @@ -250,8 +250,7 @@ def test_raise_error_with_unsupported_params_for_int8(mode, params): @pytest.mark.parametrize( "params", ( - {"ratio": 0.5}, - *({"sensitivity_metric": metric} for metric in ALL_SENSITIVITY_METRICS), + *({"sensitivity_metric": metric} for metric in DATA_BASED_SENSITIVITY_METRICS), {"gptq": True}, {"awq": True}, {"scale_estimation": True},