Data-free mixed precision algorithm for Torch and Torch FX backends (#3042)

### Changes

Data-free mixed precision algorithm for Torch and Torch FX backends

### Reason for changes

Enables more accurate weight compression on the Torch and Torch FX backends: mixed precision (`ratio != 1`) can now be selected data-free, ranking weights by their quantization error instead of requiring activation statistics.
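
A minimal sketch of the new usage (the toy model, shapes, and example input are placeholders; the compression parameters mirror the updated tinyllama test case in `model_scope.py` below):

```python
import torch

import nncf
from nncf import CompressWeightsMode, SensitivityMetric
from nncf.torch import wrap_model

# Placeholder model; any torch.nn.Module with sufficiently large Linear layers works.
model = torch.nn.Sequential(torch.nn.Linear(256, 256), torch.nn.Linear(256, 256))
# The Torch backend still needs an example input to trace the model graph.
model = wrap_model(model, example_input=torch.ones(1, 256), trace_parameters=True)

# Data-free mixed precision: 80% of the weights go to INT4, and the most
# sensitive 20% -- ranked by weight quantization error alone -- stay INT8.
compressed_model = nncf.compress_weights(
    model,
    mode=CompressWeightsMode.INT4_SYM,
    ratio=0.8,
    group_size=64,
    sensitivity_metric=SensitivityMetric.WEIGHT_QUANTIZATION_ERROR,
)
```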

### Related tickets

ref: 153918

### Tests

NNCF/job/manual/job/post_training_weight_compression/228
alexsu52 authored Oct 30, 2024
1 parent db3a935 commit 51a7fb6
Showing 7 changed files with 78 additions and 38 deletions.
55 changes: 43 additions & 12 deletions nncf/quantization/algorithms/weight_compression/mixed_precision.py
@@ -102,20 +102,13 @@ def apply(
             weight_param.compression_config = self._primary_config
             num_weights_in_4bit += weight_param.num_weights
 
-    @property
-    def available_backends(self) -> List[BackendType]:
-        return [BackendType.OPENVINO]
-
+    @abstractmethod
     def _set_backend_entity(self, model: TModel) -> None:
-        model_backend = get_backend(model)
-        if model_backend == BackendType.OPENVINO:
-            from nncf.quantization.algorithms.weight_compression.openvino_backend import OVMixedPrecisionAlgoBackend
-
-            self._backend_entity = OVMixedPrecisionAlgoBackend(model)
-        else:
-            raise nncf.UnsupportedBackendError(
-                "Cannot return backend-specific entity because {} is not supported!".format(model_backend.value)
-            )
+        """
+        Creates a helper class with the backend-specific logic of the algorithm.
+
+        :param model: Backend-specific input model.
+        """
 
     @abstractmethod
     def get_statistic_points(
@@ -142,6 +135,29 @@ class DataFreeCriterion(MixedPrecisionCriterion):
     A baseline mixed precision criterion that is based on quantization noise of weights only.
     """
 
+    @property
+    def available_backends(self) -> List[BackendType]:
+        return [BackendType.OPENVINO, BackendType.TORCH, BackendType.TORCH_FX]
+
+    def _set_backend_entity(self, model: TModel) -> None:
+        model_backend = get_backend(model)
+        if model_backend == BackendType.OPENVINO:
+            from nncf.quantization.algorithms.weight_compression.openvino_backend import OVWeightCompressionAlgoBackend
+
+            self._backend_entity = OVWeightCompressionAlgoBackend(model)
+        elif model_backend == BackendType.TORCH:
+            from nncf.quantization.algorithms.weight_compression.torch_backend import PTWeightCompressionAlgoBackend
+
+            self._backend_entity = PTWeightCompressionAlgoBackend()
+        elif model_backend == BackendType.TORCH_FX:
+            from nncf.quantization.algorithms.weight_compression.torch_fx_backend import FXWeightCompressionAlgoBackend
+
+            self._backend_entity = FXWeightCompressionAlgoBackend()
+        else:
+            raise nncf.UnsupportedBackendError(
+                "Cannot return backend-specific entity because {} is not supported!".format(model_backend.value)
+            )
+
     def _calc_weight_sensitivity(
         self,
         weight_param: WeightCompressionParameters,
@@ -197,6 +213,21 @@ class DataBasedCriterion(DataFreeCriterion):
 
     STAT_KEY = None
 
+    @property
+    def available_backends(self) -> List[BackendType]:
+        return [BackendType.OPENVINO]
+
+    def _set_backend_entity(self, model: TModel) -> None:
+        model_backend = get_backend(model)
+        if model_backend == BackendType.OPENVINO:
+            from nncf.quantization.algorithms.weight_compression.openvino_backend import OVMixedPrecisionAlgoBackend
+
+            self._backend_entity = OVMixedPrecisionAlgoBackend(model)
+        else:
+            raise nncf.UnsupportedBackendError(
+                "Cannot return backend-specific entity because {} is not supported!".format(model_backend.value)
+            )
+
     def _calc_activation_sensitivity(
         self,
         weight_param: WeightCompressionParameters,
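
`DataFreeCriterion` is now the backend-agnostic entry point: it scores each weight purely by the noise its 4-bit quantization would introduce, so no activation statistics are needed. A rough sketch of the idea, not the exact `_calc_weight_sensitivity` implementation (which quantizes per group and normalizes the error):

```python
import numpy as np

def weight_quantization_error(weight: np.ndarray, num_bits: int = 4) -> float:
    """Data-free sensitivity proxy: reconstruction error of symmetric
    per-output-channel integer quantization (illustrative only)."""
    level_high = 2 ** (num_bits - 1) - 1
    scale = np.abs(weight).max(axis=-1, keepdims=True) / level_high
    scale = np.where(scale == 0.0, 1.0, scale)  # guard against all-zero rows
    dequantized = np.clip(np.round(weight / scale), -level_high - 1, level_high) * scale
    return float(np.mean(np.abs(weight - dequantized)))

# Layers would be sorted by this score; the least sensitive fraction
# (controlled by `ratio`) is assigned the 4-bit primary precision.
```

`DataBasedCriterion` keeps the OpenVINO-only `available_backends` and the `OVMixedPrecisionAlgoBackend` dispatch, since its activation-based statistics are still collected only for OpenVINO models.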
16 changes: 10 additions & 6 deletions nncf/quantization/quantize_model.py
@@ -511,7 +511,6 @@ def compress_weights(
         )
 
         options = {
-            "sensitivity_metric": sensitivity_metric,
             "awq": awq,
             "scale_estimation": scale_estimation,
             "gptq": gptq,
@@ -523,8 +522,11 @@ def compress_weights(
                 f"Torch backend does not support {', '.join(unsupported_options)} option(s). Set them to None."
             )
 
-        if ratio is not None and ratio != 1:
-            raise nncf.ParameterNotSupportedError("Torch backend does not support ratio != 1.")
+        if sensitivity_metric not in [None, SensitivityMetric.WEIGHT_QUANTIZATION_ERROR]:
+            raise nncf.ParameterNotSupportedError(
+                "Torch backend only supports data-free sensitivity metric. "
+                "Set None or SensitivityMetric.WEIGHT_QUANTIZATION_ERROR."
+            )
 
         if is_wrapped_model(model):
             if not model.nncf.trace_parameters:
@@ -553,7 +555,6 @@ def compress_weights(
         )
 
         options = {
-            "sensitivity_metric": sensitivity_metric,
             "awq": awq,
             "scale_estimation": scale_estimation,
             "gptq": gptq,
@@ -565,8 +566,11 @@ def compress_weights(
                 f"TorchFX backend does not support {', '.join(unsupported_options)} option(s). Set them to None."
             )
 
-        if ratio is not None and ratio != 1:
-            raise nncf.ParameterNotSupportedError("TorchFX backend does not support ratio != 1.")
+        if sensitivity_metric not in [None, SensitivityMetric.WEIGHT_QUANTIZATION_ERROR]:
+            raise nncf.ParameterNotSupportedError(
+                "TorchFX backend only supports data-free sensitivity metric. "
+                "Set None or SensitivityMetric.WEIGHT_QUANTIZATION_ERROR."
+            )
 
         if dataset:
             raise nncf.ParameterNotSupportedError(
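
Net effect of the two validation changes: `ratio != 1` is no longer rejected on Torch/TorchFX, and `sensitivity_metric` moved out of the blanket-unsupported options into a targeted check. A hypothetical session, reusing the placeholder `model` from the description above:

```python
import nncf
from nncf import CompressWeightsMode, SensitivityMetric

# Accepted now (previously raised: "Torch backend does not support ratio != 1."):
nncf.compress_weights(model, mode=CompressWeightsMode.INT4_SYM, ratio=0.5)

# Still rejected: data-based metrics need activation statistics.
nncf.compress_weights(
    model,
    mode=CompressWeightsMode.INT4_SYM,
    ratio=0.5,
    sensitivity_metric=SensitivityMetric.MEAN_ACTIVATION_VARIANCE,
)
# nncf.ParameterNotSupportedError: Torch backend only supports data-free
# sensitivity metric. Set None or SensitivityMetric.WEIGHT_QUANTIZATION_ERROR.
```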
6 changes: 3 additions & 3 deletions tests/post_training/data/wc_reference_data_2024.5.yaml
@@ -8,6 +8,6 @@ tinyllama_NF4_scale_estimation_stateful_per_channel_backend_OV:
   num_int4: 11
   num_int8: 290
 tinyllama_int4_data_free_backend_TORCH:
-  metric_value: 0.73541
-  num_int4: 308
-  num_int8: 4
+  metric_value: 0.73873
+  num_int4: 114
+  num_int8: 84
4 changes: 3 additions & 1 deletion tests/post_training/model_scope.py
@@ -446,8 +446,10 @@
         "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b",
         "pipeline_cls": LMWeightCompression,
         "compression_params": {
-            "mode": CompressWeightsMode.INT4_ASYM,
             "group_size": 64,
+            "ratio": 0.8,
+            "mode": CompressWeightsMode.INT4_SYM,
+            "sensitivity_metric": SensitivityMetric.WEIGHT_QUANTIZATION_ERROR,
         },
         "backends": [BackendType.TORCH],
     },
28 changes: 16 additions & 12 deletions tests/post_training/test_quantize_conformance.py
@@ -106,7 +106,10 @@ def ref_data_correction(data: Dict, file_name: str):
         with file_path.open() as f:
             correction_data = yaml.safe_load(f)
         for m_name, c_data in correction_data.items():
-            data[m_name].update(c_data)
+            if m_name in data:
+                data[m_name].update(c_data)
+            else:
+                data[m_name] = c_data
         print(f"Applied correction file {file_path}")
 
     return data
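
`ref_data_correction` previously assumed every entry in a correction file already existed in the base reference data, so corrections could only tweak known cases and a new case raised `KeyError`. With the guard, a release-specific correction file can also add cases absent from the base file. The new merge semantics on hypothetical data:

```python
data = {"model_a_backend_OV": {"metric_value": 0.50, "atol": 1e-5}}
correction = {
    "model_a_backend_OV": {"metric_value": 0.51},     # updates the existing entry in place
    "model_b_backend_TORCH": {"metric_value": 0.70},  # formerly KeyError; now inserted whole
}
for m_name, c_data in correction.items():
    if m_name in data:
        data[m_name].update(c_data)
    else:
        data[m_name] = c_data
```

The fixture below now applies this correction before synthesizing the `_backend_FP32` baselines, so corrected or newly added cases get FP32 baseline entries too.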
@@ -125,17 +128,18 @@ def fixture_wc_reference_data():
     path_reference = DATA_ROOT / "wc_reference_data.yaml"
     with path_reference.open() as f:
         data = yaml.safe_load(f)
-    fp32_test_cases = defaultdict(dict)
-    for test_case_name in data:
-        if "atol" not in data[test_case_name]:
-            data[test_case_name]["atol"] = 1e-5
-        reported_name = test_case_name.split("_backend_")[0]
-        fp32_case_name = f"{reported_name}_backend_FP32"
-        fp32_test_cases[fp32_case_name]["metric_value"] = 1
-        if "atol" not in fp32_test_cases[fp32_case_name]:
-            fp32_test_cases[fp32_case_name]["atol"] = 1e-10
-    data.update(fp32_test_cases)
-    return ref_data_correction(data, "wc_reference_data")
+    data = ref_data_correction(data, "wc_reference_data")
+    fp32_test_cases = defaultdict(dict)
+    for test_case_name in data:
+        if "atol" not in data[test_case_name]:
+            data[test_case_name]["atol"] = 1e-5
+        reported_name = test_case_name.split("_backend_")[0]
+        fp32_case_name = f"{reported_name}_backend_FP32"
+        fp32_test_cases[fp32_case_name]["metric_value"] = 1
+        if "atol" not in fp32_test_cases[fp32_case_name]:
+            fp32_test_cases[fp32_case_name]["atol"] = 1e-10
+    data.update(fp32_test_cases)
+    return data
 
 
 @pytest.fixture(scope="session", name="ptq_result_data")
4 changes: 2 additions & 2 deletions tests/torch/fx/test_compress_weights.py
@@ -24,6 +24,7 @@
 from nncf.quantization import compress_weights
 from nncf.torch.dynamic_graph.patch_pytorch import disable_patching
 from tests.torch.ptq.test_weights_compression import ALL_SENSITIVITY_METRICS
+from tests.torch.ptq.test_weights_compression import DATA_BASED_SENSITIVITY_METRICS
 from tests.torch.ptq.test_weights_compression import INT4_MODES
 from tests.torch.ptq.test_weights_compression import INT8_MODES
 from tests.torch.ptq.test_weights_compression import SUPPORTED_MODES
@@ -240,8 +241,7 @@ def test_raise_error_with_unsupported_params_for_int8(mode, params):
 @pytest.mark.parametrize(
     "params",
     (
-        {"ratio": 0.5},
-        *({"sensitivity_metric": metric} for metric in ALL_SENSITIVITY_METRICS),
+        *({"sensitivity_metric": metric} for metric in DATA_BASED_SENSITIVITY_METRICS),
         {"gptq": True},
         {"awq": True},
         {"scale_estimation": True},
3 changes: 1 addition & 2 deletions tests/torch/ptq/test_weights_compression.py
@@ -250,8 +250,7 @@ def test_raise_error_with_unsupported_params_for_int8(mode, params):
 @pytest.mark.parametrize(
     "params",
     (
-        {"ratio": 0.5},
-        *({"sensitivity_metric": metric} for metric in ALL_SENSITIVITY_METRICS),
+        *({"sensitivity_metric": metric} for metric in DATA_BASED_SENSITIVITY_METRICS),
         {"gptq": True},
         {"awq": True},
         {"scale_estimation": True},
