
Commit

updated tests
alexsu52 committed Oct 21, 2024
1 parent bd1a5ea commit b4d13b8
Showing 4 changed files with 137 additions and 55 deletions.
37 changes: 26 additions & 11 deletions nncf/quantization/quantize_model.py
@@ -473,14 +473,21 @@ def compress_weights(
if mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1]:
raise AttributeError("Torch backend does not support NF4 and E2M1 modes for weight compression.")

if True in [awq, scale_estimation, gptq, lora_correction]:
options = {
"sensitivity_metric": sensitivity_metric,
"awq": awq,
"scale_estimation": scale_estimation,
"gptq": gptq,
"lora_correction": lora_correction,
}
unsupported_options = [name for name, value in options.items() if value is not None]
if unsupported_options:
raise AttributeError(
"Torch backend does not support 'awq', 'scale_estimation', 'gptq' and 'lora_correction' options. "
"Set them to None."
f"Torch backend does not support {', '.join(unsupported_options)} option(s). Set them to None."
)

if backup_mode is not None:
raise AttributeError("Torch backend does not support backup_mode option.")
if ratio is not None and ratio != 1:
raise AttributeError("Torch backend does not support ratio != 1.")

if is_wrapped_model(model):
if not model.nncf.trace_parameters:
@@ -506,14 +513,22 @@ def compress_weights(
if mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1]:
raise AttributeError("Torch backend does not support NF4 and E2M1 modes for weight compression.")

if backup_mode is not None:
raise AttributeError("TorchFX backend does not support backup_mode option.")

if any((awq, scale_estimation, gptq, lora_correction)):
options = {
"sensitivity_metric": sensitivity_metric,
"awq": awq,
"scale_estimation": scale_estimation,
"gptq": gptq,
"lora_correction": lora_correction,
}
unsupported_options = [name for name, value in options.items() if value is not None]
if unsupported_options:
raise AttributeError(
"TorchFX backend does not support 'awq', 'scale_estimation', 'gptq',"
"and 'lora_correction' options. Set them to None."
f"TorchFX backend does not support {', '.join(unsupported_options)} option(s). Set them to None."
)

if ratio is not None and ratio != 1:
raise AttributeError("TorchFX backend does not support ratio != 1.")

if dataset:
raise AttributeError(
"TorchFX only supports data-free weights compression," "Set the 'dataset' option to None"
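Both backend branches above replace a fixed error message with one that names exactly the unsupported options the caller passed. A minimal, self-contained sketch of that validation pattern (the standalone helper and its defaults below are illustrative, not the real compress_weights signature):

def _check_unsupported_torch_options(
    sensitivity_metric=None, awq=None, scale_estimation=None, gptq=None, lora_correction=None
):
    # Collect only the options the caller actually set to a non-None value.
    options = {
        "sensitivity_metric": sensitivity_metric,
        "awq": awq,
        "scale_estimation": scale_estimation,
        "gptq": gptq,
        "lora_correction": lora_correction,
    }
    unsupported_options = [name for name, value in options.items() if value is not None]
    if unsupported_options:
        raise AttributeError(
            f"Torch backend does not support {', '.join(unsupported_options)} option(s). Set them to None."
        )

try:
    _check_unsupported_torch_options(awq=True, gptq=True)
except AttributeError as err:
    print(err)  # Torch backend does not support awq, gptq option(s). Set them to None.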
@@ -21,17 +21,17 @@
import torch.utils
import torch.utils.data
import torchvision
from datasets import load_dataset
from optimum.exporters.openvino.convert import export_from_model
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoModelForCausalLM

import nncf
from datasets import load_dataset
from nncf.experimental.torch.sparsify_activations import sparsify_activations
from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend
from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend
from nncf.torch.quantization.layers import AsymmetricWeightsDecompressor
from nncf.torch.quantization.layers import SymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT8AsymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT8SymmetricWeightsDecompressor
from tests.post_training.pipelines.base import LIMIT_LENGTH_OF_STATUS
from tests.post_training.pipelines.base import PT_BACKENDS
from tests.post_training.pipelines.base import BackendType
@@ -267,7 +267,7 @@ def save_compressed_model(self):
if self.backend == BackendType.CUDA_TORCH:
self.model_hf.float()
for module in self.model_hf.nncf.modules():
if isinstance(module, (AsymmetricWeightsDecompressor, SymmetricWeightsDecompressor)):
if isinstance(module, (INT8AsymmetricWeightsDecompressor, INT8SymmetricWeightsDecompressor)):
module.result_dtype = torch.float32
export_from_model(
self.model_hf, self.output_model_dir, stateful=False, compression_option="fp32", device="cuda"
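The only substantive change in this hunk is the import rename: the generic SymmetricWeightsDecompressor / AsymmetricWeightsDecompressor classes are replaced by their INT8-specific counterparts, and the dtype-reset loop is updated to match. A condensed sketch of that loop as a standalone helper (the helper name is illustrative; model is assumed to be an NNCF-wrapped HF model being prepared for an fp32 export):

import torch

from nncf.torch.quantization.layers import INT8AsymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT8SymmetricWeightsDecompressor


def force_fp32_decompression(model):
    # Cast the model back to float32, then make every INT8 weight decompressor
    # emit float32 tensors so the exported graph is uniformly fp32.
    model.float()
    for module in model.nncf.modules():
        if isinstance(module, (INT8AsymmetricWeightsDecompressor, INT8SymmetricWeightsDecompressor)):
            module.result_dtype = torch.float32
    return model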
64 changes: 49 additions & 15 deletions tests/torch/fx/test_compress_weights.py
@@ -23,6 +23,8 @@
from nncf.quantization import compress_weights
from nncf.torch.dynamic_graph.patch_pytorch import disable_patching
from tests.torch.ptq.test_weights_compression import ALL_SENSITIVITY_METRICS
from tests.torch.ptq.test_weights_compression import INT4_MODES
from tests.torch.ptq.test_weights_compression import INT8_MODES
from tests.torch.ptq.test_weights_compression import SUPPORTED_MODES
from tests.torch.ptq.test_weights_compression import UNSUPPORTED_MODES
from tests.torch.ptq.test_weights_compression import ConvolutionModel
@@ -76,10 +78,13 @@ def _capture_model(model, inputs):

@pytest.mark.parametrize("mode", SUPPORTED_MODES)
def test_compress_weights(mode):
model = ShortTransformer(5, 10)
input_ids = torch.randint(0, 10, (5,))
model = ShortTransformer(8, 16)
input_ids = torch.randint(0, 10, (8,))
exported_model = _capture_model(model, input_ids)
compressed_model = compress_weights(exported_model, mode=mode)
kwargs = {}
if mode in [CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM]:
kwargs["group_size"] = 4
compressed_model = compress_weights(exported_model, mode=mode, **kwargs)
dtype = torch.int8 if mode == CompressWeightsMode.INT8_SYM else torch.uint8
n_compressed_weights = 0
n_target_modules = 0
@@ -91,7 +96,7 @@ def test_compress_weights(mode):
assert n_target_modules == n_compressed_weights


@pytest.mark.parametrize("mode", SUPPORTED_MODES)
@pytest.mark.parametrize("mode", INT8_MODES)
def test_compress_weights_graph_edge(mode):
model = ShortTransformer(5, 10)
input_ids = torch.randint(0, 10, (5,))
@@ -108,10 +113,13 @@ def test_compress_weights_graph_edge(mode):
@pytest.mark.parametrize("mode", SUPPORTED_MODES)
def test_compress_weights_shared_weights(mocker, mode):
with disable_patching():
model = ShortTransformer(5, 10, share_weights=True)
input_ids = torch.randint(0, 10, (5,))
model = ShortTransformer(8, 16, share_weights=True)
input_ids = torch.randint(0, 10, (8,))
exported_model = _capture_model(model, input_ids)
compressed_model = compress_weights(exported_model, mode=mode)
kwargs = {}
if mode in [CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM]:
kwargs["group_size"] = 4
compressed_model = compress_weights(exported_model, mode=mode, **kwargs)
dtype = torch.int8 if mode == CompressWeightsMode.INT8_SYM else torch.uint8
n_compressed_weights = 0
n_target_modules = 0
@@ -141,11 +149,14 @@ def test_compress_weights_shared_weights(mocker, mode):
@pytest.mark.parametrize("mode", SUPPORTED_MODES)
def test_compressed_model_inference(mode):
torch.manual_seed(42)
model = ShortTransformer(5, 10, share_weights=True)
input_ids = torch.randint(0, 10, (5,))
model = ShortTransformer(8, 16, share_weights=True)
input_ids = torch.randint(0, 10, (8,))
exported_model = _capture_model(model, input_ids)
exported_model_output = exported_model(input_ids)
compressed_model = compress_weights(exported_model, mode=mode)
kwargs = {}
if mode in [CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM]:
kwargs["group_size"] = 4
compressed_model = compress_weights(exported_model, mode=mode, **kwargs)
compressed_model_outputs = compressed_model(input_ids)

assert (
@@ -160,7 +171,7 @@ def test_compress_weights_model_size_conv(mode):
dtype = torch.int8 if mode == CompressWeightsMode.INT8_SYM else torch.uint8
model = ConvolutionModel()

input_ids = torch.randint(0, 10, [1, 3, 300, 300])
input_ids = torch.randint(0, 10, [1, 3, 256, 256])
exported_model = _capture_model(model, input_ids)
model_size = get_model_size(exported_model)
compressed_model = compress_weights(exported_model, mode=mode)
@@ -181,9 +192,11 @@ def test_compress_weights_functional_model(mode):
@pytest.mark.parametrize("mode", SUPPORTED_MODES)
def test_compress_weights_functional_model(mode):
model = FunctionalModel()
decompressor_type = "symmetric" if mode == CompressWeightsMode.INT8_SYM else "asymmetric"
decompressor_type = (
"symmetric" if mode in (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT4_SYM) else "asymmetric"
)

input_ids = torch.randint(0, 10, [1, 3, 300, 300])
input_ids = torch.randint(0, 10, [1, 3, 256, 256])
exported_model = _capture_model(model, input_ids)
compressed_model = compress_weights(exported_model, mode=mode)

@@ -195,7 +208,7 @@ def test_compress_weights_functional_model(mode):
assert n_compressed_weights == 4


@pytest.mark.parametrize("mode", SUPPORTED_MODES)
@pytest.mark.parametrize("mode", INT8_MODES)
@pytest.mark.parametrize(
"params",
(
@@ -222,6 +235,27 @@ def test_raise_error_with_unsupported_params_for_int8(mode, params):
compress_weights(exported_model, mode=mode, **params)


@pytest.mark.parametrize("mode", INT4_MODES)
@pytest.mark.parametrize(
"params",
(
{"ratio": 0.5},
*({"sensitivity_metric": metric} for metric in ALL_SENSITIVITY_METRICS),
{"gptq": True},
{"awq": True},
{"scale_estimation": True},
{"lora_correction": True},
{"dataset": Dataset([1])},
),
)
def test_raise_error_with_unsupported_params_for_int4(mode, params):
dummy_torch_model = EmptyModel()
dummy_input = torch.Tensor()
exported_model = _capture_model(dummy_torch_model, dummy_input)
with pytest.raises(AttributeError):
compress_weights(exported_model, mode=mode, **params)


@pytest.mark.parametrize("mode", UNSUPPORTED_MODES)
def test_raise_error_with_not_int8(mode):
dummy_torch_model = EmptyModel()
@@ -251,7 +285,7 @@ def test_model_devices_and_precisions(use_cuda, dtype):
model = MatMulModel().to(device)
if dtype == torch.float16:
model.half()
dummy_input = torch.rand((1, 300), dtype=dtype, device=device)
dummy_input = torch.rand((1, 256), dtype=dtype, device=device)
exported_model = _capture_model(model, dummy_input)
compressed_model = compress_weights(exported_model)
result = compressed_model(dummy_input)
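Several tests above pass group_size=4 whenever an INT4 mode is selected, because the toy layers are far smaller than the default group size. A hedged usage sketch of the same call pattern outside the test helpers (the torch.export capture shown here only approximates what _capture_model does; the tests additionally disable NNCF's torch patching around the capture):

import torch

import nncf
from nncf import CompressWeightsMode

# Two toy linear layers; 16 features so a group size of 4 divides evenly.
model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.Linear(16, 8))
example_input = torch.randn(1, 16)

# Assumption: capture via torch.export and take the underlying GraphModule.
exported_model = torch.export.export(model, args=(example_input,)).module()

compressed_model = nncf.compress_weights(
    exported_model,
    mode=CompressWeightsMode.INT4_SYM,
    group_size=4,  # explicit here: the default group size is larger than these tiny layers
)
print(compressed_model(example_input).shape)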
83 changes: 58 additions & 25 deletions tests/torch/ptq/test_weights_compression.py
@@ -18,8 +18,10 @@
from nncf import SensitivityMetric
from nncf.quantization import compress_weights
from nncf.torch import wrap_model
from nncf.torch.quantization.layers import AsymmetricWeightsDecompressor
from nncf.torch.quantization.layers import SymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT8AsymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT8SymmetricWeightsDecompressor

DATA_BASED_SENSITIVITY_METRICS = (
SensitivityMetric.HESSIAN_INPUT_ACTIVATION,
@@ -30,12 +32,10 @@

ALL_SENSITIVITY_METRICS = DATA_BASED_SENSITIVITY_METRICS + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR,)

SUPPORTED_MODES = (CompressWeightsMode.INT8, CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM)
UNSUPPORTED_MODES = (
CompressWeightsMode.INT4_SYM,
CompressWeightsMode.INT4_ASYM,
CompressWeightsMode.NF4,
)
INT8_MODES = (CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM)
INT4_MODES = (CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM)
SUPPORTED_MODES = INT8_MODES + INT4_MODES
UNSUPPORTED_MODES = (CompressWeightsMode.NF4, CompressWeightsMode.E2M1)


class ShortTransformer(torch.nn.Module):
@@ -58,7 +58,7 @@ def forward(self, input_ids):
class MatMulModel(torch.nn.Module):
def __init__(self):
super().__init__()
self.w = torch.nn.Parameter(torch.ones(size=(300, 300), dtype=torch.float32))
self.w = torch.nn.Parameter(torch.ones(size=(256, 256), dtype=torch.float32))

def forward(self, input):
return input @ self.w
@@ -68,7 +68,7 @@ class FunctionalModel(torch.nn.Module):
def __init__(self):
super().__init__()
self.conv_w = torch.nn.Parameter(torch.ones(size=(5, 3, 3, 3), dtype=torch.float32))
self.matmul_w = torch.nn.Parameter(torch.ones(size=(1, 3, 300, 300), dtype=torch.float32))
self.matmul_w = torch.nn.Parameter(torch.ones(size=(1, 3, 256, 256), dtype=torch.float32))
self.conv_tr_w = torch.nn.Parameter(torch.rand(size=(5, 4, 3, 3)))
self.nested_matmul = MatMulModel()

@@ -108,14 +108,18 @@ def forward(self, input_):
return x


@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM))
@pytest.mark.parametrize("mode", SUPPORTED_MODES)
def test_compress_weights(mode):
model = ShortTransformer(5, 10)
model = ShortTransformer(8, 16)
dtype = torch.int8 if mode == CompressWeightsMode.INT8_SYM else torch.uint8

input_ids = torch.randint(0, 10, (5,))
input_ids = torch.randint(0, 10, (8,))
wrapped_model = wrap_model(model, example_input=input_ids, trace_parameters=True)
compressed_model = compress_weights(wrapped_model, mode=mode)

kwargs = {}
if mode in [CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM]:
kwargs["group_size"] = 4
compressed_model = compress_weights(wrapped_model, mode=mode, **kwargs)

n_compressed_weights = 0
n_target_modules = 0
@@ -129,14 +133,19 @@ def test_compress_weights(mode):
assert n_compressed_weights == n_target_modules


@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM))
@pytest.mark.parametrize("mode", SUPPORTED_MODES)
def test_compress_weights_functional_model(mode):
model = FunctionalModel()
decompressor_type = (
SymmetricWeightsDecompressor if mode == CompressWeightsMode.INT8_SYM else AsymmetricWeightsDecompressor
)
decompressor_map = {
CompressWeightsMode.INT8_SYM: (INT8SymmetricWeightsDecompressor,),
CompressWeightsMode.INT8_ASYM: (INT8AsymmetricWeightsDecompressor,),
CompressWeightsMode.INT4_SYM: (INT4SymmetricWeightsDecompressor, INT8AsymmetricWeightsDecompressor),
CompressWeightsMode.INT4_ASYM: (INT4AsymmetricWeightsDecompressor, INT8AsymmetricWeightsDecompressor),
}

input_ids = torch.randint(0, 10, [1, 3, 300, 300])
decompressor_type = decompressor_map[mode]

input_ids = torch.randint(0, 10, [1, 3, 256, 256])
wrapped_model = wrap_model(model, example_input=input_ids, trace_parameters=True)
compressed_model = compress_weights(wrapped_model, mode=mode)

@@ -166,14 +175,18 @@ def test_compress_weights_conv():
assert n_compressed_weights == n_target_modules


@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM))
@pytest.mark.parametrize("mode", SUPPORTED_MODES)
def test_compress_shared_weights(mocker, mode):
model = ShortTransformer(5, 10, share_weights=True)
model = ShortTransformer(8, 16, share_weights=True)
dtype = torch.int8 if mode == CompressWeightsMode.INT8_SYM else torch.uint8

input_ids = torch.randint(0, 10, (5,))
input_ids = torch.randint(0, 10, (8,))
wrapped_model = wrap_model(model, example_input=input_ids, trace_parameters=True)
compressed_model = compress_weights(wrapped_model, mode=mode)

kwargs = {}
if mode in [CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM]:
kwargs["group_size"] = 4
compressed_model = compress_weights(wrapped_model, mode=mode, **kwargs)

n_compressed_weights = 0
n_target_modules = 0
@@ -202,7 +215,7 @@ def forward(self, input):
return input


@pytest.mark.parametrize("mode", SUPPORTED_MODES)
@pytest.mark.parametrize("mode", INT8_MODES)
@pytest.mark.parametrize(
"params",
(
@@ -228,6 +241,26 @@ def test_raise_error_with_unsupported_params_for_int8(mode, params):
compress_weights(wrapped_model, mode=mode, **params)


@pytest.mark.parametrize("mode", INT4_MODES)
@pytest.mark.parametrize(
"params",
(
{"ratio": 0.5},
*({"sensitivity_metric": metric} for metric in ALL_SENSITIVITY_METRICS),
{"gptq": True},
{"awq": True},
{"scale_estimation": True},
{"lora_correction": True},
),
)
def test_raise_error_with_unsupported_params_for_int4(mode, params):
dummy_torch_model = EmptyModel()
dummy_input = torch.Tensor()
wrapped_model = wrap_model(dummy_torch_model, example_input=dummy_input, trace_parameters=True)
with pytest.raises(AttributeError):
compress_weights(wrapped_model, mode=mode, **params)


@pytest.mark.parametrize("mode", UNSUPPORTED_MODES)
def test_raise_error_with_not_int8(mode):
dummy_torch_model = EmptyModel()
@@ -269,7 +302,7 @@ def test_model_devices_and_precisions(use_cuda, dtype):
if dtype == torch.float16:
model.half()

dummy_input = torch.rand((1, 300), dtype=dtype, device=device)
dummy_input = torch.rand((1, 256), dtype=dtype, device=device)
wrapped_model = wrap_model(model, example_input=dummy_input, trace_parameters=True)
compressed_model = compress_weights(wrapped_model)
result = compressed_model(dummy_input)
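The decompressor_map in test_compress_weights_functional_model encodes the expectation that an INT4 run can leave both INT4 decompressors (for group-quantized weights) and an INT8 asymmetric decompressor (for weights kept in the backup precision) in the same model. A hedged inspection sketch along those lines (the toy model and group size are illustrative, not what the test uses):

from collections import Counter

import torch

import nncf
from nncf import CompressWeightsMode
from nncf.torch import wrap_model
from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT8AsymmetricWeightsDecompressor

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.Linear(16, 16))
example_input = torch.randn(1, 16)

wrapped_model = wrap_model(model, example_input=example_input, trace_parameters=True)
compressed_model = nncf.compress_weights(wrapped_model, mode=CompressWeightsMode.INT4_SYM, group_size=4)

# Count the decompressor types that were inserted; backup-precision weights,
# if any, show up as INT8 asymmetric decompressors next to the INT4 ones.
decompressors = [
    module
    for module in compressed_model.nncf.modules()
    if isinstance(module, (INT4SymmetricWeightsDecompressor, INT8AsymmetricWeightsDecompressor))
]
print(Counter(type(module).__name__ for module in decompressors))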
