Fixed bug with clamp range in scale estimation. #2781

Merged (3 commits) on Jul 5, 2024
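This PR fixes the clamp range used by the OpenVINO compress/decompress pipelines during scale estimation: the range was always the asymmetric [0, 2**num_bits - 1], so for INT4_SYM negative quantized weights were clipped. The fix derives the range from the mode ([0, 15] for INT4_ASYM, [-8, 7] for INT4_SYM), simplifies the pipeline builders to take a WeightCompressionConfig instead of a full WeightCompressionParameters, and adds a test comparing the OpenVINO pipelines against the NumPy reference implementation.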
nncf/quantization/algorithms/weight_compression/openvino_backend.py

@@ -33,6 +33,7 @@
 from nncf.quantization.algorithms.weight_compression.awq_patterns import get_awq_patterns
 from nncf.quantization.algorithms.weight_compression.backend import AWQAlgoBackend
 from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend
+from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
 from nncf.quantization.algorithms.weight_compression.weight_lowering import compress_weight
 from nncf.tensor import Tensor
@@ -222,11 +223,9 @@ def dump_parameters(
         dump_parameters(model, parameters, algo_name, path)

     @staticmethod
-    def get_compress_decompress_pipeline(
-        weight_compression_parameter: WeightCompressionParameters, w_shape, s_shape, z_p_shape=None
-    ):
+    def get_compress_decompress_pipeline(config: WeightCompressionConfig, w_shape, s_shape, z_p_shape=None):
         parameters, clamp = OVWeightCompressionAlgoBackend.get_compress_pipeline(
-            weight_compression_parameter, w_shape, s_shape, z_p_shape, True
+            config, w_shape, s_shape, z_p_shape, True
         )

         if len(parameters) == 3:
@@ -243,16 +242,14 @@ def get_compress_decompress_pipeline(
         return lambda parameters: compiled_model(parameters)[0]

     @staticmethod
-    def get_compress_pipeline(
-        weight_compression_parameter: WeightCompressionParameters, w_shape, s_shape, z_p_shape=None, return_nodes=False
-    ):
-        config = weight_compression_parameter.compression_config
+    def get_compress_pipeline(config: WeightCompressionConfig, w_shape, s_shape, z_p_shape=None, return_nodes=False):
         mode = config.mode
         assert mode in [CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM]
         num_bits = config.num_bits

-        level_low = 0
-        level_high = 2**num_bits - 1
+        asym_quant = mode in [CompressWeightsMode.INT4_ASYM]
+        level_low = 0 if asym_quant else -(2 ** (num_bits - 1))
+        level_high = 2**num_bits - 1 if asym_quant else 2 ** (num_bits - 1) - 1

         w = opset.parameter(w_shape, name="w")
         s = opset.parameter(s_shape, name="s")
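As an illustration of the fix, here is a minimal NumPy sketch of the corrected level-range computation (not the actual OpenVINO graph, which builds opset nodes over w, s, and optionally zp; the w/s values below are hypothetical):

```python
import numpy as np

def int4_levels(asym_quant: bool, num_bits: int = 4):
    # Mirrors the fixed logic: asymmetric keeps [0, 2**n - 1], while
    # symmetric uses the signed range [-(2**(n-1)), 2**(n-1) - 1].
    level_low = 0 if asym_quant else -(2 ** (num_bits - 1))
    level_high = 2**num_bits - 1 if asym_quant else 2 ** (num_bits - 1) - 1
    return level_low, level_high

print(int4_levels(asym_quant=True))   # (0, 15)  INT4_ASYM
print(int4_levels(asym_quant=False))  # (-8, 7)  INT4_SYM

# Before the fix, symmetric mode also clamped to [0, 15], so every weight
# that quantized to a negative level was clipped to 0:
ratios = np.array([-7.6, -1.2, 0.0, 3.4], dtype=np.float32)  # hypothetical w / s
old = np.clip(np.round(ratios), 0, 15)   # [ 0.,  0., 0., 3.]  wrong for SYM
new = np.clip(np.round(ratios), -8, 7)   # [-8., -1., 0., 3.]  correct
```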
nncf/quantization/algorithms/weight_compression/scale_estimation.py

@@ -211,9 +211,11 @@ def apply(
                 compress_model = compress_decompress_cache[key]["compress_model"]
             else:
                 compress_decompress_model = self._backend_entity.get_compress_decompress_pipeline(
-                    wp, q_weights.shape, scale.shape, zp_shape
+                    wp.compression_config, q_weights.shape, scale.shape, zp_shape
                 )
-                compress_model = self._backend_entity.get_compress_pipeline(wp, q_weights.shape, scale.shape, zp_shape)
+                compress_model = self._backend_entity.get_compress_pipeline(
+                    wp.compression_config, q_weights.shape, scale.shape, zp_shape
+                )
                 compress_decompress_cache[key] = {
                     "compress_decompress_model": compress_decompress_model,
                     "compress_model": compress_model,
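Why pass wp.compression_config rather than the whole WeightCompressionParameters? The compiled pipeline depends only on the compression settings and tensor shapes, so keying the cache on those lets weights with identical settings share one compiled model. A toy sketch of that caching idea, with hypothetical stand-in types (the real key construction is outside this hunk):

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class ToyConfig:  # hypothetical stand-in for WeightCompressionConfig
    mode: str = "int4_sym"
    num_bits: int = 4

pipeline_cache = {}

def get_pipeline(config: ToyConfig, w_shape, s_shape, zp_shape=None):
    # The compiled graph depends only on the config and the shapes, so two
    # weights with identical settings can reuse one cache entry.
    key = (config, w_shape, s_shape, zp_shape)
    if key not in pipeline_cache:
        pipeline_cache[key] = f"compiled<{config.mode}, {w_shape}>"  # placeholder
    return pipeline_cache[key]

p1 = get_pipeline(ToyConfig(), (2, 8), (2, 1))
p2 = get_pipeline(ToyConfig(), (2, 8), (2, 1))
assert p1 is p2  # identical config and shapes share a pipeline
```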
tests/openvino/native/quantization/test_weights_compression.py (31 additions, 0 deletions)

@@ -28,6 +28,9 @@
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
 from nncf.quantization.algorithms.weight_compression.mixed_precision import MIXED_PRECISION_CRITERIA
+from nncf.quantization.algorithms.weight_compression.openvino_backend import OVWeightCompressionAlgoBackend
+from nncf.quantization.algorithms.weight_compression.weight_lowering import do_dequantization
+from nncf.quantization.algorithms.weight_compression.weight_lowering import do_integer_quantization
 from nncf.quantization.algorithms.weight_compression.weight_lowering import get_integer_quantization_error
 from nncf.quantization.algorithms.weight_compression.weight_lowering import reshape_weight_for_grouped_quantization
 from nncf.scopes import IgnoredScope
@@ -906,3 +909,31 @@ def test_mixed_precision_e2m1(mode, all_layers, ratio, ref_ids):
     }
     ref_nf4_nodes = {f"weights_{i}" for i in ref_ids}
     assert ref_nf4_nodes == names
+
+
+@pytest.mark.parametrize("mode", (CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM))
+def test_np_ov_compression_decompression(mode):
+    sz = 8
+    w = np.arange(-sz, sz).reshape(2, sz).astype(np.float32)
+    w = Tensor(w)
+
+    config = WeightCompressionConfig(mode)
+
+    compressed_weighs, scale, zp = do_integer_quantization(w, -1, config)
+    decompressed_weighs = do_dequantization(compressed_weighs, scale, zp)
+
+    compressed_weighs = compressed_weighs.data
+    decompressed_weighs = decompressed_weighs.data
+    zp_shape = zp.shape if zp is not None else None
+
+    compress = OVWeightCompressionAlgoBackend.get_compress_pipeline(config, w.shape, scale.shape, zp_shape)
+    compress_decompress = OVWeightCompressionAlgoBackend.get_compress_decompress_pipeline(
+        config, w.shape, scale.shape, zp_shape
+    )
+
+    params = [w.data, scale.data, zp.data] if zp is not None else [w.data, scale.data]
+    compressed_weighs_ov = compress(params)
+    decompressed_weighs_ov = compress_decompress(params)
+
+    assert np.allclose(compressed_weighs, compressed_weighs_ov)
+    assert np.allclose(decompressed_weighs, decompressed_weighs_ov)
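The test quantizes the same weights through the NumPy reference path (do_integer_quantization / do_dequantization) and through the compiled OpenVINO pipelines, then checks that both results match. To run it alone (assuming NNCF's OpenVINO test prerequisites are installed): `pytest tests/openvino/native/quantization/test_weights_compression.py -k test_np_ov_compression_decompression`.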