Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

E8M0 scale for E2M1 weights. #2767

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,12 @@ def transform_model(
) -> ov.Model:
for wc_params in weight_compression_parameters:
compression_config = wc_params.compression_config
scale_dtype = ov.Type.f16
if compression_config.mode == CompressWeightsMode.NF4:
compression_dtype = ov.Type.nf4
elif compression_config.mode == CompressWeightsMode.E2M1:
compression_dtype = ov.Type.f4e2m1
scale_dtype = ov.Type.f8e8m0
elif compression_config.mode == CompressWeightsMode.INT4_SYM:
compression_dtype = ov.Type.i4
elif compression_config.mode == CompressWeightsMode.INT4_ASYM:
Expand Down Expand Up @@ -190,8 +192,11 @@ def transform_model(
)

scale_const = opset.constant(
compressed_weight.scale.data, dtype=ov.Type.f16, name=f"{const_node_name}/scale"
compressed_weight.scale.data, dtype=scale_dtype, name=f"{const_node_name}/scale"
)
if scale_dtype != ov.Type.f16:
scale_const = opset.convert(scale_const, ov.Type.f16)

mul = opset.multiply(
converted_const,
scale_const,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ def calculate_e2m1_scale(weight: Tensor, reduction_axes: ReductionAxes, max_val=
"""
scale = calculate_nf4_scale(weight, reduction_axes) / max_val

scale = fns.log2(scale)
scale = fns.ceil(scale)
scale = fns.clip(scale, -127, 127)
scale = 2**scale

return scale


Expand Down
2 changes: 2 additions & 0 deletions nncf/tensor/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from nncf.tensor.functions.numeric import argsort as argsort
from nncf.tensor.functions.numeric import as_tensor_like as as_tensor_like
from nncf.tensor.functions.numeric import astype as astype
from nncf.tensor.functions.numeric import ceil as ceil
from nncf.tensor.functions.numeric import clip as clip
from nncf.tensor.functions.numeric import concatenate as concatenate
from nncf.tensor.functions.numeric import count_nonzero as count_nonzero
Expand All @@ -31,6 +32,7 @@
from nncf.tensor.functions.numeric import isclose as isclose
from nncf.tensor.functions.numeric import isempty as isempty
from nncf.tensor.functions.numeric import item as item
from nncf.tensor.functions.numeric import log2 as log2
from nncf.tensor.functions.numeric import logical_or as logical_or
from nncf.tensor.functions.numeric import masked_mean as masked_mean
from nncf.tensor.functions.numeric import masked_median as masked_median
Expand Down
24 changes: 24 additions & 0 deletions nncf/tensor/functions/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,3 +858,27 @@ def from_numpy(ndarray: np.ndarray, *, backend: TensorBackend) -> Tensor:
if backend == TensorBackend.numpy:
return Tensor(ndarray)
return Tensor(get_numeric_backend_fn("from_numpy", backend)(ndarray))


@functools.singledispatch
@tensor_guard
def log2(a: Tensor) -> Tensor:
    """
    Base-2 logarithm of a.

    :param a: The input tensor.
    :return: A tensor containing the base-2 logarithm of each element in a.
    """
    # Generic fallback: unwrap the Tensor wrapper and re-dispatch on the
    # backend-specific implementation registered for type(a.data).
    return Tensor(log2(a.data))


@functools.singledispatch
@tensor_guard
def ceil(a: Tensor) -> Tensor:
    """
    Return the ceiling of the input, element-wise.

    :param a: Input data.
    :return: An array of the same type as a, containing the ceiling values.
    """
    # Generic fallback: unwrap the Tensor wrapper and re-dispatch on the
    # backend-specific implementation registered for type(a.data).
    # NOTE: stray review-UI text that had been interleaved into this body
    # ("... marked this conversation as resolved.") was removed — it was not
    # valid Python and broke the function definition.
    return Tensor(ceil(a.data))
10 changes: 10 additions & 0 deletions nncf/tensor/functions/numpy_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,3 +405,13 @@ def arange(
if dtype is not None:
dtype = DTYPE_MAP[dtype]
return np.arange(start, end, step, dtype=dtype)


@register_numpy_types(numeric.log2)
def _(a: Union[np.ndarray, np.generic]) -> Union[np.ndarray, np.generic]:
    # NumPy backend implementation of numeric.log2; np.log2 on an
    # np.generic scalar returns a scalar, hence the Union return type.
    return np.log2(a)


@register_numpy_types(numeric.ceil)
def _(a: Union[np.ndarray, np.generic]) -> Union[np.ndarray, np.generic]:
    # NumPy backend implementation of numeric.ceil. Return annotation widened
    # to match the log2 registration above: np.ceil on an np.generic scalar
    # input returns a scalar (np.generic), not an ndarray.
    return np.ceil(a)
10 changes: 10 additions & 0 deletions nncf/tensor/functions/torch_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,3 +438,13 @@ def arange(

def from_numpy(ndarray: np.ndarray) -> torch.Tensor:
return torch.from_numpy(ndarray)


@numeric.log2.register(torch.Tensor)
def _(a: torch.Tensor) -> torch.Tensor:
    # Torch backend implementation of numeric.log2.
    return torch.log2(a)


@numeric.ceil.register(torch.Tensor)
def _(a: torch.Tensor) -> torch.Tensor:
    # Torch backend implementation of numeric.ceil.
    return torch.ceil(a)
12 changes: 9 additions & 3 deletions tests/openvino/native/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,8 +901,14 @@ def test_mixed_precision_e2m1(mode, all_layers, ratio, ref_ids):
sensitivity_metric=mode,
dataset=dataset,
)
names = {
names_e2m1 = {
op.get_friendly_name() for op in compressed_model.get_ordered_ops() if op.get_element_type() == ov.Type.f4e2m1
}
ref_nf4_nodes = {f"weights_{i}" for i in ref_ids}
assert ref_nf4_nodes == names
ref_e2m1_nodes = {f"weights_{i}" for i in ref_ids}
assert ref_e2m1_nodes == names_e2m1

names_e8m0 = {
op.get_friendly_name() for op in compressed_model.get_ordered_ops() if op.get_element_type() == ov.Type.f8e8m0
}
ref_e8m0_nodes = {f"weights_{i}/scale" for i in ref_ids}
assert ref_e8m0_nodes == names_e8m0
Loading