[WC] Align compression subgraphs for both weight input data types #2537

Merged

Changes from 11 commits
@@ -144,7 +144,7 @@ def transform_model(
             const_attributes = wc_params.node_with_weight.layer_attributes.constant_attributes[wc_params.weight_port_id]
             const_node_name = const_attributes["name"]
             const_node = self.name_to_node_mapping[const_node_name]
-            const_dtype = const_node.output(0).get_element_type().to_dtype()
+            const_dtype = const_node.output(0).get_element_type()
 
             weight = Tensor(get_const_value(const_node))
             original_shape = weight.shape
@@ -153,24 +153,28 @@ def transform_model(
             compressed_const = opset.constant(
                 compressed_weight.tensor.data, dtype=compression_dtype, name=const_node_name
             )
-            converted_const = opset.convert(compressed_const, const_dtype)
+            converted_const = opset.convert(compressed_const, ov.Type.f16)
             if compressed_weight.zero_point is not None:
                 zero_point_const = opset.constant(
                     compressed_weight.zero_point.data,
                     dtype=compression_dtype,
                     name=f"{const_node_name}/zero_point",
                 )
-                converted_zero_point = opset.convert(zero_point_const, const_dtype)
-                converted_const = opset.subtract(converted_const, converted_zero_point)
+                converted_zero_point = opset.convert(zero_point_const, ov.Type.f16)
+                converted_const = opset.subtract(
+                    converted_const, converted_zero_point, name=f"{const_node_name}/zero_point/subtract"
+                )
 
-            scale_const = opset.constant(compressed_weight.scale.data, dtype="float16", name=f"{const_node_name}/scale")
-            if const_dtype != "float16":
-                scale_const = opset.convert(scale_const, const_dtype, name=f"{const_node_name}/scale_convert")
+            scale_const = opset.constant(
+                compressed_weight.scale.data, dtype=ov.Type.f16, name=f"{const_node_name}/scale"
+            )
             mul = opset.multiply(
                 converted_const,
                 scale_const,
                 name=f"{const_node_name}/fq_weights_{wc_params.weight_port_id}",
             )
+            if const_dtype == ov.Type.f32:
+                mul = opset.convert(mul, ov.Type.f32, name=f"{mul.get_friendly_name()}/convert")
 
             if compression_config.group_size != -1:
                 mul = opset.reshape(mul, output_shape=original_shape, special_zero=False)
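
For reference, below is a minimal sketch (not part of the diff) of the decompression subgraph the updated code builds for an int8-compressed weight. The shapes, zero-point values, and opset version are illustrative assumptions; the node names follow the patterns used above with a weight constant named "weights":

    import numpy as np
    import openvino.runtime as ov
    from openvino.runtime import opset13 as opset  # assumption: any recent opset exposes the same factories

    # the compressed weight stays in its compression dtype (u8 here) ...
    compressed_const = opset.constant(np.ones((4, 4), dtype=np.uint8), dtype=np.uint8, name="weights")
    # ... and is now always decompressed to f16, regardless of the original weight dtype
    converted_const = opset.convert(compressed_const, ov.Type.f16)
    zero_point = opset.constant(np.full((4, 1), 128, dtype=np.uint8), dtype=np.uint8, name="weights/zero_point")
    converted_zp = opset.convert(zero_point, ov.Type.f16)
    sub = opset.subtract(converted_const, converted_zp, name="weights/zero_point/subtract")
    scale = opset.constant(np.ones((4, 1), dtype=np.float16), dtype=ov.Type.f16, name="weights/scale")
    mul = opset.multiply(sub, scale, name="weights/fq_weights_1")
    # only when the original weight constant was f32 is an extra Convert appended after the Multiply
    mul = opset.convert(mul, ov.Type.f32, name=f"{mul.get_friendly_name()}/convert")
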
11 changes: 7 additions & 4 deletions tests/openvino/native/models.py
@@ -790,16 +790,19 @@ def _create_ov_model(self):


 class IdentityMatmul(OVReferenceModel):
-    def _create_ov_model(self, weights_dtype=None):
+    def _create_ov_model(self, weights_dtype=None, activation_dtype=None):
         """
         :param: weights_dtype: precision of weights, should be either np.float32 or np.float16
+        :param: activation_dtype: precision of activations, should be either np.float32 or np.float16
         """
         weights_dtype = np.float32 if weights_dtype is None else weights_dtype
-        input_node = opset.parameter([3, 3], name="Input_1")
+        activation_dtype = np.float32 if activation_dtype is None else activation_dtype
+
+        input_node = opset.parameter([3, 3], dtype=activation_dtype, name="Input_1")
         weights_data = np.eye(3) * 255
         current_weights = opset.constant(weights_data, dtype=weights_dtype, name="weights")
-        if weights_dtype != np.float32:
-            current_weights = opset.convert(current_weights, np.float32, name="weights/convert")
+        if weights_dtype != activation_dtype:
+            current_weights = opset.convert(current_weights, activation_dtype, name="weights/convert")
         matmul_node = opset.matmul(input_node, current_weights, transpose_a=False, transpose_b=True, name="MatMul")
         result = opset.result(matmul_node, name="Result")
         result.get_output_tensor(0).set_names(set(["Result"]))
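
A hedged usage sketch of the updated reference model (not from the diff; the import path assumes the repository's test layout, tests/openvino/native/models.py):

    import numpy as np
    import openvino.runtime as ov
    from nncf import compress_weights
    from tests.openvino.native.models import IdentityMatmul

    # f16 weights with f32 activations, one of the combinations the new test below iterates over
    model = IdentityMatmul(weights_dtype=np.float16, activation_dtype=np.float32).ov_model
    compressed = compress_weights(model)
    ops = {op.get_friendly_name(): op for op in compressed.get_ops()}
    scale_node = ops["weights/fq_weights_1"].input_value(1).get_node()
    assert scale_node.get_element_type() == ov.Type.f16  # the scale stays f16 regardless of the weight dtype
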
56 changes: 32 additions & 24 deletions tests/openvino/native/quantization/test_weights_compression.py
@@ -96,8 +96,6 @@ def check_int8_node(op: ov.Node, mode: CompressWeightsMode = CompressWeightsMode
     mul_node = get_next_node(sub_node)
     assert mul_node.get_type_name() == "Multiply"
     scale_node = mul_node.input_value(1).get_node()
-    if scale_node.get_type_name() == "Convert":
-        scale_node = scale_node.input_value(0).get_node()
     scale = get_const_value(scale_node)
 
     return {
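
For context, get_next_node is a small helper from this test suite. A plausible minimal sketch of what it does, assuming each checked node has exactly one consumer (the actual helper lives elsewhere in the tests):

    def get_next_node(node):
        # follow the only consumer of the node's first output
        target_inputs = node.output(0).get_target_inputs()
        assert len(target_inputs) == 1
        return next(iter(target_inputs)).get_node()
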
@@ -134,11 +132,12 @@ def check_int4_grouped(op: ov.Node, mode: CompressWeightsMode, group_size: int =
     mul_node = get_next_node(sub_node)
     assert mul_node.get_type_name() == "Multiply"
     scale_node = mul_node.input_value(1).get_node()
-    if scale_node.get_type_name() == "Convert":
-        scale_node = scale_node.input_value(0).get_node()
     assert list(scale_node.shape) == reduced_weight_shape
 
-    reshape_node = get_next_node(mul_node)
+    convert_node = get_next_node(mul_node)
+    assert convert_node.get_type_name() == "Convert"
+
+    reshape_node = get_next_node(convert_node)
     assert reshape_node.get_type_name() == "Reshape"
 
     return {
@@ -160,11 +159,12 @@ def check_nf4_grouped(op: ov.Node, group_size: int = 7):
     mul_node = get_next_node(convert_node)
     assert mul_node.get_type_name() == "Multiply"
     scale_node = mul_node.input_value(1).get_node()
-    if scale_node.get_type_name() == "Convert":
-        scale_node = scale_node.input_value(0).get_node()
     assert list(scale_node.shape) == reduced_weight_shape
 
-    reshape_node = get_next_node(mul_node)
+    convert_node = get_next_node(mul_node)
+    assert convert_node.get_type_name() == "Convert"
+
+    reshape_node = get_next_node(convert_node)
     assert reshape_node.get_type_name() == "Reshape"
 
     return {
@@ -697,22 +697,30 @@ def test_data_type_for_num_weights(mocker):
     assert isinstance(params.num_weights, np.uint64)
 
 
-def test_weight_scale_datatype():
-    # When model weight is in fp32, there will be an extra convert node for weight scale f16 > f32
-    model_fp32 = IdentityMatmul(weights_dtype=np.float32).ov_model
-    compressed_model_fp32 = compress_weights(model_fp32)
-    name_to_node_map = {op.get_friendly_name(): op for op in compressed_model_fp32.get_ops()}
-    assert "weights/scale_convert" in name_to_node_map
-    scale_multiply_node = name_to_node_map["weights/fq_weights_1"]
-    assert scale_multiply_node.input_value(1).get_node().get_element_type() == ov.Type.f32
-
-    # When model weight is in fp16, there will be no extra convert node for weight scale
-    model_fp16 = IdentityMatmul(weights_dtype=np.float16).ov_model
-    compressed_model_fp16 = compress_weights(model_fp16)
-    name_to_node_map = {op.get_friendly_name(): op for op in compressed_model_fp16.get_ops()}
-    assert "weights/scale_convert" not in name_to_node_map
-    scale_multiply_node = name_to_node_map["weights/fq_weights_1"]
-    assert scale_multiply_node.input_value(1).get_node().get_element_type() == ov.Type.f16
+def test_compression_for_different_dtypes():
+    for activation_dtype in [np.float32, np.float16]:
+        for weight_dtype in [np.float32, np.float16]:
+            if activation_dtype == np.float16 and weight_dtype == np.float32:
+                # Activations can be in f16 only if weights are in f16
+                continue
+
+            model = IdentityMatmul(weights_dtype=weight_dtype, activation_dtype=activation_dtype).ov_model
+            compressed_model = compress_weights(model)
+            name_to_node_map = {op.get_friendly_name(): op for op in compressed_model.get_ops()}
+
+            # Weight scale should be in fp16 nevertheless the weight data type
+            scale_multiply_node = name_to_node_map["weights/fq_weights_1"]
+            assert scale_multiply_node.input_value(1).get_node().get_element_type() == ov.Type.f16
+
+            next_node = get_next_node(scale_multiply_node)
+            if activation_dtype == np.float16:
+                # There should be no convert node after multiply if both weights and activations are in f16
+                assert next_node.get_type_name() != "Convert"
+            else:
+                assert next_node.get_type_name() == "Convert"
+                # In case weight is in fp32, the convert node is manually inserted
+                if weight_dtype == np.float32:
+                    assert next_node.get_friendly_name() == "weights/fq_weights_1/convert"
 
 
 DATASET_SIZE = 129