Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PTQ][OV] BF16 support #2307

Merged
merged 38 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
26b6c73
Added BF16 & ov.Tensor support
KodiaqQ Dec 7, 2023
5f02d99
Add FQ params dtype conversion
KodiaqQ Dec 7, 2023
2bcfca9
Update tests for BF16
KodiaqQ Dec 7, 2023
65ce6dc
Fix tests
KodiaqQ Dec 7, 2023
4f91018
Fix bf16 tests
KodiaqQ Dec 7, 2023
69e2297
Added const with types
KodiaqQ Dec 8, 2023
de85c1d
Apply comment
KodiaqQ Dec 12, 2023
058a6e1
Disable tests
KodiaqQ Dec 12, 2023
12447d9
Added PrePostProcessor for FP32 outputs
KodiaqQ Dec 12, 2023
627ff67
Remove BF16 from testing
KodiaqQ Dec 13, 2023
6f011d9
Merge remote-tracking branch 'openvinotoolkit/develop' into nm/bf16_s…
KodiaqQ Jan 16, 2024
f3c8ed8
Adjust to develop
KodiaqQ Jan 16, 2024
c97c616
Adjust BF16 support in tests
KodiaqQ Jan 16, 2024
ccd0b91
Added opset.constant with shared_memory option
KodiaqQ Jan 18, 2024
7f670a0
Merge remote-tracking branch 'openvinotoolkit/develop' into nm/bf16_s…
KodiaqQ Jan 22, 2024
91bb312
Added cast to fp32
KodiaqQ Jan 23, 2024
ff8f0ca
Merge openvinotoolkit/develop into nm/bf16_support
KodiaqQ Apr 17, 2024
ca6ff73
Removed PrePostProcessor usage
KodiaqQ Apr 17, 2024
11f4929
Adapt F-/BC algos to BF16
KodiaqQ Apr 18, 2024
c67ee84
Change get_const_value data output
KodiaqQ Apr 19, 2024
833f7c9
Merge remote-tracking branch 'openvinotoolkit/develop' into nm/bf16_s…
KodiaqQ May 27, 2024
097e938
Change get_const_value behavior
KodiaqQ May 28, 2024
106ff8a
Update implementation
KodiaqQ Jun 17, 2024
d2e5556
Merge remote-tracking branch 'openvinotoolkit/develop' into nm/bf16_s…
KodiaqQ Jun 17, 2024
5636d1a
Merge remote-tracking branch 'openvinotoolkit/develop' into nm/bf16_s…
KodiaqQ Jun 18, 2024
d2df92e
Fix pipeline tests
KodiaqQ Jun 18, 2024
4f9cd37
Tensor names set update
KodiaqQ Jun 18, 2024
87ea12e
Merge remote-tracking branch 'openvinotoolkit/develop' into nm/bf16_s…
KodiaqQ Jun 19, 2024
1cdc747
Extend OutputInsertionCommand
KodiaqQ Jun 19, 2024
7dfd1c1
Apply comments
KodiaqQ Jun 19, 2024
9471ac8
Merge remote-tracking branch 'openvinotoolkit/develop' into nm/bf16_s…
KodiaqQ Jun 20, 2024
8ffe8ae
Limit .get_data usage
KodiaqQ Jul 10, 2024
5405bc9
Merge remote-tracking branch 'openvinotoolkit/develop' into nm/bf16_s…
KodiaqQ Jul 10, 2024
5f4062b
Limit shared_memory usage
KodiaqQ Jul 10, 2024
cfa7ce9
Fix WC
KodiaqQ Jul 10, 2024
f2add1f
Fix test_get_const_value
KodiaqQ Jul 10, 2024
5725636
Apply comment
KodiaqQ Jul 11, 2024
3e531c4
Apply minor comments
KodiaqQ Jul 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions nncf/openvino/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(self, model: ov.CompiledModel):
self.input_tensor_names.update(model_input.get_names())

def _check_input_data_format(
self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]]
self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray], ov.Tensor]
) -> None:
"""
Checks correspondence of the model input names and the passed data.
Expand All @@ -44,7 +44,7 @@ def _check_input_data_format(

:param input_data: Provided inputs to infer the model.
"""
actual_num_inputs = 1 if isinstance(input_data, np.ndarray) else len(input_data)
actual_num_inputs = 1 if isinstance(input_data, (np.ndarray, ov.Tensor)) else len(input_data)
if actual_num_inputs != self.number_of_inputs:
raise RuntimeError(f"Model expects {self.number_of_inputs} inputs, but {actual_num_inputs} are provided.")
if isinstance(input_data, dict):
Expand All @@ -53,8 +53,8 @@ def _check_input_data_format(
raise RuntimeError(f"Missing a required input: {name} to run the model.")

def infer(
self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]]
) -> Dict[str, np.ndarray]:
self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray], ov.Tensor]
) -> Union[Dict[str, np.ndarray], ov.Tensor]:
andrey-churkin marked this conversation as resolved.
Show resolved Hide resolved
"""
Runs model on the provided input via OpenVINO Runtime.
Returns the dictionary of model outputs by node names.
Expand Down Expand Up @@ -90,8 +90,8 @@ def __init__(self, model: ov.Model, target_device: TargetDevice = TargetDevice.C
self.engine = OVCompiledModelEngine(compiled_model)

def infer(
self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]]
) -> Dict[str, np.ndarray]:
self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray], ov.Tensor]
) -> Union[Dict[str, np.ndarray], ov.Tensor]:
"""
Runs model on the provided input via OpenVINO Runtime.
Returns the dictionary of model outputs by node names.
Expand Down
39 changes: 19 additions & 20 deletions nncf/openvino/graph/model_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,24 +235,21 @@ def _apply_quantizer_insertion_transformations(
return model

@staticmethod
def convert_params_to_fp16(
fq_params: FakeQuantizeParameters,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
def convert_params_to_operations(
fq_params: FakeQuantizeParameters, dtype: ov.Type
) -> Tuple[ov.Node, ov.Node, ov.Node, ov.Node]:
"""
Converts FakeQuantize parameters to FP16 precision.
Converts FakeQuantize parameters to operations with provided dtype.

:param fq_params: FakeQuantize node attributes.
:return: FakeQuantize parameters in FP16 precision.
:param dtype: Data type for operations.
:return: FakeQuantize parameters as operations.
"""

def _convert_to_fp16(data):
clip_data = np.clip(data, np.finfo(np.float16).min, np.finfo(np.float16).max)
return clip_data.astype(np.float16)

input_low = _convert_to_fp16(fq_params.input_low.data)
input_high = _convert_to_fp16(fq_params.input_high.data)
output_low = _convert_to_fp16(fq_params.output_low.data)
output_high = _convert_to_fp16(fq_params.output_high.data)
input_low = opset.constant(value=fq_params.input_low.data, dtype=dtype)
input_high = opset.constant(value=fq_params.input_high.data, dtype=dtype)
output_low = opset.constant(value=fq_params.output_low.data, dtype=dtype)
output_high = opset.constant(value=fq_params.output_high.data, dtype=dtype)
l-bat marked this conversation as resolved.
Show resolved Hide resolved
return input_low, input_high, output_low, output_high

@staticmethod
Expand Down Expand Up @@ -280,8 +277,9 @@ def _insert_fake_quantize_op(
inp_node = target_node.input(port_id)
input_node_output = inp_node.get_source_output()
data_type = inp_node.get_element_type()
if data_type == ov.Type(np.float16):
input_low, input_high, output_low, output_high = OVModelTransformer.convert_params_to_fp16(fq_params)
input_low, input_high, output_low, output_high = OVModelTransformer.convert_params_to_operations(
fq_params, data_type
)
name = "fq_weights" if transform_type == TargetType.OPERATION_WITH_WEIGHTS else "fq_input"
fq_name = f"{node_name}/{name}_{port_id}"

Expand All @@ -299,8 +297,9 @@ def _insert_fake_quantize_op(
elif transform_type == TargetType.POST_LAYER_OPERATION:
output = target_node.output(port_id)
data_type = output.get_element_type()
if data_type == ov.Type(np.float16):
input_low, input_high, output_low, output_high = OVModelTransformer.convert_params_to_fp16(fq_params)
input_low, input_high, output_low, output_high = OVModelTransformer.convert_params_to_operations(
fq_params, data_type
)
target_inputs = output.get_target_inputs()
fq_name = f"{node_name}/fq_output_{port_id}"
fq = opset.fake_quantize(output, input_low, input_high, output_low, output_high, levels, name=fq_name)
Expand Down Expand Up @@ -355,11 +354,11 @@ def _set_const_value(node_with_const: ov.Node, const_port_id: int, const_value:
raise RuntimeError("Constant node was expected but could not find it.")

const_shape = const_node.data.shape
const_dtype = const_node.data.dtype
const_value = np.reshape(const_value, const_shape).astype(const_dtype)
const_dtype = const_node.get_element_type()
const_tensor = ov.Tensor(const_value, const_shape, const_dtype)

# TODO(andrey-churkin): Replace with opset13.constant() in a future release
new_const_node = ov.op.Constant(const_value, shared_memory=True)
new_const_node = ov.op.Constant(const_tensor, shared_memory=True)
new_const_node.set_friendly_name(const_node.get_friendly_name())
const_port.replace_source_output(new_const_node.output(0))

Expand Down
1 change: 1 addition & 0 deletions nncf/openvino/graph/nncf_graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def convert_to_nncf_dtype(ov_dtype: str) -> Dtype:
"""
conversion_map = {
"f16": "float",
"bf16": "float",
"f32": "float",
"f64": "float",
"i4": "int",
Expand Down
3 changes: 2 additions & 1 deletion tests/openvino/native/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def get_dataset_for_test(model):
input_data = {}
for param in model.get_parameters():
input_shape = param.partial_shape.get_max_shape()
input_data[param.get_output_tensor(0).get_any_name()] = rng.uniform(0, 1, input_shape)
tensor = param.get_output_tensor(0)
input_data[tensor.get_any_name()] = rng.uniform(0, 1, input_shape).astype(tensor.get_element_type().to_dtype())

dataset = Dataset([input_data])
return dataset
Expand Down
14 changes: 7 additions & 7 deletions tests/openvino/native/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,21 +260,21 @@ def _create_ov_model(self):


class FPModel(OVReferenceModel):
def __init__(self, const_dtype="FP32", input_dtype="FP32"):
self.const_dtype = np.float32 if const_dtype == "FP32" else np.float16
self.input_dtype = np.float32 if input_dtype == "FP32" else np.float16
def __init__(self, const_dtype: ov.Type = ov.Type.f32, input_dtype: ov.Type = ov.Type.f32):
self.const_dtype = const_dtype
self.input_dtype = input_dtype
super().__init__()

def _create_ov_model(self):
input_shape = [1, 3, 4, 2]
input_1 = opset.parameter(input_shape, name="Input", dtype=self.input_dtype)
data = self._rng.random((1, 3, 4, 5)).astype(self.const_dtype)
data = opset.constant(value=self._rng.random((1, 3, 4, 5)), dtype=self.const_dtype)
if self.const_dtype != self.input_dtype:
data = opset.convert(data, self.input_dtype)
data = opset.convert(data, self.input_dtype.to_string())
matmul = opset.matmul(input_1, data, transpose_a=True, transpose_b=False, name="MatMul")
bias = self._rng.random((1, 3, 1, 1)).astype(self.const_dtype)
bias = opset.constant(value=self._rng.random((1, 3, 1, 1)), dtype=self.const_dtype)
if self.const_dtype != self.input_dtype:
bias = opset.convert(bias, self.input_dtype)
bias = opset.convert(bias, self.input_dtype.to_string())
add = opset.add(matmul, bias, name="Add")
result = opset.result(add, name="Result_Add")
result.get_output_tensor(0).set_names(set(["Result_Add"]))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

from pathlib import Path

import numpy as np
import openvino.runtime as ov
import pytest

Expand Down Expand Up @@ -172,8 +171,8 @@ def test_synthetic_models_fq_shapes(model_creator_func, ref_shapes, inplace_stat
assert node["output_high"].shape == ref_shapes[node_name]


@pytest.mark.parametrize("const_dtype", ["FP16", "FP32"])
@pytest.mark.parametrize("input_dtype", ["FP16", "FP32"])
@pytest.mark.parametrize("const_dtype", [ov.Type.f16, ov.Type.f32, ov.Type.bf16])
@pytest.mark.parametrize("input_dtype", [ov.Type.f16, ov.Type.f32, ov.Type.bf16])
def test_fq_precision_orig_fp32model(const_dtype, input_dtype, inplace_statistics):
model = FPModel(const_dtype, input_dtype)
quantized_model = quantize_model(
Expand All @@ -183,10 +182,10 @@ def test_fq_precision_orig_fp32model(const_dtype, input_dtype, inplace_statistic
if op.get_type_name() == "FakeQuantize":
inp_node = op.input(0)
fq_input_node = inp_node.get_source_output().get_node()
if fq_input_node.get_element_type() == "Constant":
assert op.get_element_type() == ov.Type(np.float32 if input_dtype == "FP32" else np.float16)
if fq_input_node.get_type_name() == "Constant":
assert op.get_element_type() == const_dtype
elif op.get_type_name() == "Convert":
inp_node = op.input(0)
fq_input_node = inp_node.get_source_output().get_node()
if fq_input_node.get_element_type() == "Constant":
assert op.get_element_type() == ov.Type(np.float32 if const_dtype == "FP32" else np.float16)
if fq_input_node.get_type_name() == "Constant":
assert op.get_element_type() == input_dtype
14 changes: 13 additions & 1 deletion tests/openvino/native/test_model_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,19 @@ def test_fq_insertion_weights(target_layers, ref_fq_names):
"refs": [2.0],
},
{
"model": FPModel(const_dtype="FP16").ov_model,
"model": FPModel(const_dtype=ov.Type.f16).ov_model,
"layers": ["MatMul"],
"values": [np.full((3,), 2)],
"refs": [2.0],
},
{
"model": FPModel(const_dtype=ov.Type.f16, input_dtype=ov.Type.f16).ov_model,
"layers": ["MatMul"],
"values": [np.full((3,), 2)],
"refs": [2.0],
},
{
"model": FPModel(const_dtype=ov.Type.bf16, input_dtype=ov.Type.bf16).ov_model,
"layers": ["MatMul"],
"values": [np.full((3,), 2)],
"refs": [2.0],
Expand Down
8 changes: 5 additions & 3 deletions tests/openvino/native/test_node_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# limitations under the License.

import numpy as np
import openvino.runtime as ov
import pytest
from openvino.runtime import opset9 as opset

Expand All @@ -30,13 +31,14 @@
from tests.openvino.native.models import MatMul2DNotBiasModel


def test_get_weight_value_const_with_convert():
model = FPModel(const_dtype="FP16").ov_model
@pytest.mark.parametrize("precision", [ov.Type.f16, ov.Type.bf16])
def test_get_weight_value_const_with_convert(precision):
model = FPModel(const_dtype=precision).ov_model
nncf_graph = NNCFGraphFactory.create(model)
node_with_weight = nncf_graph.get_node_by_name("MatMul")

actual_value = get_weight_value(node_with_weight, model, port_id=1)
assert actual_value.dtype == np.float16
assert actual_value.dtype == precision.to_dtype()


@pytest.mark.parametrize(
Expand Down