// Review summary of this patch. These notes sit before the first "diff --git" header and
// change nothing inside the hunks below.
//
// NOTE(review): this copy of the patch looks damaged by extraction - TODO confirm against
// the upstream change before applying:
//   * the hunks' line breaks and indentation are collapsed onto a few long lines;
//   * `template` appears with no parameter list, and static_cast / std::make_shared /
//     std::shared_ptr / get_vector / create_scalar_constant / register_pass appear with no
//     type arguments, so angle-bracket content was presumably stripped.
//
// File 1: src/plugins/intel_gpu/src/plugin/transformations/bcast_and_pad_zp_buffers.cpp
//   * Adds the helper create_scalar_constant(qp). When all_same_value() accepts the
//     constant's data, it builds a new constant whose shape has the same rank as qp with
//     every dimension set to 1, reads element 0 of qp, stores it with fill_data(), and
//     returns the new constant. Otherwise it returns nullptr.
//   * scalar_parameter(qp) drops its switch over the element type and instead calls
//     create_scalar_constant() from an if / else-if chain over u8, i8, f16 and f32. Any
//     other element type still reaches OPENVINO_THROW with the unchanged message.
//   * Behavioural difference visible in the hunk: the removed code created an ov::Tensor
//     with the all-ones shape and wrapped it in a constant without writing any value into
//     it; the added code writes the first element of the source constant into the result.
//   * Both versions yield nullptr when the elements are not all equal.
//   * NOTE(review): get_vector()[0] presumably copies every element just to read the
//     first one - confirm, and consider reading it through get_data_ptr() instead.
//
// File 2: src/plugins/intel_gpu/tests/unit/transformations/bcast_and_pad_zp_buffers_test.cpp
//   * Adds TEST_F(TransformationTestsF, BroadcastAndPadZeroPointBuffers_scalar_wzp).
//   * Input model: azp_const has shape {1, 1, 1, 1} and value 1; wzp_const has shape
//     {1, 8, 1, 1} and value 12. The pass is registered with the arguments (8, true).
//   * Reference model: azp_const has shape {1, 8, 1, 1} and value 1; wzp_const has shape
//     {1, 1, 1, 1} and value 12. Weights, compensation, strides, pads and dilations are
//     the same in both models.
//   * The test enables FunctionsComparator::CmpValues::CONST_VALUES - presumably so the
//     value held by the collapsed wzp constant (12) is compared, not only its shape;
//     verify against the comparator's documentation.
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/bcast_and_pad_zp_buffers.cpp b/src/plugins/intel_gpu/src/plugin/transformations/bcast_and_pad_zp_buffers.cpp index c35816b7d3febc..9e85fbbb535946 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/bcast_and_pad_zp_buffers.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/bcast_and_pad_zp_buffers.cpp @@ -75,34 +75,39 @@ bool all_same_value(const T* qp_ptr, size_t size) { }); } -std::shared_ptr scalar_parameter(std::shared_ptr qp) { +template +std::shared_ptr +create_scalar_constant(const std::shared_ptr& qp) { auto type = qp->get_element_type(); - size_t size = ov::shape_size(qp->get_shape()); - bool has_same_value = false; - switch (type) { - case ov::element::u8: - has_same_value = all_same_value(static_cast(qp->get_data_ptr()), size); - break; - case ov::element::i8: - has_same_value = all_same_value(static_cast(qp->get_data_ptr()), size); - break; - case ov::element::f16: - has_same_value = all_same_value(static_cast(qp->get_data_ptr()), size); - break; - case ov::element::f32: - has_same_value = all_same_value(static_cast(qp->get_data_ptr()), size); - break; - default: OPENVINO_THROW("[GPU] Can't pad quantization parameter for ", type, " element type"); + auto shape = qp->get_shape(); + if (all_same_value(static_cast(qp->get_data_ptr()), ov::shape_size(shape))) { + ov::Shape new_shape(shape.size(), 1); + ov::Tensor new_tensor(type, new_shape); + auto new_qp = std::make_shared(new_tensor); + auto val = qp->get_vector()[0]; + new_qp->fill_data(type, val); + return new_qp; } + return nullptr; +} - if (has_same_value) { - auto new_shape = qp->get_shape(); - std::fill(new_shape.begin(), new_shape.end(), 1); - ov::Tensor new_qp(type, new_shape); - return std::make_shared(new_qp); +std::shared_ptr scalar_parameter(std::shared_ptr qp) { + auto type = qp->get_element_type(); + std::shared_ptr new_qp = nullptr; + + if (type == ov::element::u8) { + new_qp = create_scalar_constant(qp); + } 
else if (type == ov::element::i8) { + new_qp = create_scalar_constant(qp); + } else if (type == ov::element::f16) { + new_qp = create_scalar_constant(qp); + } else if (type == ov::element::f32) { + new_qp = create_scalar_constant(qp); + } else { + OPENVINO_THROW("[GPU] Can't pad quantization parameter for ", type, " element type"); } - return nullptr; + return new_qp; } } // namespace diff --git a/src/plugins/intel_gpu/tests/unit/transformations/bcast_and_pad_zp_buffers_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/bcast_and_pad_zp_buffers_test.cpp index 5bea993237855d..218519f5bedbb1 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/bcast_and_pad_zp_buffers_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/bcast_and_pad_zp_buffers_test.cpp @@ -184,6 +184,61 @@ TEST_F(TransformationTestsF, BroadcastAndPadZeroPointBuffers_3) { } } +TEST_F(TransformationTestsF, BroadcastAndPadZeroPointBuffers_scalar_wzp) { + ov::Strides strides{1, 1}; + ov::Strides dilations{1, 1}; + ov::CoordinateDiff pads_begin{0, 0}; + ov::CoordinateDiff pads_end{0, 0}; + { + auto input = std::make_shared(ov::element::u8, ov::PartialShape{ 1, 8, 11, 12 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 8, 8, 3, 3 }, { 1 }); + auto no_bias = std::make_shared(); + auto azp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 1, 1, 1 }, { 1 }); + auto wzp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 8, 1, 1 }, { 12 }); + auto compensation = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 8, 1, 1 }, { 1 }); + auto conv = std::make_shared(input, + weights_const, + no_bias, + azp_const, + wzp_const, + compensation, + strides, + pads_begin, + pads_end, + dilations, + -1, + ov::op::PadType::EXPLICIT, + ov::element::f32); + + model = std::make_shared(ov::NodeVector{ conv }, ov::ParameterVector{ input }); + manager.register_pass(8, true); + } + { + auto input = 
std::make_shared(ov::element::u8, ov::PartialShape{ 1, 8, 11, 12 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 8, 8, 3, 3 }, { 1 }); + auto no_bias = std::make_shared(); + auto azp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 8, 1, 1 }, { 1 }); + auto wzp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 1, 1, 1 }, { 12 }); + auto compensation = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 8, 1, 1 }, { 1 }); + auto conv = std::make_shared(input, + weights_const, + no_bias, + azp_const, + wzp_const, + compensation, + strides, + pads_begin, + pads_end, + dilations, + -1, + ov::op::PadType::EXPLICIT, + ov::element::f32); + + model_ref = std::make_shared(ov::NodeVector{ conv }, ov::ParameterVector{ input }); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} + } // namespace intel_gpu } // namespace test } // namespace ov