diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected.hpp index a77c39c20338c0..66b97542520564 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected.hpp @@ -19,6 +19,7 @@ class FullyConnected : public ov::op::Op { FullyConnected(const ov::Output& A, const ov::Output& B, + const ov::Output& bias, const ov::element::Type output_type = ov::element::undefined); bool visit_attributes(ov::AttributeVisitor &visitor) override; diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp index 6835088eb88967..7e63a523660817 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp @@ -18,12 +18,14 @@ class FullyConnectedCompressed : public FullyConnected { FullyConnectedCompressed(const ov::Output &A, const ov::Output &B, + const ov::Output &bias, const ov::Output &decompression_scale, const ov::Output &decompression_zero_point, const ov::element::Type output_type = ov::element::undefined); FullyConnectedCompressed(const ov::Output &A, const ov::Output &B, + const ov::Output &bias, const ov::Output &decompression_scale, const ov::element::Type output_type = ov::element::undefined); diff --git a/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp index 8a628809266d43..5b1b8f353863cd 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp @@ -26,14 +26,15 @@ namespace ov { namespace intel_gpu { static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::shared_ptr& op) { - validate_inputs_count(op, {3, 4}); + validate_inputs_count(op, {4, 5}); auto inputs = p.GetInputInfo(op); std::string primitive_name = layer_type_name_ID(op); auto input_name = inputs[0].pid; auto weights_name = inputs[1].pid; - auto scale_name = inputs[2].pid; - auto zp_name = inputs.size() == 4 ? inputs[3].pid : ""; + auto bias_name = inputs[2].pid; + auto scale_name = inputs[3].pid; + auto zp_name = inputs.size() == 5 ? inputs[4].pid : ""; float zp_value = 0.0f; bool has_scalar_zp = false; @@ -47,7 +48,7 @@ static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::share auto fc = cldnn::fully_connected(primitive_name, cldnn::input_info(input_name), weights_name, - "", + bias_name, scale_name, has_scalar_zp ? "" : zp_name, cldnn::element_type_to_data_type(op->get_output_element_type(0)), @@ -63,12 +64,13 @@ static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::share } static void CreateFullyConnectedOp(ProgramBuilder& p, const std::shared_ptr& op) { - validate_inputs_count(op, {2}); + validate_inputs_count(op, {3}); auto inputs = p.GetInputInfo(op); std::string layerName = layer_type_name_ID(op); auto input_name = inputs[0].pid; auto weights_name = inputs[1].pid; + auto bias_name = inputs[2].pid; auto shape_a = op->get_input_partial_shape(0); auto shape_b = op->get_input_partial_shape(1); @@ -79,7 +81,7 @@ static void CreateFullyConnectedOp(ProgramBuilder& p, const std::shared_ptrget_output_element_type(0)), cldnn::padding(), rank_a, diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp index 32b9dcfa5ff244..67100b1d764cff 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp @@ -59,14 +59,16 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon auto transpose_m = wrap_type({transpose_input, transpose_const_m}); auto data_m = any_input(); + auto bias_m = any_input(); auto weights_input_m = std::make_shared(ov::OutputVector{reshape_m, transpose_m, mul_m}); - auto fully_connected_m = wrap_type({data_m, weights_input_m}); + auto fully_connected_m = wrap_type({data_m, weights_input_m, bias_m}); ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); OPENVINO_ASSERT(pattern_map.count(fully_connected_m)); OPENVINO_ASSERT(pattern_map.count(mul_const_m)); OPENVINO_ASSERT(pattern_map.count(weights_m)); + OPENVINO_ASSERT(pattern_map.count(bias_m)); OPENVINO_ASSERT(pattern_map.count(convert_m)); auto fc = std::dynamic_pointer_cast(pattern_map.at(fully_connected_m).get_node_shared_ptr()); if (!fc || transformation_callback(fc)) { @@ -103,6 +105,7 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon std::shared_ptr fc_input_b = reshape_const_to_2d(pattern_map.at(weights_m).get_node_shared_ptr()); std::shared_ptr fc_input_scale = scale; std::shared_ptr fc_input_zp = optional_zero_point; + std::shared_ptr fc_input_bias = pattern_map.at(bias_m).get_node_shared_ptr(); std::vector> result_nodes = {}; if (has_transpose) { const auto& transpose = pattern_map.at(transpose_m).get_node_shared_ptr(); @@ -128,12 +131,14 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon if (with_zero_point) { new_fc = std::make_shared(fc_input_a, fc_input_b, + fc_input_bias, fc_input_scale, fc_input_zp, fc->get_output_type()); } else { new_fc = std::make_shared(fc_input_a, fc_input_b, + fc_input_bias, fc_input_scale, fc->get_output_type()); } diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp index 0cd5e1090eb2df..411c4389ea247d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp @@ -3,6 +3,7 @@ // #include "intel_gpu/op/fully_connected.hpp" +#include "intel_gpu/op/placeholder.hpp" #include "convert_matmul_to_fc.hpp" #include "openvino/op/matmul.hpp" #include "openvino/op/convert.hpp" @@ -177,8 +178,10 @@ ConvertMatMulToFullyConnected::ConvertMatMulToFullyConnected() { fc_input_b = convert; } + auto no_bias = std::make_shared(); + // Create FullyConnected - auto fc = std::make_shared(fc_input_a, fc_input_b, matmul->get_output_element_type(0)); + auto fc = std::make_shared(fc_input_a, fc_input_b, no_bias, matmul->get_output_element_type(0)); fc->set_friendly_name(matmul->get_friendly_name()); new_ops.push_back(fc); ov::copy_runtime_info(matmul, new_ops); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_convert_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_convert_fusion.cpp index a5d798e4c2721c..e5f992ad9cd8b4 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/fc_convert_fusion.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_convert_fusion.cpp @@ -20,8 +20,9 @@ FullyConnectedConvertFusion::FullyConnectedConvertFusion() { auto data = any_input(); auto weights = any_input(); - auto fully_connected = wrap_type({data, weights}, consumers_count(1)); - auto fully_connected_compressed = wrap_type({data, weights, any_input(), any_input()}, consumers_count(1)); + auto bias = any_input(); + auto fully_connected = wrap_type({data, weights, bias}, consumers_count(1)); + auto fully_connected_compressed = wrap_type({data, weights, bias, any_input(), any_input()}, consumers_count(1)); auto fc = std::make_shared(OutputVector{fully_connected, fully_connected_compressed}); auto convert = wrap_type({fc}, type_matches(element::f32)); @@ -30,6 +31,7 @@ FullyConnectedConvertFusion::FullyConnectedConvertFusion() { const auto& m_data = pattern_map.at(data).get_node_shared_ptr(); const auto& m_weights = pattern_map.at(weights).get_node_shared_ptr(); + const auto& m_bias = pattern_map.at(bias).get_node_shared_ptr(); const auto& m_convert = pattern_map.at(convert).get_node_shared_ptr(); auto output_type = m_convert->get_output_element_type(0); @@ -38,13 +40,14 @@ FullyConnectedConvertFusion::FullyConnectedConvertFusion() { auto it = pattern_map.find(fully_connected); if (it != pattern_map.end()) { m_fc = it->second.get_node_shared_ptr(); - new_fc = std::make_shared(m_data, m_weights, output_type); + new_fc = std::make_shared(m_data, m_weights, m_bias, output_type); } else { m_fc = pattern_map.at(fully_connected_compressed).get_node_shared_ptr(); new_fc = std::make_shared(m_data, m_weights, - m_fc->input_value(2), + m_bias, m_fc->input_value(3), + m_fc->input_value(4), output_type); } new_fc->set_friendly_name(m_convert->get_friendly_name()); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/move_fc_reshape_to_weights.cpp b/src/plugins/intel_gpu/src/plugin/transformations/move_fc_reshape_to_weights.cpp index 8ed48f4768ec42..8b54d32d1c5559 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/move_fc_reshape_to_weights.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/move_fc_reshape_to_weights.cpp @@ -44,7 +44,7 @@ MoveFCReshapeToWeights::MoveFCReshapeToWeights() { auto weights_input_m = std::make_shared(ov::OutputVector{reshape_m, transpose_m}); auto data_m = any_input(); - auto fully_connected_m = wrap_type({data_m, weights_input_m}); + auto fully_connected_m = wrap_type({data_m, weights_input_m, any_input()}); ov::matcher_pass_callback callback = [&](ov::pass::pattern::Matcher& m) { const auto fully_connected = m.get_match_root(); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected.cpp index e10e2e2edcaba7..bd89197cba910d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected.cpp @@ -11,24 +11,25 @@ namespace op { FullyConnected::FullyConnected(const ov::Output& A, const ov::Output& B, + const ov::Output& bias, const ov::element::Type output_type) - : Op({A, B}), m_output_type(output_type) { + : Op({A, B, bias}), m_output_type(output_type) { validate_and_infer_types(); } std::shared_ptr FullyConnected::clone_with_new_inputs(const ov::OutputVector& new_args) const { check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), new_args.at(1), m_output_type); + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), m_output_type); } void FullyConnected::validate_and_infer_types() { const auto input_size = get_input_size(); NODE_VALIDATION_CHECK(this, - input_size >= 2, + input_size >= 3, "Number of inputs is incorrect. Current value is: ", input_size, - ", expected at least 2."); + ", expected at least 3."); ov::op::v0::MatMul op; op.set_transpose_a(false); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp index 1ecfc1e21081b5..4eb73cfcaf5280 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp @@ -10,37 +10,41 @@ namespace op { FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output& A, const ov::Output& B, + const ov::Output& bias, const ov::Output& decompression_scale, const ov::Output& decompression_zero_point, const ov::element::Type output_type) - : FullyConnected(A, B, output_type) { - set_argument(2, decompression_scale); - set_argument(3, decompression_zero_point); + : FullyConnected(A, B, bias, output_type) { + set_argument(3, decompression_scale); + set_argument(4, decompression_zero_point); validate_and_infer_types(); } FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output& A, const ov::Output& B, + const ov::Output& bias, const ov::Output& decompression_scale, const ov::element::Type output_type) - : FullyConnected(A, B, output_type) { - set_argument(2, decompression_scale); + : FullyConnected(A, B, bias, output_type) { + set_argument(3, decompression_scale); validate_and_infer_types(); } std::shared_ptr FullyConnectedCompressed::clone_with_new_inputs(const ov::OutputVector& new_args) const { check_new_args_count(this, new_args); - if (new_args.size() == 3) + if (new_args.size() == 4) return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), + new_args.at(3), m_output_type); - else if (new_args.size() == 4) + else if (new_args.size() == 5) return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), + new_args.at(4), m_output_type); else OPENVINO_THROW("Unexpected inputs count for FullyConnectedCompressed op: ", new_args.size()); diff --git a/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp index 60920be9c90a09..1c7ebe72990ae4 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp @@ -17,6 +17,7 @@ #include "openvino/op/add.hpp" #include "intel_gpu/op/fully_connected.hpp" #include "intel_gpu/op/fully_connected_compressed.hpp" +#include "intel_gpu/op/placeholder.hpp" #include "plugin/transformations/convert_fc_to_compressed.hpp" @@ -36,7 +37,8 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed1) { auto convert = std::make_shared(weights_const, ov::element::f32); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); auto scale = std::make_shared(convert, scale_const); - auto fc = std::make_shared(input1, scale); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); manager.register_pass(); @@ -44,8 +46,9 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed1) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); - auto fc_compressed = std::make_shared(input1, weights_const, scale_const); + auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const); model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); } @@ -60,7 +63,8 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed2) { auto sub = std::make_shared(convert, zp_const); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); auto scale = std::make_shared(sub, scale_const); - auto fc = std::make_shared(input1, scale); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); manager.register_pass(); @@ -68,9 +72,10 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed2) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); - auto fc_compressed = std::make_shared(input1, weights_const, scale_const, zp_const); + auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const); model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); } @@ -87,7 +92,8 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed3) { auto scale = std::make_shared(sub, scale_const); auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { -1, 16 }); auto reshape = std::make_shared(scale, reshape_const, false); - auto fc = std::make_shared(input1, reshape); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, reshape, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); manager.register_pass(); @@ -95,9 +101,10 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed3) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 }); auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 }); - auto fc_compressed = std::make_shared(input1, weights_const, scale_const, zp_const); + auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const); model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); } @@ -114,7 +121,8 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed4) { auto scale = std::make_shared(sub, scale_const); auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { -1, 16 }); auto reshape = std::make_shared(scale, reshape_const, false); - auto fc = std::make_shared(input1, reshape); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, reshape, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); manager.register_pass(); @@ -122,9 +130,10 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed4) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 32, 16 }, { 1 }); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 }); auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1 }, { 1 }); - auto fc_compressed = std::make_shared(input1, weights_const, scale_const, zp_const); + auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const); model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); } @@ -143,7 +152,8 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed5) { auto reshape = std::make_shared(scale, reshape_const, false); auto transpose_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose = std::make_shared(reshape, transpose_const); - auto fc = std::make_shared(input1, transpose); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, transpose, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); manager.register_pass(); @@ -153,11 +163,12 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed5) { auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 16, 32 }, { 1 }); auto transpose_weights_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_weights = std::make_shared(weights_const, transpose_weights_const); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 }); auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1 }, { 1 }); - auto fc_compressed = std::make_shared(input1, transpose_weights, transpose_scale, zp_const); + auto fc_compressed = std::make_shared(input1, transpose_weights, no_bias, transpose_scale, zp_const); model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); } @@ -176,7 +187,8 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed6) { auto reshape = std::make_shared(scale, reshape_const, false); auto transpose_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose = std::make_shared(reshape, transpose_const); - auto fc = std::make_shared(input1, transpose); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, transpose, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); manager.register_pass(); @@ -186,13 +198,14 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed6) { auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 16, 32 }, { 1 }); auto transpose_weights_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_weights = std::make_shared(weights_const, transpose_weights_const); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 }); auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 }); auto transpose_zp_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_zp = std::make_shared(zp_const, transpose_zp_const); - auto fc_compressed = std::make_shared(input1, transpose_weights, transpose_scale, transpose_zp); + auto fc_compressed = std::make_shared(input1, transpose_weights, no_bias, transpose_scale, transpose_zp); model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); } @@ -211,7 +224,8 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed7) { auto reshape = std::make_shared(scale, reshape_const, false); auto transpose_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose = std::make_shared(reshape, transpose_const); - auto fc = std::make_shared(input1, transpose); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, transpose, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); manager.register_pass(); @@ -221,13 +235,14 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed7) { auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 16, 32 }, { 1 }); auto transpose_weights_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_weights = std::make_shared(weights_const, transpose_weights_const); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 }); auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 }); auto transpose_zp_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_zp = std::make_shared(zp_const, transpose_zp_const); - auto fc_compressed = std::make_shared(input1, transpose_weights, transpose_scale, transpose_zp); + auto fc_compressed = std::make_shared(input1, transpose_weights, no_bias, transpose_scale, transpose_zp); model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); } @@ -325,6 +340,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) { auto reshape = std::make_shared(scale, reshape_const, false); auto transpose_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose = std::make_shared(reshape, transpose_const); + auto no_bias = std::make_shared(); auto param1 = std::make_shared(ov::element::f16, ov::PartialShape{-1, 15}); auto const_value1 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1, 1}, {1}); @@ -344,7 +360,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) { args[i] = subgraph_parameters[i]->output(0); } auto subgraph_op = std::make_shared(args, submodel); - auto fc = std::make_shared(subgraph_op->output(1), transpose); + auto fc = std::make_shared(subgraph_op->output(1), transpose, no_bias); model = std::make_shared(ov::NodeVector{std::make_shared(subgraph_op->output(0)), fc}, subgraph_parameters); manager.register_pass(); @@ -353,6 +369,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) { auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 16, 32 }, { 1 }); auto transpose_weights_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_weights = std::make_shared(weights_const, transpose_weights_const); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 }); auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); @@ -378,7 +395,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) { args[i] = subgraph_parameters[i]->output(0); } auto subgraph_op = std::make_shared(args, submodel); - auto fc_compressed = std::make_shared(subgraph_op->output(1), transpose_weights, transpose_scale, transpose_zp); + auto fc_compressed = std::make_shared(subgraph_op->output(1), transpose_weights, no_bias, transpose_scale, transpose_zp); model_ref = std::make_shared(ov::NodeVector{ std::make_shared(subgraph_op->output(0)), fc_compressed }, subgraph_parameters); } diff --git a/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp index 2b6dfb4a8f0602..a840902ac422de 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/convert_matmul_to_fc_test.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -42,7 +43,9 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest1) { auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); auto transpose2 = std::make_shared(input2, transpose_constant2); - auto matmul = std::make_shared(transpose1, transpose2); + auto no_bias = std::make_shared(); + + auto matmul = std::make_shared(transpose1, transpose2, no_bias); model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); } @@ -78,7 +81,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest3) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1}); - auto matmul = std::make_shared(input1, input2); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(input1, input2, no_bias); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } @@ -96,7 +100,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest4) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1}); - auto matmul = std::make_shared(input1, input2); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(input1, input2, no_bias); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } @@ -132,7 +137,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest7) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 2}, {1}); - auto fc = std::make_shared(input1, input2); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, input2, no_bias); model_ref = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{input1}); } @@ -150,8 +156,9 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest8) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 2}, {1}); + auto no_bias = std::make_shared(); - auto fc = std::make_shared(input1, input2); + auto fc = std::make_shared(input1, input2, no_bias); auto a_shape = std::make_shared(input1); auto I = ov::op::util::node_to_get_shape_value_of_indices_from_shape_node(a_shape, {0, 1}); @@ -174,7 +181,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest9) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1}); - auto matmul = std::make_shared(input1, input2); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(input1, input2, no_bias); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } @@ -219,7 +227,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest13) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 1}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 80, 1}, {1}); - auto matmul = std::make_shared(input1, input2); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(input1, input2, no_bias); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } @@ -243,7 +252,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest14) { { auto input1 = std::make_shared(ov::element::u8, ov::PartialShape{-1, -1, 1}); auto input2 = ov::opset1::Constant::create(ov::element::i8, ov::Shape{1, 80, 1}, {1}); - auto matmul = std::make_shared(input1, input2, ov::element::f32); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(input1, input2, no_bias, ov::element::f32); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } @@ -272,9 +282,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest15) { auto transpose_constant = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {1, 0}); auto transpose = std::make_shared(input3, transpose_constant); auto convert = std::make_shared(transpose, ov::element::f32); + auto no_bias = std::make_shared(); - auto matmul1 = std::make_shared(input1, convert); - auto matmul2 = std::make_shared(input2, convert); + auto matmul1 = std::make_shared(input1, convert, no_bias); + auto matmul2 = std::make_shared(input2, convert, no_bias); model_ref = std::make_shared(ov::NodeVector{matmul1, matmul2}, ov::ParameterVector{input1, input2}); } @@ -292,7 +303,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_second_input_rank { auto input1 = std::make_shared(ov::element::f32, ov::Shape{5, 2, 3}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 3}, {1}); - auto matmul = std::make_shared(input1, input2); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(input1, input2, no_bias); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } } @@ -309,7 +321,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_second_input_rank { auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 2, 3 }); auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 3 }, { 1 }); - auto matmul = std::make_shared(input1, weights); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(input1, weights, no_bias); model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); } @@ -328,7 +341,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_second_input_rank auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 5, 2, 3 }); auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 1, 2, 3 }, { 1 }); - auto matmul = std::make_shared(input1, weights); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(input1, weights, no_bias); model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); } } @@ -351,8 +365,9 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_decompress_conver auto transpose_constant = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); auto transpose = std::make_shared(input2, transpose_constant); auto convert = std::make_shared(transpose, ov::element::f32); + auto no_bias = std::make_shared(); - auto matmul = std::make_shared(input1, convert); + auto matmul = std::make_shared(input1, convert, no_bias); model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); } @@ -378,8 +393,9 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_decompress_conver auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); auto transpose2 = std::make_shared(input2, transpose_constant2); auto convert = std::make_shared(transpose2, ov::element::f32); + auto no_bias = std::make_shared(); - auto matmul = std::make_shared(transpose1, convert); + auto matmul = std::make_shared(transpose1, convert, no_bias); model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); } @@ -410,7 +426,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFullyConnectedTest_compressed_u8_wei auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {3}, {0, 2, 1}); auto transpose = std::make_shared(mul, transpose_const); - auto matmul = std::make_shared(data, transpose); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(data, transpose, no_bias); model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ data }); } diff --git a/src/plugins/intel_gpu/tests/unit/transformations/fc_convert_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/fc_convert_fusion_test.cpp index 0440918e9f8caf..b9d9f3d85894d6 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/fc_convert_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/fc_convert_fusion_test.cpp @@ -19,6 +19,7 @@ #include "openvino/op/parameter.hpp" #include "intel_gpu/op/fully_connected.hpp" #include "intel_gpu/op/fully_connected_compressed.hpp" +#include "intel_gpu/op/placeholder.hpp" using namespace testing; using namespace ov::intel_gpu; @@ -27,9 +28,10 @@ TEST_F(TransformationTestsF, FullyConnectedConvertFusionTest1) { { auto input = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); - auto fc_compressed = std::make_shared(input, weights_const, scale_const, zp_const); + auto fc_compressed = std::make_shared(input, weights_const, no_bias, scale_const, zp_const); auto convert = std::make_shared(fc_compressed, ov::element::f32); model = std::make_shared(ov::NodeVector{convert}, ov::ParameterVector{input}); @@ -38,9 +40,10 @@ TEST_F(TransformationTestsF, FullyConnectedConvertFusionTest1) { { auto input = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); - auto fc_compressed = std::make_shared(input, weights_const, scale_const, zp_const, ov::element::f32); + auto fc_compressed = std::make_shared(input, weights_const, no_bias, scale_const, zp_const, ov::element::f32); model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input }); } @@ -50,7 +53,8 @@ TEST_F(TransformationTestsF, FullyConnectedConvertFusionTest2) { { auto input1 = std::make_shared(ov::element::f16, ov::Shape{3, 2, 2}); auto input2 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{2, 2}, {1}); - auto matmul = std::make_shared(input1, input2); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(input1, input2, no_bias); auto convert = std::make_shared(matmul, ov::element::f32); model = std::make_shared(ov::NodeVector{convert}, ov::ParameterVector{input1}); @@ -59,7 +63,8 @@ TEST_F(TransformationTestsF, FullyConnectedConvertFusionTest2) { { auto input1 = std::make_shared(ov::element::f16, ov::Shape{3, 2, 2}); auto input2 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{2, 2}, {1}); - auto matmul = std::make_shared(input1, input2, ov::element::f32); + auto no_bias = std::make_shared(); + auto matmul = std::make_shared(input1, input2, no_bias, ov::element::f32); model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); } diff --git a/src/plugins/intel_gpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp b/src/plugins/intel_gpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp index 8b760790e34aaa..90c8c18c192096 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,9 @@ class MoveFCReshapeToWeightsTests : public TransformationTestsF, public WithPara auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {2}, {1, 0}); weights_path = std::make_shared(weights_path, transpose_const); } - auto fully_connected = std::make_shared(data, weights_path); + auto no_bias = std::make_shared(); + + auto fully_connected = std::make_shared(data, weights_path, no_bias); return std::make_shared(ov::NodeVector{fully_connected}, ov::ParameterVector{data}); }