Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Implement bias on internal FC op #23317

Merged
merged 3 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class FullyConnected : public ov::op::Op {

FullyConnected(const ov::Output<Node>& A,
const ov::Output<Node>& B,
const ov::Output<Node>& bias,
const ov::element::Type output_type = ov::element::undefined);

bool visit_attributes(ov::AttributeVisitor &visitor) override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ class FullyConnectedCompressed : public FullyConnected {

FullyConnectedCompressed(const ov::Output<Node> &A,
const ov::Output<Node> &B,
const ov::Output<Node> &bias,
const ov::Output<Node> &decompression_scale,
const ov::Output<Node> &decompression_zero_point,
const ov::element::Type output_type = ov::element::undefined);

FullyConnectedCompressed(const ov::Output<Node> &A,
const ov::Output<Node> &B,
const ov::Output<Node> &bias,
const ov::Output<Node> &decompression_scale,
const ov::element::Type output_type = ov::element::undefined);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the Placeholder op was added recently, we can now have a single constructor for this op that takes all arguments. Could you modify that too? It can be done in a separate PR if you prefer.


Expand Down
14 changes: 8 additions & 6 deletions src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@ namespace ov {
namespace intel_gpu {

static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::shared_ptr<op::FullyConnectedCompressed>& op) {
validate_inputs_count(op, {3, 4});
validate_inputs_count(op, {4, 5});
auto inputs = p.GetInputInfo(op);
std::string primitive_name = layer_type_name_ID(op);

auto input_name = inputs[0].pid;
auto weights_name = inputs[1].pid;
auto scale_name = inputs[2].pid;
auto zp_name = inputs.size() == 4 ? inputs[3].pid : "";
auto bias_name = inputs[2].pid;
auto scale_name = inputs[3].pid;
auto zp_name = inputs.size() == 5 ? inputs[4].pid : "";

float zp_value = 0.0f;
bool has_scalar_zp = false;
Expand All @@ -47,7 +48,7 @@ static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::share
auto fc = cldnn::fully_connected(primitive_name,
cldnn::input_info(input_name),
weights_name,
"",
bias_name,
scale_name,
has_scalar_zp ? "" : zp_name,
cldnn::element_type_to_data_type(op->get_output_element_type(0)),
Expand All @@ -63,12 +64,13 @@ static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::share
}

static void CreateFullyConnectedOp(ProgramBuilder& p, const std::shared_ptr<op::FullyConnected>& op) {
validate_inputs_count(op, {2});
validate_inputs_count(op, {3});
auto inputs = p.GetInputInfo(op);
std::string layerName = layer_type_name_ID(op);

auto input_name = inputs[0].pid;
auto weights_name = inputs[1].pid;
auto bias_name = inputs[2].pid;

auto shape_a = op->get_input_partial_shape(0);
auto shape_b = op->get_input_partial_shape(1);
Expand All @@ -79,7 +81,7 @@ static void CreateFullyConnectedOp(ProgramBuilder& p, const std::shared_ptr<op::
auto fcPrim = cldnn::fully_connected(layerName,
cldnn::input_info(input_name),
weights_name,
"",
bias_name,
cldnn::element_type_to_data_type(op->get_output_element_type(0)),
cldnn::padding(),
rank_a,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,16 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon
auto transpose_m = wrap_type<ov::op::v1::Transpose>({transpose_input, transpose_const_m});

auto data_m = any_input();
auto bias_m = any_input();
auto weights_input_m = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{reshape_m, transpose_m, mul_m});
auto fully_connected_m = wrap_type<op::FullyConnected>({data_m, weights_input_m});
auto fully_connected_m = wrap_type<op::FullyConnected>({data_m, weights_input_m, bias_m});

ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
const auto& pattern_map = m.get_pattern_value_map();
OPENVINO_ASSERT(pattern_map.count(fully_connected_m));
OPENVINO_ASSERT(pattern_map.count(mul_const_m));
OPENVINO_ASSERT(pattern_map.count(weights_m));
OPENVINO_ASSERT(pattern_map.count(bias_m));
OPENVINO_ASSERT(pattern_map.count(convert_m));
auto fc = std::dynamic_pointer_cast<op::FullyConnected>(pattern_map.at(fully_connected_m).get_node_shared_ptr());
if (!fc || transformation_callback(fc)) {
Expand Down Expand Up @@ -103,6 +105,7 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon
std::shared_ptr<ov::Node> fc_input_b = reshape_const_to_2d(pattern_map.at(weights_m).get_node_shared_ptr());
std::shared_ptr<ov::Node> fc_input_scale = scale;
std::shared_ptr<ov::Node> fc_input_zp = optional_zero_point;
std::shared_ptr<ov::Node> fc_input_bias = pattern_map.at(bias_m).get_node_shared_ptr();
std::vector<std::shared_ptr<ov::Node>> result_nodes = {};
if (has_transpose) {
const auto& transpose = pattern_map.at(transpose_m).get_node_shared_ptr();
Expand All @@ -128,12 +131,14 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon
if (with_zero_point) {
new_fc = std::make_shared<op::FullyConnectedCompressed>(fc_input_a,
fc_input_b,
fc_input_bias,
fc_input_scale,
fc_input_zp,
fc->get_output_type());
} else {
new_fc = std::make_shared<op::FullyConnectedCompressed>(fc_input_a,
fc_input_b,
fc_input_bias,
fc_input_scale,
fc->get_output_type());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//

#include "intel_gpu/op/fully_connected.hpp"
#include "intel_gpu/op/placeholder.hpp"
#include "convert_matmul_to_fc.hpp"
#include "openvino/op/matmul.hpp"
#include "openvino/op/convert.hpp"
Expand Down Expand Up @@ -177,8 +178,10 @@ ConvertMatMulToFullyConnected::ConvertMatMulToFullyConnected() {
fc_input_b = convert;
}

auto no_bias = std::make_shared<op::Placeholder>();

// Create FullyConnected
auto fc = std::make_shared<op::FullyConnected>(fc_input_a, fc_input_b, matmul->get_output_element_type(0));
auto fc = std::make_shared<op::FullyConnected>(fc_input_a, fc_input_b, no_bias, matmul->get_output_element_type(0));
fc->set_friendly_name(matmul->get_friendly_name());
new_ops.push_back(fc);
ov::copy_runtime_info(matmul, new_ops);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ FullyConnectedConvertFusion::FullyConnectedConvertFusion() {

auto data = any_input();
auto weights = any_input();
auto fully_connected = wrap_type<op::FullyConnected>({data, weights}, consumers_count(1));
auto fully_connected_compressed = wrap_type<op::FullyConnectedCompressed>({data, weights, any_input(), any_input()}, consumers_count(1));
auto bias = any_input();
auto fully_connected = wrap_type<op::FullyConnected>({data, weights, bias}, consumers_count(1));
auto fully_connected_compressed = wrap_type<op::FullyConnectedCompressed>({data, weights, bias, any_input(), any_input()}, consumers_count(1));
auto fc = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{fully_connected, fully_connected_compressed});
auto convert = wrap_type<ov::op::v0::Convert>({fc}, type_matches(element::f32));

Expand All @@ -30,6 +31,7 @@ FullyConnectedConvertFusion::FullyConnectedConvertFusion() {

const auto& m_data = pattern_map.at(data).get_node_shared_ptr();
const auto& m_weights = pattern_map.at(weights).get_node_shared_ptr();
const auto& m_bias = pattern_map.at(bias).get_node_shared_ptr();
const auto& m_convert = pattern_map.at(convert).get_node_shared_ptr();
auto output_type = m_convert->get_output_element_type(0);

Expand All @@ -38,13 +40,14 @@ FullyConnectedConvertFusion::FullyConnectedConvertFusion() {
auto it = pattern_map.find(fully_connected);
if (it != pattern_map.end()) {
m_fc = it->second.get_node_shared_ptr();
new_fc = std::make_shared<op::FullyConnected>(m_data, m_weights, output_type);
new_fc = std::make_shared<op::FullyConnected>(m_data, m_weights, m_bias, output_type);
} else {
m_fc = pattern_map.at(fully_connected_compressed).get_node_shared_ptr();
new_fc = std::make_shared<op::FullyConnectedCompressed>(m_data,
m_weights,
m_fc->input_value(2),
m_bias,
m_fc->input_value(3),
m_fc->input_value(4),
output_type);
}
new_fc->set_friendly_name(m_convert->get_friendly_name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ MoveFCReshapeToWeights::MoveFCReshapeToWeights() {
auto weights_input_m = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{reshape_m, transpose_m});

auto data_m = any_input();
auto fully_connected_m = wrap_type<op::FullyConnected>({data_m, weights_input_m});
auto fully_connected_m = wrap_type<op::FullyConnected>({data_m, weights_input_m, any_input()});

ov::matcher_pass_callback callback = [&](ov::pass::pattern::Matcher& m) {
const auto fully_connected = m.get_match_root();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,25 @@ namespace op {

FullyConnected::FullyConnected(const ov::Output<Node>& A,
const ov::Output<Node>& B,
const ov::Output<Node>& bias,
const ov::element::Type output_type)
: Op({A, B}), m_output_type(output_type) {
: Op({A, B, bias}), m_output_type(output_type) {
validate_and_infer_types();
}

std::shared_ptr<ov::Node> FullyConnected::clone_with_new_inputs(const ov::OutputVector& new_args) const {
check_new_args_count(this, new_args);

return std::make_shared<FullyConnected>(new_args.at(0), new_args.at(1), m_output_type);
return std::make_shared<FullyConnected>(new_args.at(0), new_args.at(1), new_args.at(2), m_output_type);
}

void FullyConnected::validate_and_infer_types() {
const auto input_size = get_input_size();
NODE_VALIDATION_CHECK(this,
input_size >= 2,
input_size >= 3,
"Number of inputs is incorrect. Current value is: ",
input_size,
", expected at least 2.");
", expected at least 3.");

ov::op::v0::MatMul op;
op.set_transpose_a(false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,41 @@ namespace op {

FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output<Node>& A,
const ov::Output<Node>& B,
const ov::Output<Node>& bias,
const ov::Output<Node>& decompression_scale,
const ov::Output<Node>& decompression_zero_point,
const ov::element::Type output_type)
: FullyConnected(A, B, output_type) {
set_argument(2, decompression_scale);
set_argument(3, decompression_zero_point);
: FullyConnected(A, B, bias, output_type) {
set_argument(3, decompression_scale);
set_argument(4, decompression_zero_point);
validate_and_infer_types();
}

FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output<Node>& A,
const ov::Output<Node>& B,
const ov::Output<Node>& bias,
const ov::Output<Node>& decompression_scale,
const ov::element::Type output_type)
: FullyConnected(A, B, output_type) {
set_argument(2, decompression_scale);
: FullyConnected(A, B, bias, output_type) {
set_argument(3, decompression_scale);
validate_and_infer_types();
}

std::shared_ptr<ov::Node> FullyConnectedCompressed::clone_with_new_inputs(const ov::OutputVector& new_args) const {
check_new_args_count(this, new_args);

if (new_args.size() == 3)
if (new_args.size() == 4)
return std::make_shared<FullyConnectedCompressed>(new_args.at(0),
new_args.at(1),
new_args.at(2),
new_args.at(3),
m_output_type);
else if (new_args.size() == 4)
else if (new_args.size() == 5)
return std::make_shared<FullyConnectedCompressed>(new_args.at(0),
new_args.at(1),
new_args.at(2),
new_args.at(3),
new_args.at(4),
m_output_type);
else
OPENVINO_THROW("Unexpected inputs count for FullyConnectedCompressed op: ", new_args.size());
Expand Down
Loading
Loading