diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
index e28edf1a0447ca..c6de09403c1cef 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
@@ -878,7 +878,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
                     new_layout.data_type = data_types::f32;
                     auto new_input = rf.get_reorder(input.id(), input_layout, new_layout);
                     if (new_input.first) {
-                       p.add_intermediate(new_input.first, fc_node, 0);
+                       p.add_intermediate(new_input.first, fc_node, 0, !new_input.second);
                     }
                 }
 
diff --git a/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp
index 591d4023f83946..b196701c070449 100644
--- a/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp
@@ -19,6 +19,7 @@
 #include "batch_to_space_inst.h"
 #include "permute_inst.h"
 #include "concatenation_inst.h"
+#include "fully_connected_inst.h"
 #include "pass_manager.h"
 #include "to_string_utils.h"
 
@@ -399,6 +400,60 @@ TEST(reorder_inputs, no_need_of_reorder_to_change_input_rank_for_rdft) {
     ASSERT_EQ(size_t(4), format::dimension(dft_node.get_input_layouts()[0].format));
 }
 
+TEST(reorder_inputs, add_reorder_between_single_output_type_node_and_multiple_users) {
+    // Topology:
+    //
+    //   Add (single output)                        Add
+    //         |                                     |
+    //       0->0        -------- 0->0 ------------> Reorder
+    //      |    |                                  |   |
+    //     FC    FC                                 FC  FC
+    //
+    // Description:
+    //   Covers a node that does not have multiple outputs but does have multiple users,
+    //   where the port number to each user is the same.
+    //   In this case a reorder should be inserted on the input of each FC.
+
+    auto& engine = get_test_engine();
+
+    auto in_layout1 = layout{ ov::PartialShape{1, 4096, 256}, data_types::i32, format::bfyx };
+    auto weights = engine.allocate_memory({ data_types::i32, format::bfyx, {128, 256, 1, 1} });
+
+    topology topology(
+        input_layout("input1", in_layout1),
+        input_layout("input2", in_layout1),
+        data("weights1", weights),
+        data("weights2", weights),
+        eltwise("add", input_info("input1"), input_info("input2"), eltwise_mode::sum),
+        fully_connected("fc1", input_info("add"), "weights1"),
+        fully_connected("fc2", input_info("add"), "weights2")
+    );
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::optimize_data(true));
+
+    auto program = program::build_program(engine, topology, config, false, true);
+    // Check the pointer before it is dereferenced for the optimization pass below.
+    ASSERT_NE(program, nullptr);
+
+    layout_optimizer lo(true);
+    reorder_factory rf;
+    program_wrapper::apply_opt_pass<reorder_inputs>(*program, lo, rf);
+
+    // After the pass, every user of "add" is expected to be a reorder ...
+    auto& add = program->get_node("add");
+    for (auto& user : add.get_users()) {
+        ASSERT_TRUE(user->is_type<reorder>());
+    }
+
+    // ... and each FC is expected to take its first input from a reorder.
+    auto& fc1 = program->get_node("fc1");
+    auto& fc2 = program->get_node("fc2");
+
+    ASSERT_TRUE(fc1.get_dependency(0).is_type<reorder>());
+    ASSERT_TRUE(fc2.get_dependency(0).is_type<reorder>());
+}
+
 // TODO Not yet implemented
 //TEST(reorder_inputs, impl_forcing_conv_format_kernel) {
 //    auto& engine = get_test_engine();