diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp
index 266e169fe79581..75bee22d0c8a66 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp
@@ -19,14 +19,16 @@ namespace {
 
 using shuffle_range = std::pair;
 
-bool can_shuffle_features(program_node& node, stream& stream) {
+// Checks whether `node` tolerates a reordering of the features it receives from `concat_node`.
+// Branches that inspect weights additionally require `concat_node` to be dependency 0 of `node`.
+bool can_shuffle_features(program_node& node, program_node& concat_node, stream& stream) {
     if (node.is_type()) {
         auto& conv_node = node.as();
         auto& wei_node = conv_node.weights();
         if (ov::element::Type(wei_node.get_output_layout().data_type).bitwidth() < 8)
             return false;
 
-        return conv_node.get_groups() == 1 &&
+        return conv_node.get_groups() == 1 && node.get_dependency_index(concat_node) == 0 &&
             conv_node.get_deformable_groups() == 1 && !conv_node.get_transposed() &&
             !conv_node.activations_zero_points_term() &&
             wei_node.is_type() && wei_node.is_constant() && !wei_node.is_output();
@@ -37,7 +39,7 @@ bool can_shuffle_features(program_node& node, stream& stream) {
         if (ov::element::Type(wei_node.get_output_layout().data_type).bitwidth() < 8)
             return false;
 
-        return wei_node.is_type() && wei_node.is_constant() && !wei_node.is_output();
+        return node.get_dependency_index(concat_node) == 0 && wei_node.is_type() && wei_node.is_constant() && !wei_node.is_output();
     }
 
     bool pass_through = false;
@@ -48,7 +50,9 @@ bool can_shuffle_features(program_node& node, stream& stream) {
     if (pass_through) {
         // Primitives that are feature order invariant, pass-through shuffled features to users
         for (auto& user : node.get_users()) {
-            if (!can_shuffle_features(*user, stream))
+            // `user` consumes this pass-through node rather than the concat itself, so the
+            // dependency-index check made further down has to be done against `node`.
+            if (!can_shuffle_features(*user, node, stream))
                 return false;
         }
         return true;
@@ -160,7 +164,7 @@ void concat_input_order::run(program& p) {
         // Check that we can fuse shuffling to users
         bool can_shuffle_users = true;
         for (auto user : concat_node.get_users()) {
-            can_shuffle_users &= can_shuffle_features(*user, p.get_stream());
+            can_shuffle_users &= can_shuffle_features(*user, concat_node, p.get_stream());
         }
 
         if (!along_f || !no_fusing || !correct_format || !single_format || already_aligned || !can_shuffle_users)
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp
index 14c7231a3f29a4..9866952a27cb19 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp
@@ -5,6 +5,7 @@
 #include "test_utils.h"
 #include "random_generator.hpp"
 #include "concatenation_inst.h"
+#include "permute_inst.h"
 
 #include
 #include
@@ -715,6 +716,68 @@ TEST(concat_gpu, i8_optimization_with_pool_conv) {
     }
 }
 
+// Feeds a four-input feature concat (24 + 48 + 96 + 128 = 296 features, b_fs_yx_fsv16) into an eltwise sum
+// together with the output of conv0, and only checks that the optimized network executes.
+// NOTE(review): the test name suggests concat ends up as a non-zero convolution input after fusing - confirm.
+TEST(concat_gpu, no_exception_in_input_order_opt_b_fs_yx_fsv16_with_conv_port2) {
+    auto& engine = get_test_engine();
+
+    auto concat_input0 = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 24, 6, 6 }});
+    auto concat_input1 = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 48, 6, 6 }});
+    auto concat_input2 = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 96, 6, 6 }});
+    auto concat_input3 = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 128, 6, 6 }});
+    auto conv_input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 192, 6, 6 } });
+    auto weights0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 296, 192, 1, 1 } });
+
+    std::vector<float> concat_input0_data(concat_input0->get_layout().count());
+    std::vector<float> concat_input1_data(concat_input1->get_layout().count());
+    std::vector<float> concat_input2_data(concat_input2->get_layout().count());
+    std::vector<float> concat_input3_data(concat_input3->get_layout().count());
+    std::vector<float> conv_input_data(conv_input->get_layout().count());
+    std::vector<float> weights0_data(weights0->get_layout().count());
+
+    std::iota(concat_input0_data.begin(), concat_input0_data.end(), 0.f);
+    std::iota(concat_input1_data.begin(), concat_input1_data.end(), 0.f);
+    std::iota(concat_input2_data.begin(), concat_input2_data.end(), 0.f);
+    std::iota(concat_input3_data.begin(), concat_input3_data.end(), 0.f);
+    std::iota(conv_input_data.begin(), conv_input_data.end(), 0.f);
+    std::iota(weights0_data.begin(), weights0_data.end(), 0.f);
+
+    set_values(concat_input0, concat_input0_data);
+    set_values(concat_input1, concat_input1_data);
+    set_values(concat_input2, concat_input2_data);
+    set_values(concat_input3, concat_input3_data);
+    set_values(conv_input, conv_input_data);
+    set_values(weights0, weights0_data);
+
+    topology topology(input_layout("concat_input0", concat_input0->get_layout()),
+                      input_layout("concat_input1", concat_input1->get_layout()),
+                      input_layout("concat_input2", concat_input2->get_layout()),
+                      input_layout("concat_input3", concat_input3->get_layout()),
+                      input_layout("conv_input", conv_input->get_layout()),
+                      concatenation("concat",
+                                    { input_info("concat_input0"), input_info("concat_input1"), input_info("concat_input2"), input_info("concat_input3") },
+                                    1,
+                                    data_types::f32,
+                                    padding{{0, 0, 0, 0}, 0}),
+                      data("weights0", weights0),
+                      convolution("conv0", input_info("conv_input"), "weights0", "", 1, { 1, 1 }, {1, 1}, {0, 0}, {0, 0}, false),
+                      eltwise("eltwise", input_info("conv0"), input_info("concat"), eltwise_mode::sum),
+                      permute("permute", input_info("eltwise"), {0, 1, 2, 3}));
+
+    ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    network network(engine, topology, config);
+
+    network.set_input_data("concat_input0", concat_input0);
+    network.set_input_data("concat_input1", concat_input1);
+    network.set_input_data("concat_input2", concat_input2);
+    network.set_input_data("concat_input3", concat_input3);
+    network.set_input_data("conv_input", conv_input);
+
+    ASSERT_NO_FATAL_FAILURE(network.execute());
+}
+
 using TestParamType_concat = ::testing::tuple, // 1 - Inputs Features Sizes
 size_t, // 2 - Input Y Size