Skip to content

Commit

Permalink
[GPU] Skip concat_input_order (#23573)
Browse files Browse the repository at this point in the history
### Details:
- *Skip the concat_input_order optimization for an element that lies outside
the buffer range*

### Tickets:
 - *135790*
  • Loading branch information
steve-y authored Mar 28, 2024
1 parent 606950d commit 561d78f
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ namespace {

using shuffle_range = std::pair<int32_t, int32_t>;

bool can_shuffle_features(program_node& node, stream& stream) {
bool can_shuffle_features(program_node& node, program_node& concat_node, stream& stream) {
if (node.is_type<convolution>()) {
auto& conv_node = node.as<convolution>();
auto& wei_node = conv_node.weights();
if (ov::element::Type(wei_node.get_output_layout().data_type).bitwidth() < 8)
return false;

return conv_node.get_groups() == 1 &&
return conv_node.get_groups() == 1 && node.get_dependency_index(concat_node) == 0 &&
conv_node.get_deformable_groups() == 1 && !conv_node.get_transposed() &&
!conv_node.activations_zero_points_term() &&
wei_node.is_type<data>() && wei_node.is_constant() && !wei_node.is_output();
Expand All @@ -37,7 +37,7 @@ bool can_shuffle_features(program_node& node, stream& stream) {
if (ov::element::Type(wei_node.get_output_layout().data_type).bitwidth() < 8)
return false;

return wei_node.is_type<data>() && wei_node.is_constant() && !wei_node.is_output();
return node.get_dependency_index(concat_node) == 0 && wei_node.is_type<data>() && wei_node.is_constant() && !wei_node.is_output();
}

bool pass_through = false;
Expand All @@ -48,7 +48,7 @@ bool can_shuffle_features(program_node& node, stream& stream) {
if (pass_through) {
// Primitives that are feature order invariant, pass-through shuffled features to users
for (auto& user : node.get_users()) {
if (!can_shuffle_features(*user, stream))
if (!can_shuffle_features(*user, concat_node, stream))
return false;
}
return true;
Expand Down Expand Up @@ -160,7 +160,7 @@ void concat_input_order::run(program& p) {
// Check that we can fuse shuffling to users
bool can_shuffle_users = true;
for (auto user : concat_node.get_users()) {
can_shuffle_users &= can_shuffle_features(*user, p.get_stream());
can_shuffle_users &= can_shuffle_features(*user, concat_node, p.get_stream());
}

if (!along_f || !no_fusing || !correct_format || !single_format || already_aligned || !can_shuffle_users)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "test_utils.h"
#include "random_generator.hpp"
#include "concatenation_inst.h"
#include "permute_inst.h"

#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/convolution.hpp>
Expand Down Expand Up @@ -715,6 +716,67 @@ TEST(concat_gpu, i8_optimization_with_pool_conv) {
}
}

// Regression test for concat_input_order: the concat node feeds a user
// (eltwise) through a non-zero dependency port, and the conv input feature
// count (192) differs from the concat output feature count (296). The pass
// must skip shuffling in this topology instead of indexing outside the
// weights buffer range. Success criterion: the network builds and executes
// without crashing — output values are not checked.
TEST(concat_gpu, no_exception_in_input_order_opt_b_fs_yx_fsv16_with_conv_port2) {
    auto& engine = get_test_engine();

    // Four concat branches (24 + 48 + 96 + 128 = 296 features) plus a separate
    // convolution input with a different feature count (192).
    auto concat_input0 = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 24, 6, 6 }});
    auto concat_input1 = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 48, 6, 6 }});
    auto concat_input2 = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 96, 6, 6 }});
    auto concat_input3 = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 128, 6, 6 }});
    auto conv_input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 192, 6, 6 } });
    // 296 output channels so conv0's result is shape-compatible with the concat output.
    auto weights0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 296, 192, 1, 1 } });

    // Deterministic ascending values; the test only checks that execution
    // does not fault, so the exact contents are irrelevant.
    auto fill_iota = [](auto& mem) {
        std::vector<float> values(mem->get_layout().count());
        std::iota(values.begin(), values.end(), 0.f);
        set_values(mem, values);
    };
    fill_iota(concat_input0);
    fill_iota(concat_input1);
    fill_iota(concat_input2);
    fill_iota(concat_input3);
    fill_iota(conv_input);
    fill_iota(weights0);

    topology topology(input_layout("concat_input0", concat_input0->get_layout()),
                      input_layout("concat_input1", concat_input1->get_layout()),
                      input_layout("concat_input2", concat_input2->get_layout()),
                      input_layout("concat_input3", concat_input3->get_layout()),
                      input_layout("conv_input", conv_input->get_layout()),
                      // Concatenate along the feature axis (1).
                      concatenation("concat",
                                    { input_info("concat_input0"), input_info("concat_input1"), input_info("concat_input2"), input_info("concat_input3") },
                                    1,
                                    data_types::f32,
                                    padding{{0, 0, 0, 0}, 0}),
                      data("weights0", weights0),
                      convolution("conv0", input_info("conv_input"), "weights0", "", 1, { 1, 1 }, {1, 1}, {0, 0}, {0, 0}, false),
                      // The concat result enters eltwise as the SECOND operand, i.e.
                      // through dependency port 1 of the user — the case the pass
                      // previously mishandled.
                      eltwise("eltwise", input_info("conv0"), input_info("concat"), eltwise_mode::sum),
                      permute("permute", input_info("eltwise"), {0, 1, 2, 3}));

    ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine);
    // optimize_data(true) is required so the concat_input_order pass actually runs.
    config.set_property(ov::intel_gpu::optimize_data(true));
    network network(engine, topology, config);

    network.set_input_data("concat_input0", concat_input0);
    network.set_input_data("concat_input1", concat_input1);
    network.set_input_data("concat_input2", concat_input2);
    network.set_input_data("concat_input3", concat_input3);
    network.set_input_data("conv_input", conv_input);

    // The fix is validated by the absence of a crash/exception during execution.
    ASSERT_NO_FATAL_FAILURE(network.execute());
}

using TestParamType_concat = ::testing::tuple<size_t, // 0 - Input Batch size
std::vector<size_t>, // 1 - Inputs Features Sizes
size_t, // 2 - Input Y Size
Expand Down

0 comments on commit 561d78f

Please sign in to comment.