[GPU] Add condition for dynamic shape split_lengths for in place crop buffer fusing (#25595)

### Details:
- Add a condition for dynamic-shape split_lengths in in-place crop buffer fusing (see the sketch after the tickets list).

### Tickets:
- 146739
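
The reasoning behind the condition, restated outside the diff: VariadicSplit shape inference needs the actual values of its axis and split_lengths inputs, so when split_lengths is produced at runtime (for example by a ShapeOf), each crop's position inside the parent buffer is unknown at fusing time and the in-place optimization must be skipped. Below is a minimal illustrative sketch of that guard; the names are hypothetical and only mirror the change shown in the diff, they are not the plugin's real interfaces.

```cpp
// Illustrative sketch only: the real check lives in crop_in_place_optimization::match()
// (see the diff below); the function and parameter names here are hypothetical.
bool crop_can_be_fused_in_place(bool is_variadic_split_mode,
                                bool axis_is_constant,           // input1 of the crop
                                bool split_lengths_is_constant)  // input2 of the crop
{
    // Shape inference needs the concrete axis and split_lengths values to
    // compute each output's offset inside the shared parent buffer.
    if (is_variadic_split_mode && (!axis_is_constant || !split_lengths_is_constant))
        return false;  // values unknown until runtime -> do not fuse in place
    return true;       // the remaining match() checks still apply
}
```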
wilson-seok authored Jul 26, 2024
1 parent a9c8b99 commit e110479
Showing 3 changed files with 94 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/crop.cpp
@@ -250,7 +250,7 @@ crop_inst::typed_primitive_inst(network& network, crop_node const& node) : paren
"Invalid Batch offset: exceeds data for output!");
}

-    if (node.can_be_optimized()) {
+    if (!node.is_dynamic() && node.can_be_optimized()) {
update_output_memory();
}
}
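
In crop.cpp, the guard now also requires the node to be static-shaped: for a dynamic crop the output size and offset are not yet known when the primitive instance is constructed, so calling update_output_memory() there would bind the output to the parent buffer prematurely. Presumably the dynamic case is handled later, once actual shapes are available at runtime.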
@@ -500,6 +500,14 @@ bool crop_in_place_optimization::match(const program_node& node,
if (node.is_constant())
return false;

+    // Do not optimize a variadic_split crop when either input1 or input2 is not constant.
+    // VariadicSplit ngraph shape inference requires the values of axis (input1) and split_lengths (input2),
+    // so non-constant input1/input2 makes runtime buffer fusing risky.
+    auto& crop_node = node.as<crop>();
+    if ((crop_node.get_primitive()->op_mode == cldnn::crop_ngraph_op_mode::variadic_split) &&
+        (!crop_node.get_dependency(1).is_constant() || !crop_node.get_dependency(2).is_constant()))
+        return false;
+
if (node.get_users().size() > 0) {
if (node.get_program().is_body_program() && node.get_dependency(0).is_type<lstm_elt>()) {
return false;
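
To make the risk mentioned in the new comment concrete: each output of a VariadicSplit-style crop starts at an offset inside the parent buffer that is a running sum of the preceding split_lengths. A small self-contained sketch of that dependency (illustrative arithmetic only, not plugin code):

```cpp
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <vector>

// Offset (in elements) of output `output_index` along the split axis:
// the sum of all preceding split lengths times the stride of the inner dims.
// If split_lengths is not a compile-time constant, this value is unknown
// when buffer fusing runs, so the crop cannot safely alias the parent buffer.
size_t crop_offset_elements(const std::vector<int64_t>& split_lengths,
                            size_t output_index,
                            size_t inner_stride) {
    int64_t preceding = std::accumulate(split_lengths.begin(),
                                        split_lengths.begin() + output_index,
                                        int64_t{0});
    return static_cast<size_t>(preceding) * inner_stride;
}
```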
@@ -777,6 +777,91 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic) {
ASSERT_EQ(output_ptr_3[i], out3[i]);
}

TEST(prepare_buffer_fusing, in_place_crop_dynamic_split_lengths) {
auto& engine = get_test_engine();

auto in_layout = layout{ ov::PartialShape{-1, -1, -1}, data_types::f32, format::bfyx};
auto in2_layout = layout{ ov::PartialShape{-1, -1}, data_types::f32, format::bfyx};
auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx });
auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx });
auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx });
auto scale_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx });
auto zp_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx });
auto axis_mem = engine.allocate_memory({ {}, data_types::i64, format::bfyx });
auto shapeof_mem = engine.allocate_memory({ {2, 6}, data_types::f32, format::bfyx });

int64_t axis = 2;
set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f,
0.5f, -2.0f, -0.5f, -1.0f });
set_values<int64_t>(axis_mem, {axis});
set_values(shapeof_mem, { 1.0f, 2.0f, 3.0f, 4.0f,
5.0f, 6.0f, 7.0f, 8.0f,
9.0f, 10.0f, 11.0f, 12.0f});
set_values<uint8_t>(weights_mem, { 1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 0,
15, 14, 13, 12,
11, 10, 9, 8,
7, 6, 5, 4,
3, 2, 1, 0});
set_values(bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, 2.0f });
set_values(scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 2.0f });
set_values(zp_mem, { 1.0f, 2.0f, 2.0f, 1.0f, 4.0f, 1.0f, 6.0f, 2.0f });

std::vector<float> out1 = { 13.f, 58.f, -11.f, -62.f };
std::vector<float> out2 = { -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f };
std::vector<float> out3 = { 13.f, 58.f, -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, -11.f, -62.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f };

cldnn::crop_ngraph_op_mode op_mode = cldnn::crop_ngraph_op_mode::variadic_split;
topology topology(
input_layout("input", in_layout),
input_layout("input_shapeof", in2_layout),
data("axis", axis_mem),
data("weights", weights_mem),
data("bias", bias_mem),
data("scale", scale_mem),
data("zp", zp_mem),
fully_connected("fc", input_info("input"), "weights", "bias", "scale", "zp", data_types::f32, 3, 2),
shape_of("shapeof", input_info("input_shapeof"), cldnn::data_types::i64),
crop("crop1", { input_info("fc"), input_info("axis"), input_info("shapeof") }, cldnn::tensor(1), cldnn::tensor(0), op_mode, 0, axis),
reorder("output1", input_info("crop1"), format::bfyx, data_types::f32),
crop("crop2", { input_info("fc"), input_info("axis"), input_info("shapeof") }, cldnn::tensor(1), cldnn::tensor(0), op_mode, 1, axis),
reshape("reshape", input_info("crop2"), true, std::vector<int64_t>{0, 0, 3, 2}, ov::PartialShape{-1, -1, 3, 2}, cldnn::reshape::reshape_mode::base),
reorder("output2", input_info("reshape"), format::bfyx, data_types::f32, std::vector<float>(), reorder_mean_mode::subtract, padding(), true),
reorder("output3", input_info("fc"), format::bfyx, data_types::f32)
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);

network.set_input_data("input", input_mem);
network.set_input_data("input_shapeof", shapeof_mem);

std::map<cldnn::primitive_id, cldnn::network_output> outputs;
EXPECT_NO_THROW(outputs = network.execute());

auto output = outputs.at("output1").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());

for (size_t i = 0; i < out1.size(); i++)
ASSERT_EQ(output_ptr[i], out1[i]);

auto output_2 = outputs.at("output2").get_memory();
cldnn::mem_lock<float> output_ptr_2(output_2, get_test_stream());

for (size_t i = 0; i < out2.size(); i++)
ASSERT_EQ(output_ptr_2[i], out2[i]);

auto output_3 = outputs.at("output3").get_memory();
cldnn::mem_lock<float> output_ptr_3(output_3, get_test_stream());

for (size_t i = 0; i < out3.size(); i++)
ASSERT_EQ(output_ptr_3[i], out3[i]);
}

// Testing for implicit crop along the batch axis and outer padding optimizing.
// Outer padding opt includes opting out of reshape and reorder which have padded input only in the batch axis.
// This optimization also includes offset (outer-axis padded input) handling for the oneDNN primitive.
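
The new test (in_place_crop_dynamic_split_lengths) exercises exactly the pattern the condition guards against: the split_lengths input of both VariadicSplit-mode crops is a ShapeOf over a runtime input rather than a constant, so the split sizes (here 2 and 6 along axis 2) are only known at execution time. The test then checks that the network still executes and that crop1, the reshaped crop2, and the full fully_connected output match the expected values.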