Skip to content

Commit

Permalink
add condition for dynamic shape split_lengths for in place crop buffe…
Browse files Browse the repository at this point in the history
…r fusing
  • Loading branch information
wilson-seok committed Jul 17, 2024
1 parent dcdfdc5 commit 679a50a
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/crop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ crop_inst::typed_primitive_inst(network& network, crop_node const& node) : paren
"Invalid Batch offset: exceeds data for output!");
}

if (node.can_be_optimized()) {
if (!node.is_dynamic() && node.can_be_optimized()) {
update_output_memory();
}
}
Expand Down
8 changes: 7 additions & 1 deletion src/plugins/intel_gpu/src/graph/primitive_inst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1432,7 +1432,13 @@ void primitive_inst::do_runtime_in_place_crop() {

for (auto u : get_user_insts()) {
if (u->get_node().is_type<crop>()) {
if (u->get_node().can_be_optimized()) {
// Check whether the crop is VariadicSplit op or not.
// The VariadicSplit shape infer requires executed value of input[2](split_lengths)
// So skip update_shape here when _node is input[2] of crop
auto crop_op_mode = u->_impl_params->typed_desc<crop>()->op_mode;
bool is_split_lengths = (crop_op_mode == cldnn::crop_ngraph_op_mode::variadic_split) &&
(_node->get_unique_id() == u->_deps[2].first->_node->get_unique_id());
if (u->get_node().can_be_optimized() && !is_split_lengths) {
GPU_DEBUG_TRACE_DETAIL << "[In place crop] update shape for " << u->id() << std::endl;
u->update_shape();
u->update_shape_done_by_other = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,90 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic) {
ASSERT_EQ(output_ptr_3[i], out3[i]);
}

// Checks in-place crop buffer fusing when the crops run in variadic_split mode and
// their third input (split_lengths) is produced at runtime by a shape_of primitive
// instead of being a constant.
//
// Topology: a compressed-weights fully_connected feeds two crops ("crop1" and
// "crop2", output indices 0 and 1) that split along axis 2. "output1" reads crop1,
// "output2" reads crop2 through a reshape, and "output3" reads the fully_connected
// result directly.
TEST(prepare_buffer_fusing, in_place_crop_dynamic_split_lengths) {
    auto& eng = get_test_engine();

    // Both network inputs are fully dynamic; concrete shapes come from the memories below.
    layout fc_input_layout{ ov::PartialShape{-1, -1, -1}, data_types::f32, format::bfyx };
    layout lengths_input_layout{ ov::PartialShape{-1, -1}, data_types::f32, format::bfyx };

    auto fc_input_mem = eng.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx });
    auto fc_weights_mem = eng.allocate_memory({ {8, 4}, data_types::u8, format::bfyx });
    auto fc_bias_mem = eng.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx });
    auto fc_scale_mem = eng.allocate_memory({ {8, 1}, data_types::f32, format::bfyx });
    auto fc_zp_mem = eng.allocate_memory({ {8, 1}, data_types::f32, format::bfyx });
    auto split_axis_mem = eng.allocate_memory({ {}, data_types::i64, format::bfyx });
    // Only the shape of this buffer is consumed (through shape_of); presumably it
    // yields split lengths {2, 6} - the values written below are arbitrary filler.
    auto lengths_source_mem = eng.allocate_memory({ {2, 6}, data_types::f32, format::bfyx });

    const int64_t split_axis = 2;
    set_values(fc_input_mem, { -0.5f, 2.0f, 0.5f, 1.0f,
                               0.5f, -2.0f, -0.5f, -1.0f });
    set_values<int64_t>(split_axis_mem, {split_axis});
    set_values(lengths_source_mem, { 1.0f, 2.0f, 3.0f, 4.0f,
                                     5.0f, 6.0f, 7.0f, 8.0f,
                                     9.0f, 10.0f, 11.0f, 12.0f});
    set_values<uint8_t>(fc_weights_mem, { 1, 2, 3, 4,
                                          5, 6, 7, 8,
                                          9, 10, 11, 12,
                                          13, 14, 15, 0,
                                          15, 14, 13, 12,
                                          11, 10, 9, 8,
                                          7, 6, 5, 4,
                                          3, 2, 1, 0});
    set_values(fc_bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, 2.0f });
    set_values(fc_scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 2.0f });
    set_values(fc_zp_mem, { 1.0f, 2.0f, 2.0f, 1.0f, 4.0f, 1.0f, 6.0f, 2.0f });

    // Reference values: first split, second split, and the unsplit fully_connected output.
    const std::vector<float> expected_crop1 = { 13.f, 58.f, -11.f, -62.f };
    const std::vector<float> expected_crop2 = { -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f };
    const std::vector<float> expected_fc = { 13.f, 58.f, -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, -11.f, -62.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f };

    const auto split_mode = cldnn::crop_ngraph_op_mode::variadic_split;
    topology topo(
        input_layout("input", fc_input_layout),
        input_layout("input_shapeof", lengths_input_layout),
        data("axis", split_axis_mem),
        data("weights", fc_weights_mem),
        data("bias", fc_bias_mem),
        data("scale", fc_scale_mem),
        data("zp", fc_zp_mem),
        fully_connected("fc", input_info("input"), "weights", "bias", "scale", "zp", data_types::f32, padding(), 3, 2),
        shape_of("shapeof", input_info("input_shapeof"), cldnn::data_types::i64),
        crop("crop1", { input_info("fc"), input_info("axis"), input_info("shapeof") }, cldnn::tensor(1), cldnn::tensor(0), split_mode, 0, split_axis),
        reorder("output1", input_info("crop1"), format::bfyx, data_types::f32),
        crop("crop2", { input_info("fc"), input_info("axis"), input_info("shapeof") }, cldnn::tensor(1), cldnn::tensor(0), split_mode, 1, split_axis),
        reshape("reshape", input_info("crop2"), true, std::vector<int64_t>{0, 0, 3, 2}, ov::PartialShape{-1, -1, 3, 2}, cldnn::reshape::reshape_mode::base),
        reorder("output2", input_info("reshape"), format::bfyx, data_types::f32, std::vector<float>(), reorder_mean_mode::subtract, padding(), true),
        reorder("output3", input_info("fc"), format::bfyx, data_types::f32)
    );

    auto cfg = get_test_default_config(eng);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    network net(eng, topo, cfg);

    net.set_input_data("input", fc_input_mem);
    net.set_input_data("input_shapeof", lengths_source_mem);

    std::map<cldnn::primitive_id, cldnn::network_output> results;
    EXPECT_NO_THROW(results = net.execute());

    // First split.
    auto crop1_mem = results.at("output1").get_memory();
    cldnn::mem_lock<float> crop1_values(crop1_mem, get_test_stream());
    for (size_t idx = 0; idx < expected_crop1.size(); ++idx) {
        ASSERT_EQ(crop1_values[idx], expected_crop1[idx]);
    }

    // Second split, read back through the reshape.
    auto crop2_mem = results.at("output2").get_memory();
    cldnn::mem_lock<float> crop2_values(crop2_mem, get_test_stream());
    for (size_t idx = 0; idx < expected_crop2.size(); ++idx) {
        ASSERT_EQ(crop2_values[idx], expected_crop2[idx]);
    }

    // Unsplit fully_connected output.
    auto fc_mem = results.at("output3").get_memory();
    cldnn::mem_lock<float> fc_values(fc_mem, get_test_stream());
    for (size_t idx = 0; idx < expected_fc.size(); ++idx) {
        ASSERT_EQ(fc_values[idx], expected_fc[idx]);
    }
}
// Testing for implicit crop along batch axis and outer padding optimization.
// Outer padding optimization includes optimizing out reshape and reorder which have padded input only in batch axis.
// This optimization also includes offset (outer axis padded input) handling of oneDNN primitive.
Expand Down

0 comments on commit 679a50a

Please sign in to comment.