Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Fix dynamic padding processing of static dimension #17978

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/kernel_selector/jitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,14 @@ std::string toCodeString(size_t val) {
std::string toCodeString(const Tensor::Dim& dim, size_t offset, bool padded, bool pad_is_dynamic, size_t pad_offset) {
std::string pad_str = "";
if (padded) {
if (dim.pad.is_dynamic) {
if (pad_is_dynamic) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you tell me what the situation was?
Was dim.pad.is_dynamic not set as dynamic even though the dim.pad was actually dynamic?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yeonbok hmm, the issue happens for concat's dependency when the dimension is static (the dimension along which we apply concat) while at the same time it has a dynamic padding assigned (because of the concat optimization). In that case, during jitter code generation at line 200 we will choose the else branch and try to obtain dim.pad.Total() for a dynamic padding, which produces an error.
You can try to reproduce this situation with the test from my PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I will check :)

pad_str = " + (shape_info[" + std::to_string(pad_offset) + "] + shape_info[" +
std::to_string(pad_offset + 1) + "])";
} else {
pad_str = " + " + std::to_string(dim.pad.Total());
}
}
if (dim.is_dynamic) {
if (dim.is_dynamic || pad_is_dynamic) {
snprintf(buf, sizeof(buf), "(shape_info[%zu] %s)", offset, pad_str.c_str());
} else {
snprintf(buf, sizeof(buf), "%zu", dim.v + (padded ? dim.pad.Total() : 0));
Expand Down
14 changes: 10 additions & 4 deletions src/plugins/intel_gpu/src/runtime/layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,16 @@ std::string layout::to_short_string() const {

s << data_type_traits::name(data_type) << ":" << format.to_string() << ":";
dump_shape(s, size);
if (data_padding)
s << ":pad";
else
s << ":nopad";

if (data_padding.get_dynamic_pad_dims() != tensor(0)) {
s << ":dyn_pad_dims" << data_padding.get_dynamic_pad_dims().to_string();
} else {
if (data_padding)
s << ":pad";
else
s << ":nopad";
}

return s.str();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,4 +255,59 @@ TEST(prepare_buffer_fusing, in_place_concat_dynamic) {
for (size_t x = 0; x < out_l.count(); ++x) {
ASSERT_EQ(ref_output[x], output_ptr[x]);
}
}
}

// Regression test: a concat dependency whose concat-axis dimension is static
// but which carries a *dynamic* pad (assigned by the in-place concat
// optimization). Verifies that the concat node is optimized out — i.e. both
// permute inputs write directly into the concat output buffer — and that the
// final result matches the reference.
TEST(prepare_buffer_fusing, in_place_concat_dynamic__static_dim_dyn_pad) {
    auto& engine = get_test_engine();
    // Dynamic layouts are used to build the program (forcing shape_info-based
    // codegen); the static layouts below are used for actual allocation.
    auto in_layout1_0 = layout{ ov::PartialShape{-1, 2, -1, -1}, data_types::f32, format::bfyx }; // => {-1, -1, -1, 2}
    auto in_layout2_0 = layout{ ov::PartialShape{1, 2, -1, -1}, data_types::f32, format::bfyx };  // => {-1, -1, 1, 2}
    auto in_layout1 = layout{ ov::PartialShape{1, 2, 3, 4}, data_types::f32, format::bfyx };
    auto in_layout2 = layout{ ov::PartialShape{1, 2, 4, 1}, data_types::f32, format::bfyx };

    topology topology;
    topology.add(input_layout("input1", in_layout1_0));
    topology.add(input_layout("input2", in_layout2_0));
    topology.add(permute("permute1", input_info("input1"), {0, 3, 2, 1}));
    topology.add(permute("permute2", input_info("input2"), {3, 2, 0, 1}));

    topology.add(concatenation("concat", { input_info("permute1"), input_info("permute2") }, 2));
    topology.add(permute("output", input_info("concat"), {0, 2, 3, 1}));

    ExecutionConfig config;
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    auto prog = program::build_program(engine, topology, config, false, false);
    ASSERT_NE(prog, nullptr);
    cldnn::network net(prog, 0);

    auto input_memory1 = engine.allocate_memory(in_layout1);
    auto input_memory2 = engine.allocate_memory(in_layout2);
    set_values<float>(input_memory1,
                      {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 11.0, 22.0, 33.0, 44.0, 55.0, 66.0,
                       111.0, 222.0, 333.0, 444.0, 555.0, 666.0, 1111.0, 2222.0, 3333.0, 4444.0, 5555.0, 6666.0});
    set_values<float>(input_memory2, {1234.0, 2345.0, 3456.0, 4567.0, 5678.0, 6789.0, 9012.0, 9999.0});
    net.set_input_data("input1", input_memory1);
    net.set_input_data("input2", input_memory2);

    std::vector<float> ref_output = {1.0, 2.0, 3.0, 4.0, 111.0, 222.0, 333.0, 444.0, 5.0, 6.0, 11.0,
                                     22.0, 555.0, 666.0, 1111.0, 2222.0, 33.0, 44.0, 55.0, 66.0, 3333.0, 4444.0,
                                     5555.0, 6666.0, 1234.0, 2345.0, 3456.0, 4567.0, 5678.0, 6789.0, 9012.0, 9999.0};

    std::map<cldnn::primitive_id, cldnn::network_output> output;
    EXPECT_NO_THROW(output = net.execute());
    auto out_l = net.get_output_layout("output");
    auto out_mem = output.at("output").get_memory();
    cldnn::mem_lock<float> output_ptr(out_mem, get_test_stream());

    const auto& concat_node = net.get_primitive("concat")->get_node();
    auto concat_mem = net.get_primitive("concat")->output_memory_ptr();
    auto permute1_mem = net.get_primitive("permute1")->output_memory_ptr();
    // Fixed copy-paste bug: previously queried "permute1" here as well, which
    // made the ASSERT_EQ below a tautology and left permute2's in-place
    // behavior unverified.
    auto permute2_mem = net.get_primitive("permute2")->output_memory_ptr();

    // In-place concat: both producers must share the concat output buffer.
    ASSERT_TRUE(concat_node.can_be_optimized());
    ASSERT_EQ(concat_mem.get(), permute1_mem.get());
    ASSERT_EQ(concat_mem.get(), permute2_mem.get());
    for (size_t x = 0; x < out_l.count(); ++x) {
        ASSERT_EQ(ref_output[x], output_ptr[x]);
    }
}