From ba2425a118c2924dbe4f31cc3707451566c5c312 Mon Sep 17 00:00:00 2001
From: Sergey Shlyapnikov
Date: Fri, 9 Jun 2023 20:05:18 +0400
Subject: [PATCH] [GPU] Fix dynamic padding processing of static dimension

---
 .../intel_gpu/src/kernel_selector/jitter.cpp  |  4 +-
 src/plugins/intel_gpu/src/runtime/layout.cpp  | 14 +++-
 .../passes/prepare_buffer_fusing_test.cpp     | 57 ++++++++++++++++++++-
 3 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp
index 1c3360d797c748..e653da1859c3c7 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp
@@ -190,14 +190,14 @@ std::string toCodeString(size_t val) {
 std::string toCodeString(const Tensor::Dim& dim, size_t offset, bool padded, bool pad_is_dynamic, size_t pad_offset) {
     std::string pad_str = "";
     if (padded) {
-        if (dim.pad.is_dynamic) {
+        if (pad_is_dynamic) {
             pad_str = " + (shape_info[" + std::to_string(pad_offset) + "] + shape_info[" + std::to_string(pad_offset + 1) + "])";
         } else {
             pad_str = " + " + std::to_string(dim.pad.Total());
         }
     }
-    if (dim.is_dynamic) {
+    if (dim.is_dynamic || pad_is_dynamic) {
         snprintf(buf, sizeof(buf), "(shape_info[%zu] %s)", offset, pad_str.c_str());
     } else {
         snprintf(buf, sizeof(buf), "%zu", dim.v + (padded ? dim.pad.Total() : 0));
diff --git a/src/plugins/intel_gpu/src/runtime/layout.cpp b/src/plugins/intel_gpu/src/runtime/layout.cpp
index f732fc1d95d06d..f2517527063f72 100644
--- a/src/plugins/intel_gpu/src/runtime/layout.cpp
+++ b/src/plugins/intel_gpu/src/runtime/layout.cpp
@@ -213,10 +213,16 @@ std::string layout::to_short_string() const {
     s << data_type_traits::name(data_type) << ":" << format.to_string() << ":";
     dump_shape(s, size);
-    if (data_padding)
-        s << ":pad";
-    else
-        s << ":nopad";
+
+    if (data_padding.get_dynamic_pad_dims() != tensor(0)) {
+        s << ":dyn_pad_dims" << data_padding.get_dynamic_pad_dims().to_string();
+    } else {
+        if (data_padding)
+            s << ":pad";
+        else
+            s << ":nopad";
+    }
+
     return s.str();
 }
diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp
index 8cfbd5afd3dd03..6ffa6250ad40a0 100644
--- a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp
@@ -255,4 +255,59 @@ TEST(prepare_buffer_fusing, in_place_concat_dynamic) {
     for (size_t x = 0; x < out_l.count(); ++x) {
         ASSERT_EQ(ref_output[x], output_ptr[x]);
     }
-}
\ No newline at end of file
+}
+
+TEST(prepare_buffer_fusing, in_place_concat_dynamic__static_dim_dyn_pad) {
+    auto& engine = get_test_engine();
+    auto in_layout1_0 = layout{ ov::PartialShape{-1, 2, -1, -1}, data_types::f32, format::bfyx }; // => {-1, -1, -1, 2}
+    auto in_layout2_0 = layout{ ov::PartialShape{1, 2, -1, -1}, data_types::f32, format::bfyx };  // => {-1, -1, 1, 2}
+    auto in_layout1 = layout{ ov::PartialShape{1, 2, 3, 4}, data_types::f32, format::bfyx };
+    auto in_layout2 = layout{ ov::PartialShape{1, 2, 4, 1}, data_types::f32, format::bfyx };
+
+    topology topology;
+    topology.add(input_layout("input1", in_layout1_0));
+    topology.add(input_layout("input2", in_layout2_0));
+    topology.add(permute("permute1", input_info("input1"), {0, 3, 2, 1}));
+    topology.add(permute("permute2", input_info("input2"), {3, 2, 0, 1}));
+
+    topology.add(concatenation("concat", { input_info("permute1"), input_info("permute2") }, 2));
+    topology.add(permute("output", input_info("concat"), {0, 2, 3, 1}));
+
+    ExecutionConfig config;
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    auto prog = program::build_program(engine, topology, config, false, false);
+    ASSERT_NE(prog, nullptr);
+    cldnn::network net(prog, 0);
+
+    auto input_memory1 = engine.allocate_memory(in_layout1);
+    auto input_memory2 = engine.allocate_memory(in_layout2);
+    set_values<float>(input_memory1,
+                      {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 11.0, 22.0, 33.0, 44.0, 55.0, 66.0,
+                       111.0, 222.0, 333.0, 444.0, 555.0, 666.0, 1111.0, 2222.0, 3333.0, 4444.0, 5555.0, 6666.0});
+    set_values<float>(input_memory2, {1234.0, 2345.0, 3456.0, 4567.0, 5678.0, 6789.0, 9012.0, 9999.0});
+    net.set_input_data("input1", input_memory1);
+    net.set_input_data("input2", input_memory2);
+
+    std::vector<float> ref_output = {1.0, 2.0, 3.0, 4.0, 111.0, 222.0, 333.0, 444.0, 5.0, 6.0, 11.0,
+                                     22.0, 555.0, 666.0, 1111.0, 2222.0, 33.0, 44.0, 55.0, 66.0, 3333.0, 4444.0,
+                                     5555.0, 6666.0, 1234.0, 2345.0, 3456.0, 4567.0, 5678.0, 6789.0, 9012.0, 9999.0};
+
+    std::map<cldnn::primitive_id, network_output> output;
+    EXPECT_NO_THROW(output = net.execute());
+    auto out_l = net.get_output_layout("output");
+    auto out_mem = output.at("output").get_memory();
+    cldnn::mem_lock<float> output_ptr(out_mem, get_test_stream());
+
+    const auto& concat_node = net.get_primitive("concat")->get_node();
+    auto concat_mem = net.get_primitive("concat")->output_memory_ptr();
+    auto permute1_mem = net.get_primitive("permute1")->output_memory_ptr();
+    auto permute2_mem = net.get_primitive("permute2")->output_memory_ptr();
+
+    ASSERT_TRUE(concat_node.can_be_optimized());
+    ASSERT_EQ(concat_mem.get(), permute1_mem.get());
+    ASSERT_EQ(concat_mem.get(), permute2_mem.get());
+    for (size_t x = 0; x < out_l.count(); ++x) {
+        ASSERT_EQ(ref_output[x], output_ptr[x]);
+    }
+}
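
Note on the jitter.cpp hunk: a dimension whose extent is static can still carry padding that is only known at runtime, and in that case the emitted JIT expression must read both the extent and the pad from the shape_info buffer instead of baking in a compile-time constant. Below is a minimal self-contained sketch of that decision; DimSketch, pad_total, and the buffer size are simplified stand-ins for illustration, not the actual kernel_selector types.

#include <cstdio>
#include <string>

// Simplified stand-in for kernel_selector's Tensor::Dim.
struct DimSketch {
    size_t v;          // static extent, meaningful only when !is_dynamic
    size_t pad_total;  // stand-in for dim.pad.Total()
    bool is_dynamic;
};

// Mirrors the patched branch structure: a dynamic pad now forces the
// shape_info path even when the dimension itself is static.
std::string to_code_string(const DimSketch& dim, size_t offset, bool padded,
                           bool pad_is_dynamic, size_t pad_offset) {
    std::string pad_str;
    if (padded) {
        if (pad_is_dynamic) {
            // Lower and upper pad are read from two consecutive shape_info slots.
            pad_str = " + (shape_info[" + std::to_string(pad_offset) + "] + shape_info[" +
                      std::to_string(pad_offset + 1) + "])";
        } else {
            pad_str = " + " + std::to_string(dim.pad_total);
        }
    }
    char buf[256];
    if (dim.is_dynamic || pad_is_dynamic) {
        snprintf(buf, sizeof(buf), "(shape_info[%zu]%s)", offset, pad_str.c_str());
    } else {
        snprintf(buf, sizeof(buf), "%zu", dim.v + (padded ? dim.pad_total : 0));
    }
    return buf;
}

int main() {
    // A static extent of 4 with dynamic padding: before the fix this path
    // emitted a compile-time constant and dropped the runtime pad entirely.
    DimSketch d{4, 0, /*is_dynamic=*/false};
    std::printf("%s\n", to_code_string(d, 2, /*padded=*/true,
                                       /*pad_is_dynamic=*/true, /*pad_offset=*/6).c_str());
    // Prints: (shape_info[2] + (shape_info[6] + shape_info[7]))
}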
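
Note on the layout.cpp hunk: to_short_string() previously collapsed every padded layout to ":pad", hiding which dimensions carry dynamic padding. A hypothetical miniature of the new branching, with the real cldnn types replaced by plain strings for illustration (the printed shapes and pad-dim notation are assumptions, not cldnn's exact formatting):

#include <iostream>
#include <sstream>
#include <string>

// Sketch of layout::to_short_string() after the patch; dyn_pad_dims is empty
// when no dimension has dynamic padding.
std::string short_string(const std::string& dtype, const std::string& fmt,
                         const std::string& shape, bool has_pad,
                         const std::string& dyn_pad_dims) {
    std::ostringstream s;
    s << dtype << ":" << fmt << ":" << shape;
    if (!dyn_pad_dims.empty()) {
        s << ":dyn_pad_dims" << dyn_pad_dims;  // branch added by the patch
    } else {
        s << (has_pad ? ":pad" : ":nopad");    // previous behavior preserved
    }
    return s.str();
}

int main() {
    std::cout << short_string("f32", "bfyx", "[?,2,?,?]", true, "[0,0,1,0]") << "\n";
    // e.g. f32:bfyx:[?,2,?,?]:dyn_pad_dims[0,0,1,0]
}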