From 8f07b923116fb4de9503b0dd725a017356b1f529 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Thu, 25 Jul 2024 10:04:25 +0400 Subject: [PATCH] [GPU] Fix incorrect selection of preferred formats for weights in case of OneDNN (#25697) ### Tickets: - *[146165](https://jira.devtools.intel.com/browse/CVS-146165)* --- .../intel_gpu/src/graph/layout_optimizer.cpp | 8 ++- src/plugins/intel_gpu/src/graph/reshape.cpp | 3 +- .../unit/test_cases/convolution_gpu_test.cpp | 54 +++++++++++++++++++ 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index b2acc2abf1c173..bcada1fa769fea 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1937,13 +1937,17 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, d prim_input = node.get_dependency_index(node.as().input()); if (node.is_type()) prim_input = node.get_dependency_index(node.as().input()); + size_t prim_weights = node.get_primitive()->input_size(); // Note: did not handle attribute properly. especially for zero-point cldnn::format src_fmt = format::any; - if (idx == prim_input) + if (idx == prim_input) { src_fmt = onednn::find_data_format(prim_desc.src_desc()); - else // Dep for fused post ops + } else if (idx == prim_weights) { + src_fmt = format::custom; + } else { // Dep for fused post ops src_fmt = onednn::find_data_format(prim_desc.dst_desc()); + } // WA: shallow convolution needs to set input format by bfyx. // onednn recommended byxf for input format. It will insert reorder before shallow conv. diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index eed87ed759211d..5cbef11dd3b045 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -109,7 +109,8 @@ layout reshape_inst::calc_output_layout(reshape_node const& node, kernel_impl_pa auto desc = impl_param.typed_desc(); if (desc->output_shape.count() == 0) { if (desc->output_partial_shape.size() != 0) { - return layout{desc->output_partial_shape, input_layout.data_type, input_layout.format}; + format out_fmt = format::adjust_to_rank(input_layout.format, desc->output_partial_shape.rank().get_length()); + return layout{desc->output_partial_shape, input_layout.data_type, out_fmt}; } else { OPENVINO_ASSERT("[GPU] Output shape is not provided"); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index 0bf595e124db89..132b2378420a03 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -9933,6 +9934,59 @@ TEST(convolution_gpu_onednn, has_proper_synchronization) { } } +// A test that detects crashes in OneDNN convolution selection checks +TEST(convolution_gpu_onednn, grouped_runtime_weights) { + auto& engine = get_test_engine(); + + if (!engine.get_device_info().supports_immad) + return; + + tests::random_generator rg(GET_SUITE_NAME); + + int64_t input_b = 1, input_f = 256, input_y = 29, input_x = 29; + auto input_size = ov::PartialShape{ input_b, input_f, input_y, input_x }; + auto input_data = rg.generate_random_4d(input_b, input_f, input_y, input_x, -1, 1); + auto input_data_byxf = flatten_4d(format::byxf, input_data); + auto input_mem = engine.allocate_memory({ input_size, data_types::f16, format::byxf }); + set_values(input_mem, input_data_byxf); + + int64_t weights_b = 1, weights_f = 256, weights_y = 5, weights_x = 5; + auto weights_size = ov::PartialShape{ weights_b, weights_f, weights_y, weights_x }; + auto weights_data = rg.generate_random_4d(weights_b, weights_f, weights_y, weights_x, -1, 1); + auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); + auto weights_mem = engine.allocate_memory({ weights_size, data_types::f16, format::bfyx }); + set_values(weights_mem, weights_data_bfyx); + + auto input = input_layout("input", input_mem->get_layout()); + auto weights = input_layout("weights", weights_mem->get_layout()); + auto weights_reshape = reshape("reshaped_weights", input_info("weights"), true, { 256, 1, 1, 5, 5 }, { 256, 1, 1, 5, 5 }); + auto conv = convolution("conv", input_info("input"), "reshaped_weights", no_bias, 256, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, true); + auto output_reorder = reorder("reorder", input_info("conv"), { data_types::f32, format::bfyx, { 1, 256, 25, 25 } }); + + topology topology(input, weights, weights_reshape, conv, output_reorder); + + ExecutionConfig config = get_test_default_config(engine); + ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, "", impl_types::onednn }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }})); + config.set_property(ov::intel_gpu::optimize_data(true)); + + network network(engine, topology, config); + + network.set_input_data("input", input_mem); + network.set_input_data("weights", weights_mem); + + auto output = network.execute(); + + ASSERT_EQ(output.size(), size_t(1)); + ASSERT_EQ(output.begin()->first, "reorder"); + + auto output_memory = output.at("reorder").get_memory(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + + ASSERT_EQ(output_layout.get_shape(), ov::Shape({1, 256, 25, 25})); +} + #endif // ENABLE_ONEDNN_FOR_GPU template