Skip to content

Commit

Permalink
[GPU] Fix incorrect selection of preferred formats for weights in cas…
Browse files Browse the repository at this point in the history
  • Loading branch information
Lyamin-Roman authored Jul 25, 2024
1 parent 8b18e87 commit 8f07b92
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 3 deletions.
8 changes: 6 additions & 2 deletions src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1937,13 +1937,17 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, d
prim_input = node.get_dependency_index(node.as<convolution>().input());
if (node.is_type<deconvolution>())
prim_input = node.get_dependency_index(node.as<deconvolution>().input());
size_t prim_weights = node.get_primitive()->input_size();

// Note: did not handle attribute properly. especially for zero-point
cldnn::format src_fmt = format::any;
if (idx == prim_input)
if (idx == prim_input) {
src_fmt = onednn::find_data_format(prim_desc.src_desc());
else // Dep for fused post ops
} else if (idx == prim_weights) {
src_fmt = format::custom;
} else { // Dep for fused post ops
src_fmt = onednn::find_data_format(prim_desc.dst_desc());
}

// WA: shallow convolution needs to set input format by bfyx.
// onednn recommended byxf for input format. It will insert reorder before shallow conv.
Expand Down
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/graph/reshape.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ layout reshape_inst::calc_output_layout(reshape_node const& node, kernel_impl_pa
auto desc = impl_param.typed_desc<reshape>();
if (desc->output_shape.count() == 0) {
if (desc->output_partial_shape.size() != 0) {
return layout{desc->output_partial_shape, input_layout.data_type, input_layout.format};
format out_fmt = format::adjust_to_rank(input_layout.format, desc->output_partial_shape.rank().get_length());
return layout{desc->output_partial_shape, input_layout.data_type, out_fmt};
} else {
OPENVINO_ASSERT("[GPU] Output shape is not provided");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <intel_gpu/primitives/data.hpp>
#include <intel_gpu/primitives/crop.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/reshape.hpp>

#include <algorithm>
#include <array>
Expand Down Expand Up @@ -9933,6 +9934,59 @@ TEST(convolution_gpu_onednn, has_proper_synchronization) {
}
}

// Regression test: a grouped convolution whose weights are produced at runtime
// (an input_layout fed through a reshape) must not crash the OneDNN
// preferred-format selection for the weights dependency, and must still
// produce an output of the expected shape.
TEST(convolution_gpu_onednn, grouped_runtime_weights) {
    auto& engine = get_test_engine();

    // The OneDNN implementation is only selectable on devices with immad support.
    if (!engine.get_device_info().supports_immad)
        return;

    tests::random_generator rg(GET_SUITE_NAME);

    // Activation tensor: 1x256x29x29, fp16, laid out as byxf.
    const int64_t in_b = 1, in_f = 256, in_y = 29, in_x = 29;
    const auto in_shape = ov::PartialShape{ in_b, in_f, in_y, in_x };
    auto in_vals = rg.generate_random_4d<ov::float16>(in_b, in_f, in_y, in_x, -1, 1);
    auto input_mem = engine.allocate_memory({ in_shape, data_types::f16, format::byxf });
    set_values(input_mem, flatten_4d(format::byxf, in_vals));

    // Weights tensor: 1x256x5x5, fp16, bfyx; reshaped at runtime to a grouped
    // 5-D layout (256 groups of 1x1x5x5) before feeding the convolution.
    const int64_t wei_b = 1, wei_f = 256, wei_y = 5, wei_x = 5;
    const auto wei_shape = ov::PartialShape{ wei_b, wei_f, wei_y, wei_x };
    auto wei_vals = rg.generate_random_4d<ov::float16>(wei_b, wei_f, wei_y, wei_x, -1, 1);
    auto weights_mem = engine.allocate_memory({ wei_shape, data_types::f16, format::bfyx });
    set_values(weights_mem, flatten_4d(format::bfyx, wei_vals));

    topology topology(
        input_layout("input", input_mem->get_layout()),
        input_layout("weights", weights_mem->get_layout()),
        reshape("reshaped_weights", input_info("weights"), true, { 256, 1, 1, 5, 5 }, { 256, 1, 1, 5, 5 }),
        convolution("conv", input_info("input"), "reshaped_weights", no_bias, 256, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, true),
        reorder("reorder", input_info("conv"), { data_types::f32, format::bfyx, { 1, 256, 25, 25 } }));

    // Force the OneDNN convolution implementation so the weights-format
    // selection path under test is actually exercised.
    ExecutionConfig config = get_test_default_config(engine);
    ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, "", impl_types::onednn };
    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }}));
    config.set_property(ov::intel_gpu::optimize_data(true));

    network network(engine, topology, config);
    network.set_input_data("input", input_mem);
    network.set_input_data("weights", weights_mem);

    auto outputs = network.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "reorder");

    auto out_mem = outputs.at("reorder").get_memory();
    cldnn::mem_lock<float> out_ptr(out_mem, get_test_stream());

    // 5x5 kernel, no padding, stride 1 on a 29x29 input -> 25x25 spatial output.
    ASSERT_EQ(out_mem->get_layout().get_shape(), ov::Shape({1, 256, 25, 25}));
}

#endif // ENABLE_ONEDNN_FOR_GPU

template <typename T>
Expand Down

0 comments on commit 8f07b92

Please sign in to comment.