From 84e3c7e9eeb01d8c2ab55a233720ac5e702fbd8b Mon Sep 17 00:00:00 2001 From: Lyamin-Roman Date: Thu, 26 Oct 2023 10:26:01 +0900 Subject: [PATCH] [GPU] Allow softmax_bf kernel for axis=X 4d case --- .../kernels/softmax/softmax_kernel_base.cpp | 8 ++++++-- .../shared_tests_instances/single_layer_tests/softmax.cpp | 7 ++++++- .../tests/unit/dynamic_execution/memory_realloc_test.cpp | 2 ++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_base.cpp index 2287562a11c7f5..87361fab0052c4 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_base.cpp @@ -92,9 +92,9 @@ bool SoftmaxKernelBaseBF::Validate(const Params& p, const optional_params& o) co switch (params.dim) { case SoftmaxDim::X: - return !input.Y().is_dynamic && input.Y().v == 1 && + return ((!input.Y().is_dynamic && input.Y().v == 1) || input.GetLayout() == DataLayout::bfyx) && !input.Z().is_dynamic && input.Z().v == 1 && - !input.Feature().is_dynamic && input.Feature().v == 1; + ((!input.Feature().is_dynamic && input.Feature().v == 1) || input.GetLayout() == DataLayout::bfyx); case SoftmaxDim::Y: return !input.X().is_dynamic && input.X().v == 1 && !input.Z().is_dynamic && input.Z().v == 1 && @@ -122,6 +122,10 @@ SoftmaxKernelBase::DispatchData SoftmaxKernelBaseBF::SetDefault(const softmax_pa OPENVINO_ASSERT(input.X().v == 1, "[GPU] SoftmaxKernelBaseBF: input.X() is expected to be 1 while actual value is ", input.X().v); dispatchData.dataSetSize = input.Y().v; dispatchData.dataSetsCount = input.Batch().v * input.Feature().v; + } else if (params.dim == SoftmaxDim::X && (input.Feature().v > 1 || input.Y().v > 1) && input.GetLayout() == DataLayout::bfyx) { + // Flatten BFY for such case + dispatchData.dataSetSize = input.X().v; + 
dispatchData.dataSetsCount = input.Batch().v * input.Feature().v * input.Y().v; } else { auto flatten_input = input.FlattenFeatureAndSpatials(); dispatchData.dataSetSize = flatten_input.Feature().v; diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/softmax.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/softmax.cpp index e0757e28927316..92da7f1b0b44c8 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/softmax.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/softmax.cpp @@ -88,13 +88,18 @@ INSTANTIATE_TEST_SUITE_P( testing::Values(ov::AnyMap())), SoftMax8LayerTest::getTestCaseName); +const std::vector<ov::Shape> stableDiffusionShapes = { + {16, 4096, 4096}, + {2, 8, 4096, 4096} +}; + INSTANTIATE_TEST_SUITE_P( smoke_SoftMaxStableDiffusion, SoftMax8LayerTest, testing::Combine(testing::ValuesIn(netPrecisions), ::testing::Values(ov::element::undefined), ::testing::Values(ov::element::undefined), - ::testing::ValuesIn(ov::test::static_shapes_to_test_representation({{16, 4096, 4096}})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(stableDiffusionShapes)), testing::Values(-1), testing::Values(ov::test::utils::DEVICE_GPU), testing::Values(ov::AnyMap())), diff --git a/src/plugins/intel_gpu/tests/unit/dynamic_execution/memory_realloc_test.cpp b/src/plugins/intel_gpu/tests/unit/dynamic_execution/memory_realloc_test.cpp index b81a87650a436c..1febfc4cd135ab 100644 --- a/src/plugins/intel_gpu/tests/unit/dynamic_execution/memory_realloc_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/dynamic_execution/memory_realloc_test.cpp @@ -151,6 +151,8 @@ TEST(softmax_gpu_dynamic_f32_test_upper_bound, input_same_values) { format::bfyx); auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + ov::intel_gpu::ImplementationDesc softmax_impl = { format::bfyx, 
"softmax_gpu_ref" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "softmax", softmax_impl } })); network network(engine, topology(input_layout("input", in_layout), reorder("reorder", input_info("input"), format::bfyx, data_types::f16), softmax("softmax", input_info("reorder"), 3),