From 1ff5508e701fceb260475538b20056a67f86db11 Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Thu, 21 Mar 2024 09:20:40 +0900 Subject: [PATCH] [GPU] Increase FC tile_b size for INT4 shape agnostic kernel (#23532) ### Details: - Increased FC tile_b size for INT4 shape agnostic kernel to improve context processing ### Tickets: - 133444 --- .../fully_connected/fully_connected_kernel_bf_tiled.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 7c6ace7c20c7de..f39f7deaec6614 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -178,12 +178,13 @@ bool TuneParamsSelector::VerifyTuneParams(const fully_connected_params& params, return false; if (tparams.kernel_type == FullyConnected_bf_tiled::KernelType::SLM) { + bool is_i4_u4 = (params.weights.GetDType() == WeightsType::INT4 || params.weights.GetDType() == WeightsType::UINT4); const auto required_batch_alignment = 64; if (!params.is_shape_agnostic && (!IsAligned(output_b, required_batch_alignment) || output_b < 256)) return false; const auto required_tile_b = 8; - if (tparams.tile_b != required_tile_b) + if ((tparams.tile_b != required_tile_b) && !is_i4_u4) return false; const auto required_tile_ofm = 2; @@ -248,6 +249,10 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, } else { // Try to use SLM kernels if possible if (preferred_kernel_type != KernelType::DEFAULT) { + if (params.is_shape_agnostic) { + selector.Case(tune_params(16, 2, 2, 4, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)) + .Case(tune_params(16, 2, 1, 4, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)); + } 
selector.Case(tune_params(8, 2, 2, 4, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)) .Case(tune_params(8, 2, 1, 4, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)); }