diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp index 45a233bbebc747..f4495fb5dd1645 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp @@ -48,7 +48,7 @@ struct FullyConnectedImplementationManager : public ImplementationManager { one_of(wei_dt, {data_types::i8, data_types::u8}) && one_of(out_dt, {data_types::f16, data_types::f32, data_types::i32, data_types::i8, data_types::u8}); bool compressed_case = fc_prim->compressed_weights && - one_of(in0_dt, {data_types::f16, data_types::f32}) && + one_of(in0_dt, {data_types::f16, data_types::f32, data_types::i8}) && one_of(wei_dt, {data_types::u8, data_types::i8, data_types::u4, data_types::i4}) && one_of(out_dt, {data_types::f16, data_types::f32}); if (!f16f16_case && !f32f32_case && !u8s8_case && !compressed_case) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_opt.cl index 628bc69f3886df..6db1790844e501 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_opt.cl @@ -72,7 +72,7 @@ KERNEL(dynamic_quantize_gpu_opt)( continue; val[i] *= scale; - VSTORE_N(CONVERT_CHAR_N(val[i]), 0, output + offset + ((local_id * iteration + i) * block_size)); + VSTORE_N(CAT(CONVERT_CHAR_N, _rte)(val[i]), 0, output + offset + ((local_id * iteration + i) * block_size)); } if (sglid == 0 && local_id == 0) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl index 436276a67e48c0..858571fea71914 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl @@ -44,7 +44,7 @@ KERNEL(dynamic_quantize_gpu_ref)( for (x = 0; x < INPUT0_SIZE_X / 8; x++) { half8 val = as_half8(vload8(0, (ushort*)input + in_offset + x * 8)); val *= scale; - vstore8(convert_char8(val), 0, output + out_offset + x * 8); + vstore8(convert_char8_rte(val), 0, output + out_offset + x * 8); } x *= 8; for (; x < INPUT0_SIZE_X; x++)