From 4fc7d1854b4d50e3baded9365b38f34bd93fd9ec Mon Sep 17 00:00:00 2001 From: Kelvin Choi Date: Wed, 27 Nov 2024 19:03:11 +0900 Subject: [PATCH] [GPU] Add fs_b_yx_fsv32 format for pooling int8 ref kernel --- .../cl_kernels/pooling_gpu_int8_ref.cl | 2 +- .../pooling/pooling_kernel_gpu_int8_ref.cpp | 2 ++ .../pooling/pooling_kernel_gpu_ref.cpp | 2 -- .../unit/test_cases/pooling_gpu_test.cpp | 29 ++++++++++++++----- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_int8_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_int8_ref.cl index d2f374215a1d4d..c2c47394b0b147 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_int8_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_int8_ref.cl @@ -74,7 +74,7 @@ KERNEL(pooling_gpu_int8_ref)( const uint f = bf / INPUT0_BATCH_NUM; const uint b = bf % INPUT0_BATCH_NUM; const uint z = 0; -#elif OUTPUT_LAYOUT_B_FS_YX_FSV16 || OUTPUT_LAYOUT_BS_FS_YX_BSV32_FSV32 || OUTPUT_LAYOUT_BS_FS_YX_BSV16_FSV32 +#elif OUTPUT_LAYOUT_B_FS_YX_FSV16 || OUTPUT_LAYOUT_BS_FS_YX_BSV32_FSV32 || OUTPUT_LAYOUT_BS_FS_YX_BSV16_FSV32 || OUTPUT_LAYOUT_FS_B_YX_FSV32 const uint x = get_global_id(1); const uint y = get_global_id(2); const uint bf = (uint)get_global_id(0); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_int8_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_int8_ref.cpp index 0c3ccf6d3604f1..9eee25f57ef600 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_int8_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_int8_ref.cpp @@ -18,6 +18,7 @@ ParamsKey PoolingKernelGPUInt8Ref::GetSupportedKey() const { k.EnableInputLayout(DataLayout::bfzyx); k.EnableInputLayout(DataLayout::yxfb); k.EnableInputLayout(DataLayout::byxf); + k.EnableInputLayout(DataLayout::fs_b_yx_fsv32); k.EnableInputLayout(DataLayout::b_fs_yx_fsv4); k.EnableInputLayout(DataLayout::b_fs_yx_fsv32); k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32); @@ -30,6 +31,7 @@ ParamsKey PoolingKernelGPUInt8Ref::GetSupportedKey() const { k.EnableOutputLayout(DataLayout::bfzyx); k.EnableOutputLayout(DataLayout::yxfb); k.EnableOutputLayout(DataLayout::byxf); + k.EnableOutputLayout(DataLayout::fs_b_yx_fsv32); k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4); k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32); k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv32); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_ref.cpp index 9aca90f891c652..3e620c848a2844 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_ref.cpp @@ -9,8 +9,6 @@ ParamsKey PoolingKernelGPURef::GetSupportedKey() const { ParamsKey k; k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); - k.EnableInputDataType(Datatype::UINT8); - k.EnableInputDataType(Datatype::INT8); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::UINT8); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp index a2b1a5134cf85c..f47c257064d44e 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp @@ -1521,13 +1521,13 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_int8) { auto& engine = get_test_engine(); - auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 32, 1, 2 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 32, 1, 2 } }); topology topology; topology.add(input_layout("input", input_prim->get_layout())); - topology.add(reorder("reorder_input", input_info("input"), layout(data_types::i8, format::fs_b_yx_fsv32, { 1, 32, 1, 2 }))); + topology.add(reorder("reorder_input", input_info("input"), layout(data_types::i8, format::fs_b_yx_fsv32, { 2, 32, 1, 2 }))); topology.add(pooling("avg_pooling", input_info("reorder_input"), pooling_mode::average, { 7, 7 }, { 2, 2 })); - topology.add(reorder("reorder_after_pooling", input_info("avg_pooling"), layout(data_types::f32, format::fs_b_yx_fsv32, { 1, 32, 1, 1 }))); + topology.add(reorder("reorder_after_pooling", input_info("avg_pooling"), layout(data_types::f32, format::fs_b_yx_fsv32, { 2, 32, 1, 1 }))); network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f, 48.f, @@ -1535,6 +1535,16 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_int8) 21.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 28.f, 31.f, 32.f, 33.f, 34.f, 35.f, 36.f, 37.f, 38.f, + 31.f, 32.f, 33.f, 34.f, 35.f, 36.f, 37.f, 38.f, + 21.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 28.f, + 51.f, 52.f, 53.f, 54.f, 55.f, 56.f, 57.f, 58.f, + 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, + + 41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f, 48.f, + 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, + 21.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 28.f, + 31.f, 32.f, 33.f, 34.f, 35.f, 36.f, 37.f, 38.f, + 31.f, 32.f, 33.f, 34.f, 35.f, 36.f, 37.f, 38.f, 21.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 28.f, 51.f, 52.f, 53.f, 54.f, 55.f, 56.f, 57.f, 58.f, @@ -1549,10 +1559,15 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_int8) cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - std::vector ref_data = { 36.f, 37.f, 38.f, 39.f, 40.f, 41.f, 42.f, 43.f, - 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, - 36.f, 37.f, 38.f, 39.f, 40.f, 41.f, 42.f, 43.f, - 21.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 28.f }; + std::vector ref_data = { 41.f, 43.f, 45.f, 47.f, 11.f, 13.f, 15.f, 17.f, + 21.f, 23.f, 25.f, 27.f, 31.f, 33.f, 35.f, 37.f, + 31.f, 33.f, 35.f, 37.f, 21.f, 23.f, 25.f, 27.f, + 51.f, 53.f, 55.f, 57.f, 11.f, 13.f, 15.f, 17.f, + + 42.f, 44.f, 46.f, 48.f, 12.f, 14.f, 16.f, 18.f, + 22.f, 24.f, 26.f, 28.f, 32.f, 34.f, 36.f, 38.f, + 32.f, 34.f, 36.f, 38.f, 22.f, 24.f, 26.f, 28.f, + 52.f, 54.f, 56.f, 58.f, 12.f, 14.f, 16.f, 18.f }; for (size_t i = 0; i < ref_data.size(); i++) { ASSERT_EQ(ref_data[i], float(output_ptr[i]));