diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_b_fs_zyx_fsv16_imad.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_b_fs_zyx_fsv16_imad.cl index 3b55a1a401c72a..ca8342d122507a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_b_fs_zyx_fsv16_imad.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_b_fs_zyx_fsv16_imad.cl @@ -126,26 +126,45 @@ KERNEL(pooling_gpu_b_fs_zyx_fsv16)( ACTIVATION_VEC16 pool_result; #if defined AVG_POOLING #if INPUT0_FEATURE_NUM % FEATURE_SLICE_SIZE != 0 - __attribute__((opencl_unroll_hint(INPUT0_FEATURE_NUM % FEATURE_SLICE_SIZE))) - for (uint i = 0; i < INPUT0_FEATURE_NUM % FEATURE_SLICE_SIZE; i++) { + if (last_in_f_group) { + __attribute__((opencl_unroll_hint(INPUT0_FEATURE_NUM % FEATURE_SLICE_SIZE))) + for (uint i = 0; i < INPUT0_FEATURE_NUM % FEATURE_SLICE_SIZE; i++) { +#if ENABLE_ROUND +#if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER) + pool_result[i] = convert_int(round(((float)result[i] / max(num_elements, (uint)1)))); #else - __attribute__((opencl_unroll_hint(FEATURE_SLICE_SIZE))) - for (uint i = 0; i < FEATURE_SLICE_SIZE; i++) { + pool_result[i] = convert_int(round((float)result[i] / (int)(POOL_SIZE_X * INPUT0_SIZE_Z * INPUT0_SIZE_Y))); +#endif +#else // ENABLE_ROUND +#if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER) + pool_result[i] = (float)result[i] / max(num_elements, (uint)1); +#else + pool_result[i] = (float)result[i] / (int)(POOL_SIZE_X * INPUT0_SIZE_Z * INPUT0_SIZE_Y); #endif +#endif // ENABLE_ROUND + } + } else { +#endif + __attribute__((opencl_unroll_hint(FEATURE_SLICE_SIZE))) + for (uint i = 0; i < FEATURE_SLICE_SIZE; i++) { +// INPUT0_FEATURE_NUM % FEATURE_SLICE_SIZE != 0 #if ENABLE_ROUND #if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER) - pool_result[i] = convert_int(round(((float)result[i] / max(num_elements, (uint)1)))); + pool_result[i] = convert_int(round(((float)result[i] / max(num_elements, (uint)1)))); #else - pool_result[i] = convert_int(round((float)result[i] / (int)(POOL_SIZE_X * INPUT0_SIZE_Z * INPUT0_SIZE_Y))); + pool_result[i] = convert_int(round((float)result[i] / (int)(POOL_SIZE_X * INPUT0_SIZE_Z * INPUT0_SIZE_Y))); #endif #else // ENABLE_ROUND #if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER) - pool_result[i] = (float)result[i] / max(num_elements, (uint)1); + pool_result[i] = (float)result[i] / max(num_elements, (uint)1); #else - pool_result[i] = (float)result[i] / (int)(POOL_SIZE_X * INPUT0_SIZE_Z * INPUT0_SIZE_Y); + pool_result[i] = (float)result[i] / (int)(POOL_SIZE_X * INPUT0_SIZE_Z * INPUT0_SIZE_Y); #endif #endif // ENABLE_ROUND + } +#if INPUT0_FEATURE_NUM % FEATURE_SLICE_SIZE != 0 } +#endif // INPUT0_FEATURE_NUM % FEATURE_SLICE_SIZE != 0 #else // AVG_POOLING pool_result = TO_ACTIVATION_VEC16(result); #endif // AVG_POOLING diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp index 8650127f4085ed..ac5629a8f01c3d 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp @@ -2719,7 +2719,7 @@ INSTANTIATE_TEST_SUITE_P( smoke_low_precision_2d_spatial, pooling_random_test, testing::Combine(testing::Values(1, 2), - testing::Values(3, 8, 64), + testing::Values(3, 8, 19, 64), testing::Values(std::tuple(12, 12, 1), std::tuple(24, 24, 1)), testing::Values(std::tuple(4, 4, 1), std::tuple(2, 2, 1)), testing::Values(std::tuple(2, 2, 1)), @@ -2736,7 +2736,7 @@ INSTANTIATE_TEST_SUITE_P( smoke_low_precision_3d_spatial, pooling_random_test, testing::Combine(testing::Values(1, 2), - testing::Values(3, 8, 64), + testing::Values(3, 8, 27, 64), testing::Values(std::tuple(12, 12, 12), std::tuple(24, 24, 24)), testing::Values(std::tuple(4, 4, 4), std::tuple(2, 2, 2)), testing::Values(std::tuple(2, 2, 2)),