diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
index fb7cac013d4ada..80b786897f3a3f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
@@ -40,8 +40,8 @@ bool EltwiseKernel_vload8::Validate(const Params& params, const optional_params&
         return false;
     }
     if ((ewParams.output.GetLayout() == DataLayout::b_fs_yx_fsv16 && ewParams.output.Feature().v % 16 != 0) ||
-        (ewParams.output.GetLayout() == DataLayout::b_fs_zyx_fsv16 && ewParams.output.Feature().v % 16 != 0) ||
-        (ewParams.output.GetLayout() == DataLayout::b_fs_yx_fsv4 && ewParams.output.Feature().v % 8 != 0) ||
+        (ewParams.output.GetLayout() == DataLayout::b_fs_zyx_fsv16 && ewParams.output.Feature().v % 16 != 0) ||
+        (ewParams.output.GetLayout() == DataLayout::b_fs_yx_fsv4 && ewParams.output.Feature().v % 8 != 0) ||
         ewParams.output.GetLayout() == DataLayout::fs_b_yx_fsv32)
         return false;
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp
index 867dc94ad4041e..a32f5bd0f0f159 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp
@@ -220,44 +220,53 @@ std::vector<size_t> GetOptimalLocalWorkGroupSizes(std::vector<size_t> gws, const
 bool CheckInputsOutputNoPitchSameDims(const base_params& params) {
     bool no_pitch_same_dims = true;
 
-    std::vector<std::pair<DataLayout, size_t>> block_layouts = {
-        {DataLayout::b_fs_yx_fsv16, 16},
-        {DataLayout::b_fs_zyx_fsv16, 16},
-        {DataLayout::b_fs_yx_fsv32, 32},
-        {DataLayout::b_fs_zyx_fsv32, 32},
-        {DataLayout::bs_fs_yx_bsv16_fsv16, 16},
-        {DataLayout::bs_fs_zyx_bsv16_fsv16, 16},
-        {DataLayout::bs_f_bsv8__af8, 8},
-        {DataLayout::bs_f_bsv16__af8, 16},
-        {DataLayout::b_fs_yx_fsv4, 4},
-        {DataLayout::fs_b_yx_fsv32, 32},
-        {DataLayout::b_fs_yx_32fp, 32}
+    std::map<DataLayout, std::pair<size_t, size_t>> block_layouts {
+        {DataLayout::b_fs_yx_fsv16, {1, 16}},
+        {DataLayout::b_fs_zyx_fsv16, {1, 16}},
+        {DataLayout::b_fs_yx_fsv32, {1, 32}},
+        {DataLayout::b_fs_zyx_fsv32, {1, 32}},
+        {DataLayout::bs_fs_yx_bsv16_fsv16, {16, 16}},
+        {DataLayout::bs_fs_zyx_bsv16_fsv16, {16, 16}},
+        {DataLayout::bs_f_bsv8__af8, {8, 8}},
+        {DataLayout::bs_f_bsv16__af8, {16, 8}},
+        {DataLayout::b_fs_yx_fsv4, {1, 4}},
+        {DataLayout::fs_b_yx_fsv32, {1, 32}},
+        {DataLayout::b_fs_yx_32fp, {1, 32}}
     };
 
     if (params.inputs.size()) {
         no_pitch_same_dims = !params.inputs[0].PitchesDifferFromLogicalDims();
 
-        for (const auto& layout : block_layouts) {
-            if (params.fused_ops.size()) {
-                for (auto fused_op : params.fused_ops) {
-                    for (size_t in = 0; in < fused_op.tensors.size(); in++) {
-                        if (fused_op.tensors[in].LogicalSize() == 1)
-                            continue;
-                        if (fused_op.tensors[in].GetLayout() == layout.first && fused_op.tensors[in].Feature().v % layout.second != 0)
-                            return false;
-                        no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == fused_op.tensors[in]);
-                    }
-                }
-            }
+        if (params.fused_ops.size()) {
+            for (auto fused_op : params.fused_ops) {
+                for (size_t in = 0; in < fused_op.tensors.size(); in++) {
+                    if (fused_op.tensors[in].LogicalSize() == 1)
+                        continue;
 
-            for (size_t i = 0; i < params.inputs.size(); i++) {
-                no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == params.inputs[i]);
+                    auto layout = block_layouts.find(fused_op.tensors[in].GetLayout());
+                    if (layout == block_layouts.end())
+                        continue;
 
-                if (params.inputs[i].GetLayout() == layout.first && params.inputs[i].Feature().v % layout.second != 0)
-                    return false;
+                    auto block_size = layout->second;
+                    if (fused_op.tensors[in].Batch().v % block_size.first != 0 || fused_op.tensors[in].Feature().v % block_size.second != 0)
+                        return false;
+
+                    no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == fused_op.tensors[in]);
+                }
             }
+        }
+
+        for (size_t i = 0; i < params.inputs.size(); i++) {
+            no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == params.inputs[i]);
+            auto layout = block_layouts.find(params.inputs[i].GetLayout());
+            if (layout == block_layouts.end())
+                continue;
 
-            no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == params.output);
+            auto block_size = layout->second;
+            if (params.inputs[i].Batch().v % block_size.first != 0 || params.inputs[i].Feature().v % block_size.second != 0)
+                return false;
         }
+
+        no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == params.output);
     }
 
     return no_pitch_same_dims;
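
For context, the reworked CheckInputsOutputNoPitchSameDims looks each tensor's layout up in a map from blocked layout to its (batch block, feature block) sizes and rejects the case as soon as either dimension is not a multiple of its block. The following is a minimal standalone sketch of that rule only; it uses simplified stand-in types rather than the actual kernel_selector tensors (the DataLayout values are trimmed, and Dims and IsBlockAligned are hypothetical names introduced here for illustration).

// Illustrative sketch, not the clDNN sources: per-layout (batch block,
// feature block) divisibility check as applied by the patch above.
#include <cstddef>
#include <iostream>
#include <map>
#include <utility>

// Hypothetical stand-ins for kernel_selector's DataLayout and tensor dims.
enum class DataLayout { b_fs_yx_fsv16, bs_fs_yx_bsv16_fsv16, b_fs_yx_fsv4 };

struct Dims {
    size_t batch;
    size_t feature;
};

// Maps a blocked layout to its (batch block, feature block) sizes,
// mirroring the block_layouts map introduced in the patch (subset shown).
static const std::map<DataLayout, std::pair<size_t, size_t>> block_layouts{
    {DataLayout::b_fs_yx_fsv16,        {1, 16}},
    {DataLayout::bs_fs_yx_bsv16_fsv16, {16, 16}},
    {DataLayout::b_fs_yx_fsv4,         {1, 4}},
};

// A tensor passes if its layout is not blocked at all, or if both batch and
// feature are multiples of the layout's block sizes.
bool IsBlockAligned(DataLayout layout, const Dims& d) {
    auto it = block_layouts.find(layout);
    if (it == block_layouts.end())
        return true;  // non-blocked layout: nothing to check
    const auto& block = it->second;
    return d.batch % block.first == 0 && d.feature % block.second == 0;
}

int main() {
    // batch 16, feature 32 fits bs_fs_yx_bsv16_fsv16; batch 3 does not.
    std::cout << IsBlockAligned(DataLayout::bs_fs_yx_bsv16_fsv16, {16, 32}) << "\n";  // prints 1
    std::cout << IsBlockAligned(DataLayout::bs_fs_yx_bsv16_fsv16, {3, 32}) << "\n";   // prints 0
}

Compared with the old code, which only tested feature divisibility per layout, the map lookup lets double-blocked layouts such as bs_fs_yx_bsv16_fsv16 also enforce batch alignment.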