From ec3d68916fb1b03554e00b35b568874ba56093b6 Mon Sep 17 00:00:00 2001 From: Xiuchuan Zhai Date: Thu, 2 Jan 2025 13:58:26 +0800 Subject: [PATCH] Revert "x64: brgemm convolution: update req_cal_comp_pad condition" This reverts commit 05d68df233bb67046697758962cd32bd6d23a956. --- src/cpu/x64/jit_brgemm_conv.cpp | 4 ---- src/cpu/x64/jit_brgemm_conv_utils.cpp | 12 +++--------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/cpu/x64/jit_brgemm_conv.cpp b/src/cpu/x64/jit_brgemm_conv.cpp index 0a31fca4672..a4a725645cf 100644 --- a/src/cpu/x64/jit_brgemm_conv.cpp +++ b/src/cpu/x64/jit_brgemm_conv.cpp @@ -1526,10 +1526,6 @@ status_t brgemm_convolution_fwd_t::cal_compensation( const int max_ker_sz = adjusted_k.size(); const auto comp_buffer_ow = jcp.exec_type != exec_vpad ? jcp.ow : 1; - // TODO: revise the thread distribution here because the work_amount may be - // insufficient - // TODO: revise comp_vpad_pbuffer_ generator to avoid huge code for cases - // with big ow const auto work_amount = static_cast(jcp.ngroups) * jcp.nb_oc * max_ker_sz; const auto is_small_shape = work_amount <= jcp.nthr diff --git a/src/cpu/x64/jit_brgemm_conv_utils.cpp b/src/cpu/x64/jit_brgemm_conv_utils.cpp index b8f43e5a4d3..4185479017b 100644 --- a/src/cpu/x64/jit_brgemm_conv_utils.cpp +++ b/src/cpu/x64/jit_brgemm_conv_utils.cpp @@ -2301,18 +2301,12 @@ status_t init_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, // For padding shapes, we calculate the comp along with the computation // inside brgemm kernel when output size is small to get optimal perf - // For shapes with large ow we calculate the comp inside brgemm kernel too - // because current implementation of brgemm_comp_pad kernel unrolled by ow - // so not optimal for large ow. 
- // Otherwise we calculate the comp using brgemm_comp_pad kernel + // Or we calculate the comp using brgemm_comp_pad kernel const auto output_sz = static_cast(jcp.mb) * jcp.ngroups * jcp.oc * jcp.od * jcp.oh * jcp.ow; - // TODO: revise below condition to avoid limitation for big ow - const auto shape_for_brgemm_kernel - = (output_sz <= 8192 && jcp.oc < 512) || jcp.ow > 128; - const auto is_relo = jcp.is_relo() && jcp.relo_conv_weights; jcp.req_brg_comp_pad = compensation_w_padding && jcp.exec_type != exec_trans - && IMPLICATION(!is_relo, shape_for_brgemm_kernel); + && IMPLICATION(!(jcp.is_relo() && jcp.relo_conv_weights), + output_sz <= 8192 && jcp.oc < 512); jcp.req_cal_comp_pad = compensation_w_padding && !jcp.req_brg_comp_pad && IMPLICATION(jcp.exec_type == exec_vpad, jcp.t_pad > 0 || jcp.b_pad > 0 || jcp.f_pad > 0