Skip to content

Commit

Permalink
[CPU] Remove redundant macro
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangYiIntel committed Dec 18, 2024
1 parent b4b0f0d commit 5c838f7
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ void attn_dequant_u4_kernel(const uint8_t* src, TDST* dst, size_t n, float scale
mm512_uni_storeu_ps(dst + i, first_half);
mm512_uni_storeu_ps(dst + i + vec_len_f32_avx512, second_half);
}
#elif defined(HAVE_AVX2) || defined(HAVE_AVX512F)
#elif defined(HAVE_AVX2)
auto v256_zp = _mm256_set1_ps(zp);
auto v256_scale = _mm256_set1_ps(scale);
for (; i + vec_len_f32_avx2 * 2 <= n; i += vec_len_f32_avx2 * 2) {
Expand Down Expand Up @@ -171,7 +171,7 @@ void attn_dequant_s4_kernel(const uint8_t* src, TDST* dst, size_t n, float scale
mm512_uni_storeu_ps(dst + i + vec_len_f32_avx512, second_half);
}

#elif defined(HAVE_AVX2) || defined(HAVE_AVX512F)
#elif defined(HAVE_AVX2)
for (; i + vec_len_f32_avx2 * 2 <= n; i += vec_len_f32_avx2 * 2) {
auto v256_scale = _mm256_set1_ps(scale);
auto data = _mm_loadl_epi64(reinterpret_cast<__m128i*>(src_nc + i / 2));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ static void attn_acc_value_block(float* out,
mm512_uni_storeu_ps(out + dst_offset + i, v_out0);
mm512_uni_storeu_ps(out + dst_offset + i + vec_len_f32_avx512, v_out1);
}
# elif defined(HAVE_AVX2) || defined(HAVE_AVX512F)
# elif defined(HAVE_AVX2)
auto v256_attn_w_vec0 = _mm256_set1_ps(weight[j] * v0[0]);
auto v256_zp = _mm256_set1_ps(v0[1]);
for (; i + vec_len_f32_avx2 * 2 <= group_size; i += vec_len_f32_avx2 * 2) {
Expand Down Expand Up @@ -514,7 +514,7 @@ static void attn_acc_value_block(float* out,
mm512_uni_storeu_ps(out + dst_offset + i, v_out0);
mm512_uni_storeu_ps(out + dst_offset + i + vec_len_f32_avx512, v_out1);
}
# elif defined(HAVE_AVX2) || defined(HAVE_AVX512F)
# elif defined(HAVE_AVX2)
auto v256_attn_w_vec0 = _mm256_set1_ps(weight[j] * v0[0]);
for (; i + vec_len_f32_avx2 * 2 <= group_size; i += vec_len_f32_avx2 * 2) {
auto data = _mm_loadl_epi64(reinterpret_cast<__m128i*>(v_ptr + i / 2 + src_offset + params_offset));
Expand Down

0 comments on commit 5c838f7

Please sign in to comment.