diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index 5018c9314b42c3..82018ed43c25a3 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -77,6 +77,12 @@ void cvt_copy(TA* dst, TB* src, size_t n) { i += inc; } #else + if (std::is_same::value && std::is_same::value) { + for (; i + vec_len_f32_neon <= n; i += vec_len_f32_neon) { + float32x4_t vb1 = __vld1q_f32(src + i); + __vst1q_f32(dst + i, vb1); + } + } #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) if (std::is_same::value && std::is_same::value) { for (; i + vec_len_f16_neon <= n; i += vec_len_f16_neon) { @@ -84,13 +90,6 @@ void cvt_copy(TA* dst, TB* src, size_t n) { vst1q_f16(reinterpret_cast(dst + i), vb1); } } -#else - if (std::is_same::value && std::is_same::value) { - for (; i + vec_len_f32_neon <= n; i += vec_len_f32_neon) { - float32x4_t vb1 = __vld1q_f32(src + i); - __vst1q_f32(dst + i, vb1); - } - } #endif #endif #endif