From af36ad54597e35954f246227bd4ba412f7eb21ce Mon Sep 17 00:00:00 2001
From: Edward Shogulin
Date: Thu, 7 Mar 2024 14:48:27 +0000
Subject: [PATCH] [CPU] [ARM64] emitter pipeline fix: aux vectors store/restore + gpr registers update

---
Review notes (amendments relative to the patch as originally posted):

 * jit_emitter.cpp: the aux vector registers are now pushed/popped through
   QReg instead of XReg. XReg(aux_vec_idxs[i]) addresses the general-purpose
   register that happens to share the index, so the vector contents were never
   saved and the postamble overwrote unrelated GPRs.
   NOTE(review): this assumes get_vec_length() is 16 bytes (one Q register);
   please confirm.
 * jit_emitter.cpp: the restore loop runs while i > 0 (was i >= 0). i is always
   odd, so the emitted code is identical; the condition now visibly guards the
   aux_vec_idxs[i - 1] access.
 * jit_uni_eltwise_generic.hpp, get_src_gpr: X23 is skipped for every index at
   or above it (>= 23 instead of == 23). With == 23 both idx 4 and idx 5 were
   mapped to X24 and X26 was never handed out, contradicting the mapping table.
   NOTE(review): idx == MAX_ELTWISE_INPUTS still passes the range check; verify
   that the highest index cannot reach the temporaries X27/X28.
 * jit_uni_eltwise_generic.hpp, get_aux_gpr: the bound stays idx > 2. Only X23,
   X27 and X28 are temporaries; idx 3 would return X29, the frame pointer.
 * The blob ids on the "index" lines are those of the original patch and are
   stale after these amendments. Indentation was reconstructed from a
   whitespace-collapsed copy and may need adjusting before the patch applies.

 .../emitters/plugin/aarch64/jit_emitter.cpp   | 27 +++++++++++++++++++
 .../aarch64/jit_uni_eltwise_generic.hpp       | 22 ++++++++++++----
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_emitter.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_emitter.cpp
index 65aac61ba853e3..70c8ed2158a61b 100644
--- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_emitter.cpp
+++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_emitter.cpp
@@ -117,12 +117,39 @@ void jit_emitter::emitter_preamble(const std::vector& in_idxs,
         h->str(Xbyak_aarch64::XReg(preserved_gpr_idxs[i]), pre_ptr(h->sp, -16));
     }

+    const size_t aux_vec_idxs_size = aux_vec_idxs.size();
+    if (aux_vec_idxs_size > 1ull) {
+        for (size_t i = 0; i < (aux_vec_idxs_size - 1); i += 2) {
+            h->stp(Xbyak_aarch64::QReg(aux_vec_idxs[i]),
+                   Xbyak_aarch64::QReg(aux_vec_idxs[i + 1]),
+                   pre_ptr(h->sp, -get_vec_length() * 2));
+        }
+    }
+    if (aux_vec_idxs_size % 2) {
+        h->str(Xbyak_aarch64::QReg(aux_vec_idxs[aux_vec_idxs_size - 1]),
+               pre_ptr(h->sp, -get_vec_length()));
+    }
+
     if (!entry_map_.empty()) {
         load_table_addr();
     }
 }

 void jit_emitter::emitter_postamble() const {
+    const int aux_vec_idxs_size = static_cast<int>(aux_vec_idxs.size());
+    if (aux_vec_idxs_size % 2) {
+        h->ldr(Xbyak_aarch64::QReg(aux_vec_idxs[aux_vec_idxs_size - 1]),
+               post_ptr(h->sp, get_vec_length()));
+    }
+    if (aux_vec_idxs_size > 1) {
+        const int begin = aux_vec_idxs_size - ((aux_vec_idxs_size % 2) ? 2 : 1);
+        for (int i = begin; i > 0; i -= 2) {
+            h->ldp(Xbyak_aarch64::QReg(aux_vec_idxs[i - 1]),
+                   Xbyak_aarch64::QReg(aux_vec_idxs[i]),
+                   post_ptr(h->sp, get_vec_length() * 2));
+        }
+    }
+
     const int size = static_cast<int>(preserved_gpr_idxs.size());
     for (int i = (size - 1); i >= 0; --i) {
         h->ldr(Xbyak_aarch64::XReg(preserved_gpr_idxs[i]), post_ptr(h->sp, 16));
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp
index fac17b28830156..a03c4813c4c1ed 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp
@@ -138,12 +138,12 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, jit_generator {
     // X20 | src ptr
     // X21 | src ptr
     // X22 | src ptr
-    // X23 | src ptr
+    // X23 | temporary & kernel used (oneDNN: X_TMP_0)
     // X24 | src ptr
     // X25 | src ptr
-    // X26 | temporary
+    // X26 | src ptr
     // X27 | temporary
-    // X28 | kernel used (X_DEFAULT_ADDR)
+    // X28 | temporary & kernel used (oneDNN: X_DEFAULT_ADDR)
     // X29 | [not used: The Frame Pointer (FP)]
     // X30 | [not used: The Link Register (LR)]

@@ -156,14 +156,26 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, jit_generator {
         if (idx > MAX_ELTWISE_INPUTS) {
             OPENVINO_THROW("source vector ptr register " + std::to_string(idx) + " is not supported");
         }
-        return XReg(19 + idx);
+
+        const uint32_t base = 19;
+        if ((base + idx) >= 23) {
+            idx++;
+        }
+
+        return XReg(base + idx);
     }

     inline XReg get_aux_gpr(const uint32_t idx) {
         if (idx > 2) {
             OPENVINO_THROW("aux gpr register " + std::to_string(idx) + " is not supported");
         }
-        return XReg(26 + idx);
+
+        if (idx == 0) {
+            return XReg(23);
+        }
+
+        const uint32_t base = 27;
+        return XReg(base + idx - 1);
     }

     // Vector registers mapping