Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CPU]fix offset in store emitter issue #26083

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ size_t jit_load_emitter::aux_gprs_count() const {
}

void jit_load_emitter::emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs) const {
// offset in load emitter is the offset of src gpr register, should be parsed from in_idxs.
const int offset = in_idxs.size() == 2 ? in_idxs[1] : 0;
if (host_isa_ == cpu::x64::sse41) {
emit_isa<cpu::x64::sse41>(Reg64(in_idxs[0]), static_cast<int>(out_idxs[0]), offset);
Expand Down Expand Up @@ -676,7 +677,8 @@ void jit_store_emitter::emit_data() const {
}

void jit_store_emitter::emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs) const {
const int offset = in_idxs.size() == 2 ? in_idxs[1] : 0;
// offset in store emitter is the offset of dst gpr register, should be parsed from out_ids.
Copy link
Contributor

@a-sidorova a-sidorova Aug 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: The best place for this comment in the corresponding header file before class definitions 😃 Another developer will take a look at interface and won't find the description for offset. And only after searching in files, he will find this comment.

const int offset = out_idxs.size() == 2 ? out_idxs[1] : 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please also add a description regarding offset for store and load emitter?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree. Otherwise this is sacred knowledge which is known by a few developers

if (host_isa_ == cpu::x64::sse41) {
emit_isa<cpu::x64::sse41>(static_cast<int>(in_idxs[0]), Reg64(out_idxs[0]), offset);
} else if (host_isa_ == cpu::x64::avx2) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ jit_store_memory_emitter::jit_store_memory_emitter(jit_generator* h, cpu_isa_t i

void jit_store_memory_emitter::emit_impl(const std::vector<size_t>& in, const std::vector<size_t>& out) const {
OV_CPU_JIT_EMITTER_ASSERT(store_emitter, "Store CPU emitter isn't initialized!");
store_emitter->emit_code({in[0], compiled_byte_offset}, {out[0]}, aux_vec_idxs, get_available_aux_gprs());
store_emitter->emit_code({in[0]}, {out[0], compiled_byte_offset}, aux_vec_idxs, get_available_aux_gprs());
}

void jit_store_memory_emitter::emit_data() const {
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/nodes/interaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ struct jit_move_scale_kernel : public jit_uni_move_scale_kernel, public jit_gene
emitters[seed].reset(new jit_store_emitter(this, isa, src_prc, dst_prc, elt_num));
}

emitters[seed]->emit_code({static_cast<size_t>(vmm_src.getIdx()), 0}, {static_cast<size_t>(reg_dst.getIdx())},
emitters[seed]->emit_code({static_cast<size_t>(vmm_src.getIdx())}, {static_cast<size_t>(reg_dst.getIdx())},
pool_aux_vmm_idxs, pool_aux_gpr_idxs);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ void jit_rotary_kernel<isa>::store(const Xbyak::Reg64& reg_dst, const Vmm& vmm_s
if (!emitters[seed]) {
emitters[seed].reset(new jit_store_emitter(this, isa, ov::element::f32, dst_prc, elt_num));
}
emitters[seed]->emit_code({static_cast<size_t>(vmm_src.getIdx()), offset}, {static_cast<size_t>(reg_dst.getIdx())},
emitters[seed]->emit_code({static_cast<size_t>(vmm_src.getIdx())}, {static_cast<size_t>(reg_dst.getIdx()), offset},
pool_aux_vmm_idxs, pool_aux_gpr_idxs);
}

Expand Down
6 changes: 3 additions & 3 deletions src/plugins/intel_cpu/src/nodes/mha.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ struct jit_mul_add_softmax_kernel : public jit_uni_mul_add_softmax_kernel, publi
emitters[seed].reset(new jit_store_emitter(this, isa, ov::element::f32, dst_prc, elt_num));
}

emitters[seed]->emit_code({static_cast<size_t>(vmm_src.getIdx()), 0}, {static_cast<size_t>(reg_dst.getIdx())},
emitters[seed]->emit_code({static_cast<size_t>(vmm_src.getIdx())}, {static_cast<size_t>(reg_dst.getIdx())},
pool_aux_vmm_idxs, pool_aux_gpr_idxs);
}

Expand Down Expand Up @@ -480,7 +480,7 @@ struct jit_convert_reorder_kernel : public jit_uni_convert_reorder_kernel, publi
emitters[seed].reset(new jit_store_emitter(this, isa, ov::element::f32, dst_prc, elt_num));
}

emitters[seed]->emit_code({static_cast<size_t>(vmm_src.getIdx()), 0}, {static_cast<size_t>(reg_dst.getIdx())},
emitters[seed]->emit_code({static_cast<size_t>(vmm_src.getIdx())}, {static_cast<size_t>(reg_dst.getIdx())},
pool_aux_vmm_idxs, pool_aux_gpr_idxs);
}

Expand Down Expand Up @@ -639,7 +639,7 @@ struct jit_convert_transpose_kernel : public jit_uni_convert_transpose_kernel, p
emitters[seed].reset(new jit_store_emitter(this, isa, src_prc, dst_prc, elt_num));
}

emitters[seed]->emit_code({static_cast<size_t>(vmm_src.getIdx()), 0}, {static_cast<size_t>(reg_dst.getIdx())},
emitters[seed]->emit_code({static_cast<size_t>(vmm_src.getIdx())}, {static_cast<size_t>(reg_dst.getIdx())},
pool_aux_vmm_idxs, pool_aux_gpr_idxs);
}

Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/nodes/roi_align.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,8 @@ struct jit_uni_roi_align_kernel_f32 : public jit_uni_roi_align_kernel, public ji
std::vector<size_t> local_store_pool_vec_idxs = { static_cast<size_t>(vmm_dst.getIdx()) };
local_store_pool_vec_idxs.insert(local_store_pool_vec_idxs.begin(), store_pool_vec_idxs.begin(), store_pool_vec_idxs.end());

emitters[seed]->emit_code({static_cast<size_t>(vmm_dst.getIdx()), static_cast<size_t>(offset)},
{static_cast<size_t>(reg_dst.getIdx())},
emitters[seed]->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
{static_cast<size_t>(reg_dst.getIdx()), static_cast<size_t>(offset)},
{local_store_pool_vec_idxs}, {store_pool_gpr_idxs});
}

Expand Down
11 changes: 7 additions & 4 deletions src/plugins/intel_cpu/src/nodes/roi_pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi
for (int i = 0; i < c_blocks; i++) {
Vmm vmm_dst = get_acc_reg(i);

store_emitter->emit_code({static_cast<size_t>(vmm_dst.getIdx()), static_cast<size_t>(i * dst_c_off)}, {static_cast<size_t>(reg_output.getIdx())},
store_emitter->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
{static_cast<size_t>(reg_output.getIdx()), static_cast<size_t>(i * dst_c_off)},
get_local_store_pool_vec_idxs(vmm_dst), store_pool_gpr_idxs);
}
}
Expand Down Expand Up @@ -260,7 +261,8 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi

const int dst_c_off = i * jpp_.oh * jpp_.ow * jpp_.c_block * jpp_.dst_prc.size();

store_emitter->emit_code({static_cast<size_t>(vmm_src11.getIdx()), static_cast<size_t>(dst_c_off)}, {static_cast<size_t>(reg_output.getIdx())},
store_emitter->emit_code({static_cast<size_t>(vmm_src11.getIdx())},
{static_cast<size_t>(reg_output.getIdx()), static_cast<size_t>(dst_c_off)},
get_local_store_pool_vec_idxs(vmm_src11), store_pool_gpr_idxs);
}
}
Expand All @@ -270,8 +272,9 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi

const int dst_c_off = jpp_.oh * jpp_.ow * jpp_.c_block * jpp_.dst_prc.size();
for (int i = 0; i < c_blocks; i++) {
store_empty_roi_emitter->emit_code({static_cast<size_t>(vmm_zero.getIdx()), static_cast<size_t>(i * dst_c_off)},
{static_cast<size_t>(reg_output.getIdx())}, store_pool_vec_idxs, store_pool_gpr_idxs);
store_empty_roi_emitter->emit_code({static_cast<size_t>(vmm_zero.getIdx())},
{static_cast<size_t>(reg_output.getIdx()), static_cast<size_t>(i * dst_c_off)},
store_pool_vec_idxs, store_pool_gpr_idxs);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/nodes/topk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,8 @@ struct jit_uni_topk_kernel_f32 : public jit_uni_topk_kernel, public jit_generato
std::vector<size_t> local_store_pool_vec_idxs = { static_cast<size_t>(vmm_dst.getIdx()) };
local_store_pool_vec_idxs.insert(local_store_pool_vec_idxs.begin(), store_pool_vec_idxs.begin(), store_pool_vec_idxs.end());

emitters[seed]->emit_code({static_cast<size_t>(vmm_dst.getIdx()), static_cast<size_t>(offset)},
{static_cast<size_t>(reg_dst.getIdx())},
emitters[seed]->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
{static_cast<size_t>(reg_dst.getIdx()), static_cast<size_t>(offset)},
{local_store_pool_vec_idxs}, {store_pool_gpr_idxs});
}

Expand Down
Loading