Skip to content

Commit

Permalink
Applied comments by Chenhu
Browse files Browse the repository at this point in the history
  • Loading branch information
a-sidorova committed Aug 15, 2022
1 parent 8fa8d92 commit 02deb5c
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 17 deletions.
29 changes: 20 additions & 9 deletions src/plugins/intel_cpu/src/emitters/jit_load_store_emitters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,24 @@ jit_load_emitter::jit_load_emitter(dnnl::impl::cpu::x64::jit_generator *host, dn
size_t jit_load_emitter::get_inputs_num() const { return 1; }

/// Number of auxiliary general-purpose registers requested by the load emitter.
///
/// At most two registers are counted:
///   - one for the table address, when `is_fill_` is set;
///   - one temporary for the mask load on AVX-512, when it is needed.
///
/// @return 0, 1 or 2.
size_t jit_load_emitter::aux_gprs_count() const {
    size_t count = 0;

    // 1 for table address
    if (is_fill_)
        count++;

    // 1 for the temp reg used by the mask load in avx512, if needed.
    if (mayiuse(cpu::x64::avx512_core)) {
        // Checking load_num_ * dst_prc_.size() (rather than load_num_ alone) covers
        // both cases: src and dst precisions equal, and not equal.
        // NOTE(review): 64/32/16 are presumably the byte widths of full zmm/ymm/xmm
        // registers - confirm against emit_isa().
        const bool is_listed_size = one_of(load_num_ * dst_prc_.size(), 64, 32, 16);
        if (!is_listed_size || is_fill_)
            count++;
    }

    return count;
}

void jit_load_emitter::emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
Expand Down Expand Up @@ -558,7 +568,8 @@ size_t jit_store_emitter::aux_vecs_count() const {
if ((src_prc_.is_float() && !dst_prc_.is_float()) || (!src_prc_.is_float() && dst_prc_.is_float()))
count++;
// zero value, zeroed and passed from caller from performance standpoint(zeroed one time and not need preserve and restore status)
if ((mayiuse(cpu::x64::avx512_core) && !one_of(store_num_, 16, 8, 4)) || one_of(dst_prc_, Precision::U8, Precision::U16))
// using store_num_ * src_prc_.size() covers both cases: src and dst precisions equal, and not equal
if ((mayiuse(cpu::x64::avx512_core) && !one_of(store_num_ * src_prc_.size(), 64, 32, 16)) || one_of(dst_prc_, Precision::U8, Precision::U16))
count++;
return count;
}
Expand Down
10 changes: 10 additions & 0 deletions src/plugins/intel_cpu/src/utils/jit_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,21 @@ InferenceEngine::Precision type2precision<float>() {
return InferenceEngine::Precision::FP32;
}

/// Specialization: reports Precision::BF16 for the bfloat16_t element type.
template<>
InferenceEngine::Precision type2precision<bfloat16_t>() {
    using Precision = InferenceEngine::Precision;
    return Precision::BF16;
}

/// Specialization: reports Precision::U8 for the uint8_t element type.
template<>
InferenceEngine::Precision type2precision<uint8_t>() {
    using Precision = InferenceEngine::Precision;
    return Precision::U8;
}

/// Specialization: reports Precision::I8 for the int8_t element type.
template<>
InferenceEngine::Precision type2precision<int8_t>() {
    using Precision = InferenceEngine::Precision;
    return Precision::I8;
}

cpu_isa_t get_current_isa() {
if (mayiuse(cpu_isa_t::avx512_core))
return cpu_isa_t::avx512_core;
Expand Down
31 changes: 23 additions & 8 deletions src/tests/unit/cpu/jit_kernel_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,15 +318,30 @@ struct jit_variable_load_store_test_kernel {
};

TEST(JitKernel, variable_load_and_store) {
jit_variable_load_store_test_kernel<uint8_t, float> kernel;
if (mayiuse(cpu_isa_t::avx512_core)) {
kernel.test<16>();
}
if (mayiuse(cpu_isa_t::avx2)) {
kernel.test<8>();
{
jit_variable_load_store_test_kernel<uint8_t, float> kernel;
if (mayiuse(cpu_isa_t::avx512_core)) {
kernel.test<16>();
}
if (mayiuse(cpu_isa_t::avx2)) {
kernel.test<8>();
}
if (mayiuse(cpu_isa_t::sse41)) {
kernel.test<4>();
}
}
if (mayiuse(cpu_isa_t::sse41)) {
kernel.test<4>();

{
jit_variable_load_store_test_kernel<int8_t, int8_t> kernel;
if (mayiuse(cpu_isa_t::avx512_core)) {
kernel.test<16>();
}
if (mayiuse(cpu_isa_t::avx2)) {
kernel.test<8>();
}
if (mayiuse(cpu_isa_t::sse41)) {
kernel.test<4>();
}
}
}

Expand Down

0 comments on commit 02deb5c

Please sign in to comment.