From 04665a62277e2a7028ec92dcf1c58238b7768a57 Mon Sep 17 00:00:00 2001 From: chenhuwa Date: Mon, 1 Feb 2021 16:16:11 +0800 Subject: [PATCH] an overloaded emit() and size_t index --- .../mkldnn_plugin/nodes/common/emitter.cpp | 43 ++++-- .../src/mkldnn_plugin/nodes/common/emitter.h | 32 ++-- .../nodes/common/jit_load_store_emitters.cpp | 22 +-- .../nodes/common/jit_load_store_emitters.h | 8 +- .../mkldnn_plugin/nodes/common/softmax.cpp | 2 +- .../nodes/jit_eltwise_emitters.cpp | 138 +++++++++--------- .../nodes/jit_eltwise_emitters.hpp | 138 +++++++++--------- .../nodes/jit_mkldnn_emitters.cpp | 5 +- .../nodes/jit_mkldnn_emitters.hpp | 5 +- .../nodes/mkldnn_eltwise_node.cpp | 14 +- .../nodes/mkldnn_interpolate_node.cpp | 2 +- .../mkldnn_plugin/nodes/mkldnn_mvn_node.cpp | 64 ++++---- .../nodes/mkldnn_normalize_node.cpp | 2 +- .../nodes/mkldnn_reduce_node.cpp | 4 +- .../src/mkldnn_plugin/nodes/region_yolo.cpp | 2 +- .../src/mkldnn_plugin/utils/bfloat16.hpp | 4 +- 16 files changed, 253 insertions(+), 232 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/emitter.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/emitter.cpp index 9c14dc5ca4190b..4fda8243000939 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/emitter.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/emitter.cpp @@ -23,7 +23,7 @@ size_t jit_emitter::get_vec_length() const { one_of(host_isa_, cpu::x64::avx2) ? 32 : 16; } -void jit_emitter::push_vec(const Xbyak::Address &addr, int vec_idx) const { +void jit_emitter::push_vec(const Xbyak::Address &addr, size_t vec_idx) const { if (host_isa_ == cpu::x64::sse41) { h->uni_vmovups(addr, Xmm(vec_idx)); } else if (host_isa_ == cpu::x64::avx2) { @@ -33,7 +33,7 @@ void jit_emitter::push_vec(const Xbyak::Address &addr, int vec_idx) const { } } -void jit_emitter::pop_vec(int vec_idx, const Xbyak::Address &addr) const { +void jit_emitter::pop_vec(size_t vec_idx, const Xbyak::Address &addr) const { if (host_isa_ == cpu::x64::sse41) { h->uni_vmovups(Xmm(vec_idx), addr); } else if (host_isa_ == cpu::x64::avx2) { @@ -56,8 +56,8 @@ std::set jit_emitter::get_supported_precisions() { return {InferenceEngine::Precision::FP32}; } -void jit_emitter::emitter_preamble(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { +void jit_emitter::emitter_preamble(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { using namespace Xbyak::util; bool is_vec_input = (in_out_type_ == emitter_in_out_map::vec_to_vec) || (in_out_type_ == emitter_in_out_map::vec_to_gpr); bool is_vec_output = (in_out_type_ == emitter_in_out_map::vec_to_vec) || (in_out_type_ == emitter_in_out_map::gpr_to_vec); @@ -67,7 +67,7 @@ void jit_emitter::emitter_preamble(const std::vector &in_idxs, const std::v // For sse41 mask register has to be Xmm(0) if (host_isa_ == cpu::x64::sse41 && aux_vecs_count() > 0) { - int idx = 0; + size_t idx = 0; if (is_vec_input) assert(std::find(in_idxs.begin(), in_idxs.end(), idx) == in_idxs.end()); if (is_vec_output) @@ -88,7 +88,7 @@ void jit_emitter::emitter_preamble(const std::vector &in_idxs, const std::v } } - for (int idx = 0; idx < get_max_vecs_count(); idx++) { + for (size_t idx = 0; idx < get_max_vecs_count(); idx++) { if (aux_vec_idxs.size() >= aux_vecs_count()) break; if (is_vec_input) { @@ -109,8 +109,8 @@ void jit_emitter::emitter_preamble(const std::vector &in_idxs, const std::v for (auto idx : pool_gpr_idxs) aux_gpr_idxs.push_back(idx); - for (int gpr_idx = 0; gpr_idx <= Operand::R15; ++gpr_idx) { - int _idx = Operand::R15 - gpr_idx; // we allocate from the end + for (size_t gpr_idx = 0; gpr_idx <= Operand::R15; ++gpr_idx) { + size_t _idx = Operand::R15 - gpr_idx; // we allocate from the end if (aux_gpr_idxs.size() >= aux_gprs_count()) break; if (_idx == Operand::RSP) continue; @@ -134,13 +134,13 @@ void jit_emitter::emitter_preamble(const std::vector &in_idxs, const std::v aux_gpr_idxs.erase(aux_gpr_idxs.end() - 1); } - for (int i = 0; i < preserved_gpr_idxs.size(); ++i) + for (size_t i = 0; i < preserved_gpr_idxs.size(); ++i) h->push(Reg64(preserved_gpr_idxs[i])); if (preserved_vec_idxs.size()) h->sub(h->rsp, preserved_vec_idxs.size() * get_vec_length()); - for (int i = 0; i < preserved_vec_idxs.size(); ++i) { + for (size_t i = 0; i < preserved_vec_idxs.size(); ++i) { push_vec(h->ptr[h->rsp + i * get_vec_length()], preserved_vec_idxs[i]); } @@ -151,7 +151,7 @@ void jit_emitter::emitter_preamble(const std::vector &in_idxs, const std::v void jit_emitter::emitter_postamble() { using namespace Xbyak::util; - for (int i = 0; i < preserved_vec_idxs.size(); ++i) + for (size_t i = 0; i < preserved_vec_idxs.size(); ++i) pop_vec(preserved_vec_idxs[i], h->ptr[h->rsp + i * get_vec_length()]); if (preserved_vec_idxs.size()) @@ -198,12 +198,25 @@ void jit_emitter::prepare_table() { } } -void jit_emitter::emit(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, - const std::shared_ptr &emit_context) { +void jit_emitter::emit(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { emitter_preamble(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs); - emit_impl(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs, emit_context.get()); + emit_impl(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs); + + emitter_postamble(); +} + +void jit_emitter::emit(const std::vector &in_idxs, const std::vector &out_idxs, + const std::shared_ptr &emit_context, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + emitter_preamble(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs); + + if (emit_context) { + emit_impl(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs, emit_context.get()); + } else { + emit_impl(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs); + } emitter_postamble(); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/emitter.h b/inference-engine/src/mkldnn_plugin/nodes/common/emitter.h index afc8188558e0f4..75a302ab4be171 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/emitter.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/emitter.h @@ -31,9 +31,12 @@ class jit_emitter { k_mask = Xbyak::Opmask(1); // FIXME: in general case we need preserve k_mask state as well } - virtual void emit(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}, - const std::shared_ptr &emit_context = nullptr); + virtual void emit(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}); + + virtual void emit(const std::vector &in_idxs, const std::vector &out_idxs, + const std::shared_ptr &emit_context, + const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}); virtual void emit_table(); virtual size_t get_inputs_num() = 0; virtual size_t aux_vecs_count() const; @@ -84,18 +87,21 @@ class jit_emitter { _cmp_gt_os = mkldnn::impl::cpu::x64::jit_generator::_cmp_nle_us, }; - virtual void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + virtual void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) {} + + virtual void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) {} - virtual void emitter_preamble(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs); + virtual void emitter_preamble(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs); virtual void emitter_postamble(); emitter_in_out_map in_out_type_; - std::vector aux_vec_idxs; - std::vector aux_gpr_idxs; + std::vector aux_vec_idxs; + std::vector aux_gpr_idxs; static constexpr int k_mask_size = 8; @@ -123,11 +129,11 @@ class jit_emitter { } private: - std::vector preserved_vec_idxs; - std::vector preserved_gpr_idxs; + std::vector preserved_vec_idxs; + std::vector preserved_gpr_idxs; - void push_vec(const Xbyak::Address &addr, int vec_idx) const; - void pop_vec(int vec_idx, const Xbyak::Address &addr) const; + void push_vec(const Xbyak::Address &addr, size_t vec_idx) const; + void pop_vec(size_t vec_idx, const Xbyak::Address &addr) const; size_t table_off(std::string& key, size_t key_off_val_shift = 0) const { // assumption: all table entries sharing the same key also diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.cpp index 358c8c68cd3e5f..75850b92b79eea 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.cpp @@ -33,8 +33,8 @@ size_t jit_load_emitter::aux_gprs_count() const { return 2; } -void jit_load_emitter::emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_load_emitter::emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { const auto* load_emitter_context = dynamic_cast(emit_context); if (load_emitter_context == nullptr) { @@ -42,13 +42,13 @@ void jit_load_emitter::emit_impl(const std::vector &in_idxs, const std::vec } if (host_isa_ == cpu::x64::sse41) { - emit_isa(Reg64(in_idxs[0]), load_emitter_context->offset_byte_, load_emitter_context->src_prc_, out_idxs[0], + emit_isa(Reg64(in_idxs[0]), load_emitter_context->offset_byte_, load_emitter_context->src_prc_, static_cast(out_idxs[0]), load_emitter_context->dst_prc_, load_emitter_context->load_num_, load_emitter_context->is_fill_, load_emitter_context->fill_value_); } else if (host_isa_ == cpu::x64::avx2) { - emit_isa(Reg64(in_idxs[0]), load_emitter_context->offset_byte_, load_emitter_context->src_prc_, out_idxs[0], + emit_isa(Reg64(in_idxs[0]), load_emitter_context->offset_byte_, load_emitter_context->src_prc_, static_cast(out_idxs[0]), load_emitter_context->dst_prc_, load_emitter_context->load_num_, load_emitter_context->is_fill_, load_emitter_context->fill_value_); } else if (host_isa_ == cpu::x64::avx512_common) { - emit_isa(Reg64(in_idxs[0]), load_emitter_context->offset_byte_, load_emitter_context->src_prc_, out_idxs[0], + emit_isa(Reg64(in_idxs[0]), load_emitter_context->offset_byte_, load_emitter_context->src_prc_, static_cast(out_idxs[0]), load_emitter_context->dst_prc_, load_emitter_context->load_num_, load_emitter_context->is_fill_, load_emitter_context->fill_value_); } else { THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " is performed on unsupported isa(at least x64::sse41)."; @@ -510,21 +510,21 @@ size_t jit_store_emitter::aux_vecs_count() const { size_t jit_store_emitter::get_inputs_num() { return 1; } -void jit_store_emitter::emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_store_emitter::emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { const auto* store_emitter_context = dynamic_cast(emit_context); if (store_emitter_context == nullptr) { THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " does not get store emmiter context."; } if (host_isa_ == cpu::x64::sse41) { - emit_isa(in_idxs[0], store_emitter_context->src_prc_, Reg64(out_idxs[0]), + emit_isa(static_cast(in_idxs[0]), store_emitter_context->src_prc_, Reg64(out_idxs[0]), store_emitter_context->offset_byte_, store_emitter_context->dst_prc_, store_emitter_context->store_num_); } else if (host_isa_ == cpu::x64::avx2) { - emit_isa(in_idxs[0], store_emitter_context->src_prc_, Reg64(out_idxs[0]), + emit_isa(static_cast(in_idxs[0]), store_emitter_context->src_prc_, Reg64(out_idxs[0]), store_emitter_context->offset_byte_, store_emitter_context->dst_prc_, store_emitter_context->store_num_); } else if (host_isa_ == cpu::x64::avx512_common) { - emit_isa(in_idxs[0], store_emitter_context->src_prc_, Reg64(out_idxs[0]), + emit_isa(static_cast(in_idxs[0]), store_emitter_context->src_prc_, Reg64(out_idxs[0]), store_emitter_context->offset_byte_, store_emitter_context->dst_prc_, store_emitter_context->store_num_); } else { THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " is performed on unsupported isa(at least x64::sse41)."; @@ -829,7 +829,7 @@ template if (mayiuse(cpu::x64::avx512_core_bf16)) { h->vcvtneps2bf16(ymm, zmm); } else { - emu_vcvtneps2bf16->emit({vmm.getIdx()}, {ymm.getIdx()}); + emu_vcvtneps2bf16->emit({static_cast(vmm.getIdx())}, {static_cast(ymm.getIdx())}); } if (store_num == 16) { h->vmovdqu16(ptr[reg + offset], ymm); diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.h b/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.h index 50cae59d75b220..332d54903e5b99 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.h @@ -64,8 +64,8 @@ class jit_load_emitter : public jit_emitter { * \|/ * dst_prc */ - void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; size_t get_inputs_num() override; @@ -117,8 +117,8 @@ class jit_store_emitter : public jit_emitter { * dst_prc * note: FP32/I32-->BF16(x*) is supported only on at least avx512-core plateform */ - void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; size_t get_inputs_num() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/softmax.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/softmax.cpp index a5ba5b4023c250..3358215c5d262a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/softmax.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/softmax.cpp @@ -218,7 +218,7 @@ struct jit_uni_softmax_kernel_f32 : public jit_uni_softmax_kernel, public jit_ge if (mayiuse(avx512_core_bf16)) vcvtneps2bf16(ymm_dst, vmm_dst); else - emu_vcvtneps2bf16->emit({vmm_dst.getIdx()}, {ymm_dst.getIdx()}); + emu_vcvtneps2bf16->emit({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); vmovdqu16(op, ymm_dst); break; default: diff --git a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp index da16d87c7803e2..447c812f5ac191 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp @@ -24,8 +24,8 @@ jit_add_emitter::jit_add_emitter(jit_generator *host, cpu_isa_t host_isa, const size_t jit_add_emitter::get_inputs_num() { return 2; } -void jit_add_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_add_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -39,7 +39,7 @@ void jit_add_emitter::emit_impl(const std::vector &in_vec_idxs, const std:: } template -void jit_add_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_add_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -59,8 +59,8 @@ jit_mul_add_emitter::jit_mul_add_emitter(jit_generator *host, cpu_isa_t host_isa size_t jit_mul_add_emitter::get_inputs_num() { return 3; } -void jit_mul_add_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_mul_add_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -74,7 +74,7 @@ void jit_mul_add_emitter::emit_impl(const std::vector &in_vec_idxs, const s } template -void jit_mul_add_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_mul_add_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -119,8 +119,8 @@ jit_subtract_emitter::jit_subtract_emitter(jit_generator *host, cpu_isa_t host_i size_t jit_subtract_emitter::get_inputs_num() { return 2; } -void jit_subtract_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_subtract_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -134,7 +134,7 @@ void jit_subtract_emitter::emit_impl(const std::vector &in_vec_idxs, const } template -void jit_subtract_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_subtract_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -155,8 +155,8 @@ jit_multiply_emitter::jit_multiply_emitter(jit_generator *host, cpu_isa_t host_i size_t jit_multiply_emitter::get_inputs_num() { return 2; } -void jit_multiply_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_multiply_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -170,7 +170,7 @@ void jit_multiply_emitter::emit_impl(const std::vector &in_vec_idxs, const } template -void jit_multiply_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_multiply_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -191,8 +191,8 @@ jit_divide_emitter::jit_divide_emitter(jit_generator *host, cpu_isa_t host_isa, size_t jit_divide_emitter::get_inputs_num() { return 2; } -void jit_divide_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_divide_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -206,7 +206,7 @@ void jit_divide_emitter::emit_impl(const std::vector &in_vec_idxs, const st } template -void jit_divide_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_divide_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -256,8 +256,8 @@ jit_floor_mod_emitter::jit_floor_mod_emitter(jit_generator *host, cpu_isa_t host size_t jit_floor_mod_emitter::get_inputs_num() { return 2; } -void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -271,7 +271,7 @@ void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const } template -void jit_floor_mod_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_floor_mod_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -306,8 +306,8 @@ jit_mod_emitter::jit_mod_emitter(jit_generator *host, cpu_isa_t host_isa, const size_t jit_mod_emitter::get_inputs_num() { return 2; } -void jit_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -321,7 +321,7 @@ void jit_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std:: } template -void jit_mod_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_mod_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -356,8 +356,8 @@ jit_maximum_emitter::jit_maximum_emitter(jit_generator *host, cpu_isa_t host_isa size_t jit_maximum_emitter::get_inputs_num() { return 2; } -void jit_maximum_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_maximum_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -371,7 +371,7 @@ void jit_maximum_emitter::emit_impl(const std::vector &in_vec_idxs, const s } template -void jit_maximum_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_maximum_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -404,8 +404,8 @@ jit_minimum_emitter::jit_minimum_emitter(jit_generator *host, cpu_isa_t host_isa size_t jit_minimum_emitter::get_inputs_num() { return 2; } -void jit_minimum_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_minimum_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -419,7 +419,7 @@ void jit_minimum_emitter::emit_impl(const std::vector &in_vec_idxs, const s } template -void jit_minimum_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_minimum_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -452,8 +452,8 @@ jit_squared_difference_emitter::jit_squared_difference_emitter(jit_generator *ho size_t jit_squared_difference_emitter::get_inputs_num() { return 2; } -void jit_squared_difference_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_squared_difference_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -467,7 +467,7 @@ void jit_squared_difference_emitter::emit_impl(const std::vector &in_vec_id } template -void jit_squared_difference_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_squared_difference_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -491,8 +491,8 @@ jit_power_dynamic_emitter::jit_power_dynamic_emitter(jit_generator *host, cpu_is size_t jit_power_dynamic_emitter::get_inputs_num() { return 2; } -void jit_power_dynamic_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_power_dynamic_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -506,7 +506,7 @@ void jit_power_dynamic_emitter::emit_impl(const std::vector &in_vec_idxs, c } template -void jit_power_dynamic_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_power_dynamic_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -600,8 +600,8 @@ jit_equal_emitter::jit_equal_emitter(jit_generator *host, cpu_isa_t host_isa, co size_t jit_equal_emitter::get_inputs_num() { return 2; } -void jit_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -615,7 +615,7 @@ void jit_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std } template -void jit_equal_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_equal_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -657,8 +657,8 @@ jit_not_equal_emitter::jit_not_equal_emitter(jit_generator *host, cpu_isa_t host size_t jit_not_equal_emitter::get_inputs_num() { return 2; } -void jit_not_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_not_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -672,7 +672,7 @@ void jit_not_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const } template -void jit_not_equal_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_not_equal_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -714,8 +714,8 @@ jit_greater_emitter::jit_greater_emitter(jit_generator *host, cpu_isa_t host_isa size_t jit_greater_emitter::get_inputs_num() { return 2; } -void jit_greater_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_greater_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -729,7 +729,7 @@ void jit_greater_emitter::emit_impl(const std::vector &in_vec_idxs, const s } template -void jit_greater_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_greater_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -771,8 +771,8 @@ jit_greater_equal_emitter::jit_greater_equal_emitter(jit_generator *host, cpu_is size_t jit_greater_equal_emitter::get_inputs_num() { return 2; } -void jit_greater_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_greater_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -786,7 +786,7 @@ void jit_greater_equal_emitter::emit_impl(const std::vector &in_vec_idxs, c } template -void jit_greater_equal_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_greater_equal_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -828,8 +828,8 @@ jit_less_emitter::jit_less_emitter(jit_generator *host, cpu_isa_t host_isa, cons size_t jit_less_emitter::get_inputs_num() { return 2; } -void jit_less_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_less_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -843,7 +843,7 @@ void jit_less_emitter::emit_impl(const std::vector &in_vec_idxs, const std: } template -void jit_less_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_less_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -885,8 +885,8 @@ jit_less_equal_emitter::jit_less_equal_emitter(jit_generator *host, cpu_isa_t ho size_t jit_less_equal_emitter::get_inputs_num() { return 2; } -void jit_less_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_less_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -900,7 +900,7 @@ void jit_less_equal_emitter::emit_impl(const std::vector &in_vec_idxs, cons } template -void jit_less_equal_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_less_equal_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -943,8 +943,8 @@ jit_logical_and_emitter::jit_logical_and_emitter(jit_generator *host, cpu_isa_t size_t jit_logical_and_emitter::get_inputs_num() { return 2; } -void jit_logical_and_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_logical_and_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -958,7 +958,7 @@ void jit_logical_and_emitter::emit_impl(const std::vector &in_vec_idxs, con } template -void jit_logical_and_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_logical_and_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -1021,8 +1021,8 @@ jit_logical_or_emitter::jit_logical_or_emitter(jit_generator *host, cpu_isa_t ho size_t jit_logical_or_emitter::get_inputs_num() { return 2; } -void jit_logical_or_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_logical_or_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -1036,7 +1036,7 @@ void jit_logical_or_emitter::emit_impl(const std::vector &in_vec_idxs, cons } template -void jit_logical_or_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_logical_or_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -1098,8 +1098,8 @@ jit_logical_xor_emitter::jit_logical_xor_emitter(jit_generator *host, cpu_isa_t size_t jit_logical_xor_emitter::get_inputs_num() { return 2; } -void jit_logical_xor_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_logical_xor_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -1113,7 +1113,7 @@ void jit_logical_xor_emitter::emit_impl(const std::vector &in_vec_idxs, con } template -void jit_logical_xor_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_logical_xor_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); @@ -1175,8 +1175,8 @@ jit_logical_not_emitter::jit_logical_not_emitter(jit_generator *host, cpu_isa_t size_t jit_logical_not_emitter::get_inputs_num() { return 1; } -void jit_logical_not_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_logical_not_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -1190,7 +1190,7 @@ void jit_logical_not_emitter::emit_impl(const std::vector &in_vec_idxs, con } template -void jit_logical_not_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_logical_not_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_dst = Vmm(out_vec_idxs[0]); @@ -1231,8 +1231,8 @@ jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_ size_t jit_power_static_emitter::get_inputs_num() { return 1; } -void jit_power_static_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_power_static_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -1246,7 +1246,7 @@ void jit_power_static_emitter::emit_impl(const std::vector &in_vec_idxs, co } template -void jit_power_static_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_power_static_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_dst = Vmm(out_vec_idxs[0]); @@ -1420,8 +1420,8 @@ jit_prelu_emitter::jit_prelu_emitter(jit_generator *host, cpu_isa_t host_isa, co size_t jit_prelu_emitter::get_inputs_num() { return 2; } -void jit_prelu_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, +void jit_prelu_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -1435,7 +1435,7 @@ void jit_prelu_emitter::emit_impl(const std::vector &in_vec_idxs, const std } template -void jit_prelu_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_prelu_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src0 = Vmm(in_vec_idxs[0]); Vmm vmm_src1 = Vmm(in_vec_idxs[1]); diff --git a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp index 99c5bdf147f5fd..fb8d2e16fb1480 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp @@ -18,12 +18,12 @@ class jit_add_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; class jit_mul_add_emitter : public jit_emitter { @@ -34,12 +34,12 @@ class jit_mul_add_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; size_t aux_vecs_count() const override; }; @@ -53,12 +53,12 @@ class jit_subtract_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; @@ -70,12 +70,12 @@ class jit_multiply_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; @@ -88,12 +88,12 @@ class jit_divide_emitter : public jit_emitter { static std::set get_supported_precisions(); private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; size_t aux_vecs_count() const override; }; @@ -106,12 +106,12 @@ class jit_floor_mod_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; size_t aux_vecs_count() const override; }; @@ -124,12 +124,12 @@ class jit_mod_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; size_t aux_vecs_count() const override; }; @@ -143,12 +143,12 @@ class jit_maximum_emitter : public jit_emitter { static std::set get_supported_precisions(); private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; @@ -161,12 +161,12 @@ class jit_minimum_emitter : public jit_emitter { static std::set get_supported_precisions(); private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; @@ -178,12 +178,12 @@ class jit_squared_difference_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; @@ -195,12 +195,12 @@ class jit_power_dynamic_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; @@ -212,12 +212,12 @@ class jit_equal_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -232,12 +232,12 @@ class jit_not_equal_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -252,12 +252,12 @@ class jit_greater_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -272,12 +272,12 @@ class jit_greater_equal_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -292,12 +292,12 @@ class jit_less_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -312,12 +312,12 @@ class jit_less_equal_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -332,12 +332,12 @@ class jit_logical_and_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -352,12 +352,12 @@ class jit_logical_or_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -372,12 +372,12 @@ class jit_logical_xor_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -391,12 +391,12 @@ class jit_logical_not_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -410,12 +410,12 @@ class jit_power_static_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; void register_table_entries() override; size_t aux_vecs_count() const override; @@ -429,12 +429,12 @@ class jit_prelu_emitter : public jit_emitter { size_t get_inputs_num() override; private: - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) override; template - void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; size_t aux_vecs_count() const override; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.cpp b/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.cpp index 8b4c2bc869d027..84132993e026e6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.cpp @@ -36,9 +36,8 @@ jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa, size_t jit_mkldnn_emitter::get_inputs_num() { return 1; } -void jit_mkldnn_emitter::emit(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, - const std::shared_ptr &emit_context) { +void jit_mkldnn_emitter::emit(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { if (host_isa_ == cpu::x64::sse41) { if (out_vec_idxs[0] != in_vec_idxs[0]) h->uni_vmovups(Xmm(out_vec_idxs[0]), Xmm(in_vec_idxs[0])); diff --git a/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.hpp b/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.hpp index 3b72e74e3f4065..4fb48ee962f6aa 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.hpp @@ -19,9 +19,8 @@ class jit_mkldnn_emitter : public jit_emitter { size_t get_inputs_num() override; - void emit(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}, - const std::shared_ptr &emit_context = nullptr) override; + void emit(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) override; void emit_table() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index 6c72752a798da9..438a0161dc3440 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -475,14 +475,14 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu } inline void compute_eltwise_op() { - std::vector in_idxs; - std::vector aux_idxs; + std::vector in_idxs; + std::vector aux_idxs; for (int i = 0; i < eltwise_emitter->get_inputs_num(); i++) in_idxs.push_back(get_vmm_reg(i).getIdx()); for (int i = 0; i < eltwise_emitter->aux_vecs_count(); i++) aux_idxs.push_back(get_aux_vmm(i).getIdx()); - std::vector out_idxs; + std::vector out_idxs; out_idxs.push_back(vmm_dst.getIdx()); eltwise_emitter->emit(in_idxs, out_idxs, aux_idxs); @@ -494,15 +494,15 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu int quantization_post_op_idx = 0; for (int i = 0; i < eltwiseNode.getFusedWith().size(); i++) { if (eltwiseNode.getFusedWith()[i].get()->getType() == Eltwise) { - std::vector in_idxs; - std::vector aux_idxs; + std::vector in_idxs; + std::vector aux_idxs; in_idxs.push_back(vmm_dst.getIdx()); for (int j = 1; j < post_op_emitters[eltwise_post_op_idx]->get_inputs_num(); j++) in_idxs.push_back(get_vmm_reg(input_idx++).getIdx()); for (int j = 0; j < post_op_emitters[eltwise_post_op_idx]->aux_vecs_count(); j++) aux_idxs.push_back(get_aux_vmm(j).getIdx()); - std::vector out_idxs; + std::vector out_idxs; out_idxs.push_back(vmm_dst.getIdx()); post_op_emitters[eltwise_post_op_idx]->emit(in_idxs, out_idxs, aux_idxs); @@ -647,7 +647,7 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu if (mayiuse(avx512_core_bf16)) vcvtneps2bf16(ymm_dst, vmm_dst); else - emu_vcvtneps2bf16->emit({vmm_dst.getIdx()}, {ymm_dst.getIdx()}); + emu_vcvtneps2bf16->emit({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); vmovdqu16(op, ymm_dst); break; case Precision::I16: diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index 4342c66a6cb828..6b6f19f29d2960 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -1297,7 +1297,7 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi if (mayiuse(avx512_core_bf16)) vcvtneps2bf16(ymm_dst, vmm_dst); else - emu_vcvtneps2bf16->emit({vmm_dst.getIdx()}, {ymm_dst.getIdx()}); + emu_vcvtneps2bf16->emit({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); vmovdqu16(op, ymm_dst); } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index d41fdbf69bb58d..80819f15aff91b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -84,7 +84,7 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k tail_num = jcp_.planar_layout ? (jcp_.D * jcp_.H * jcp_.W) - ((jcp_.D * jcp_.H * jcp_.W) / step) * step : jcp_.C - (jcp_.C / step) * step; - load_pool_gpr_idxs = {reg_load_table.getIdx(), reg_load_store_mask.getIdx()}; + load_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx()), static_cast(reg_load_table.getIdx())}; if (jcp_.planar_layout) { worker_unroll(); @@ -235,13 +235,13 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k std::unique_ptr load_emitter = nullptr; - std::vector load_pool_gpr_idxs; + std::vector load_pool_gpr_idxs; inline void worker_full_size() { Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? Precision::FP32 : Precision::I32; - load_emitter->emit({reg_src.getIdx()}, {vmm_val.getIdx()}, - {}, {load_pool_gpr_idxs}, - std::make_shared(jcp_.src_prc, dst_prc, step)); + load_emitter->emit({static_cast(reg_src.getIdx())}, {static_cast(vmm_val.getIdx())}, + std::make_shared(jcp_.src_prc, dst_prc, step), + {}, {load_pool_gpr_idxs}); if (jcp_.normalize_variance) { // all with float @@ -261,9 +261,9 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k inline void worker_tail_blk() { Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? Precision::FP32 : Precision::I32; - load_emitter->emit({reg_src.getIdx()}, {vmm_val.getIdx()}, - {}, {load_pool_gpr_idxs}, - std::make_shared(jcp_.src_prc, dst_prc, tail_num)); + load_emitter->emit({static_cast(reg_src.getIdx())}, {static_cast(vmm_val.getIdx())}, + std::make_shared(jcp_.src_prc, dst_prc, tail_num), + {}, {load_pool_gpr_idxs}); if (jcp_.normalize_variance) { // all with float @@ -305,9 +305,9 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k inline void worker_tail_planar() { Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? Precision::FP32 : Precision::I32; - load_emitter->emit({reg_src.getIdx()}, {vmm_val.getIdx()}, - {}, {load_pool_gpr_idxs}, - std::make_shared(jcp_.src_prc, dst_prc, tail_num, true, "zero")); + load_emitter->emit({static_cast(reg_src.getIdx())}, {static_cast(vmm_val.getIdx())}, + std::make_shared(jcp_.src_prc, dst_prc, tail_num, true, "zero"), + {}, {load_pool_gpr_idxs}); if (jcp_.normalize_variance) { if (!isFloatCompatible(jcp_.src_prc)) @@ -412,9 +412,9 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator tail_num = jcp_.planar_layout ? (jcp_.D * jcp_.H * jcp_.W) - ((jcp_.D * jcp_.H * jcp_.W) / step) * step : jcp_.C - (jcp_.C / step) * step; - load_pool_gpr_idxs = {reg_load_table.getIdx(), reg_load_store_mask.getIdx()}; - store_pool_gpr_idxs = {reg_load_store_mask.getIdx()}; - store_pool_vec_idxs = {vmm_zero.getIdx()}; + load_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx()), static_cast(reg_load_table.getIdx())}; + store_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx())}; + store_pool_vec_idxs = {static_cast(vmm_zero.getIdx())}; if (jcp_.planar_layout) { worker_mvn_unroll(); @@ -523,25 +523,25 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator std::vector>> depthwise_injectors; std::vector>> quantization_injectors; - std::vector store_pool_gpr_idxs; - std::vector store_pool_vec_idxs; - std::vector load_pool_gpr_idxs; + std::vector store_pool_gpr_idxs; + std::vector store_pool_vec_idxs; + std::vector load_pool_gpr_idxs; inline void worker_mvn(bool is_tail) { int elt_num = is_tail ? tail_num : step; - load_emitter->emit({reg_src.getIdx()}, {vmm_val.getIdx()}, - {}, {load_pool_gpr_idxs}, - std::make_shared(jcp_.src_prc, Precision::FP32, elt_num)); + load_emitter->emit({static_cast(reg_src.getIdx())}, {static_cast(vmm_val.getIdx())}, + std::make_shared(jcp_.src_prc, Precision::FP32, elt_num), + {}, {load_pool_gpr_idxs}); uni_vsubps(vmm_val, vmm_val, vmm_mean); if (jcp_.normalize_variance) uni_vmulps(vmm_val, vmm_val, vmm_variance_inv); - apply_post_ops(jcp_.dst_prc); + apply_post_ops(jcp_.dst_prc, jcp_.planar_layout); - store_emitter->emit({vmm_val.getIdx()}, {reg_dst.getIdx()}, - {store_pool_vec_idxs}, {store_pool_gpr_idxs}, - std::make_shared(Precision::FP32, jcp_.dst_prc, elt_num)); + store_emitter->emit({static_cast(vmm_val.getIdx())}, {static_cast(reg_dst.getIdx())}, + std::make_shared(Precision::FP32, jcp_.dst_prc, elt_num), + {store_pool_vec_idxs}, {store_pool_gpr_idxs}); } inline void worker_mvn_unroll(bool is_tail = false) { @@ -564,7 +564,7 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator L(mvn_loop_end_label); } - void apply_post_ops(InferenceEngine::Precision dst_prc) { + void apply_post_ops(InferenceEngine::Precision dst_prc, bool is_broadcast) { const auto &p = attr_.post_ops_; int eltwise_inj_idx = 0; int depthwise_inj_idx = 0; @@ -579,7 +579,7 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator mov(reg_d_bias, reinterpret_cast(post_op.depthwise.biases_data)); add(reg_d_weights, reg_oc_off); add(reg_d_bias, reg_oc_off); - depthwise_injectors[depthwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1, reg_d_weights, reg_d_bias); + depthwise_injectors[depthwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1, reg_d_weights, reg_d_bias, is_broadcast); depthwise_inj_idx++; } else if (post_op.is_quantization()) { bool do_dequantization = post_op.quantization.alg == alg_kind::quantization_quantize_dequantize; @@ -587,13 +587,13 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator int s_idx = vmm_val.getIdx(); quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_oc_off); - quantization_injectors[quantization_inj_idx]->compute_crop(s_idx, s_idx + 1, 0); + quantization_injectors[quantization_inj_idx]->compute_crop(s_idx, s_idx + 1, 0, 0, is_broadcast); quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_oc_off); - quantization_injectors[quantization_inj_idx]->compute_input_scale_shift(s_idx, s_idx + 1, 0, do_rounding); + quantization_injectors[quantization_inj_idx]->compute_input_scale_shift(s_idx, s_idx + 1, 0, do_rounding, 0, is_broadcast); quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_oc_off); - quantization_injectors[quantization_inj_idx]->compute_output_scale_shift(s_idx, s_idx + 1, 0); + quantization_injectors[quantization_inj_idx]->compute_output_scale_shift(s_idx, s_idx + 1, 0, 0, is_broadcast); quantization_inj_idx++; } @@ -636,7 +636,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { setPostOps(attr, true); Precision inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); - if (getParentEdgeAt(0)->getDims().ndims() < 4 || getParentEdgeAt(0)->getDims().ndims() > 5 + if (getParentEdgeAt(0)->getDims().ndims() < 3 || getParentEdgeAt(0)->getDims().ndims() > 5 || across_channels != 0 || normalize_variance != 1) { if (!isFloatCompatible(inputPrecision)) { inputPrecision = Precision::FP32; @@ -817,6 +817,7 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { quantizeNode->appendPostOps(ops); continue; } + auto* eltwiseNode = dynamic_cast(node.get()); if (eltwiseNode) { eltwiseNode->appendPostOps(ops); @@ -911,6 +912,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si arg.src_stride = src_stride_size; arg.dst_stride = dst_stride_size; arg.work_amount = static_cast(C2 / blk_size); // work amount for vector part + arg.oc_off = static_cast(c * sizeof(float)); (*mvn_kernel)(&arg); }); } @@ -926,6 +928,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si arg.src_stride = src_stride_size; arg.dst_stride = dst_stride_size; arg.work_amount = static_cast(C2 / blk_size); + arg.oc_off = static_cast(c * sizeof(float)); (*mvn_kernel)(&arg); }); } @@ -945,6 +948,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si arg.src_stride = src_stride_size; arg.dst_stride = dst_stride_size; arg.work_amount = static_cast(C2 / blk_size); + arg.oc_off = static_cast(c * sizeof(float)); (*mvn_mean_kernel)(&arg); mean *= C2inv; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp index 7ccbb167726fbe..94eb01e2e0fb59 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp @@ -603,7 +603,7 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji if (mayiuse(avx512_core_bf16)) vcvtneps2bf16(ymm_dst, vmm_dst); else - emu_vcvtneps2bf16->emit({vmm_dst.getIdx()}, {ymm_dst.getIdx()}); + emu_vcvtneps2bf16->emit({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); vmovdqu16(op, ymm_dst); } else if (dst_dt == memory::data_type::u8) { uni_vcvtps2dq(vmm_dst, vmm_dst); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp index 9e62b7d77972dc..19740905c3e7dc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp @@ -622,7 +622,7 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene if (mayiuse(avx512_core_bf16)) vcvtneps2bf16(ymm_dst, vmm_dst); else - emu_vcvtneps2bf16->emit({vmm_dst.getIdx()}, {ymm_dst.getIdx()}); + emu_vcvtneps2bf16->emit({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); vmovdqu16(op, ymm_dst); break; case memory::data_type::s8: @@ -1096,7 +1096,7 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi if (mayiuse(avx512_core_bf16)) vcvtneps2bf16(ymm_dst, vmm_dst); else - emu_vcvtneps2bf16->emit({vmm_dst.getIdx()}, {ymm_dst.getIdx()}); + emu_vcvtneps2bf16->emit({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); vmovdqu16(op, ymm_dst); break; case memory::data_type::s8: diff --git a/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp b/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp index 4eb8aa4c08325b..60b3fc27917156 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp @@ -223,7 +223,7 @@ struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_ if (mayiuse(avx512_core_bf16)) vcvtneps2bf16(ymm_dst, vmm_dst); else - emu_vcvtneps2bf16->emit({vmm_dst.getIdx()}, {ymm_dst.getIdx()}); + emu_vcvtneps2bf16->emit({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); vmovdqu16(op, ymm_dst); break; default: diff --git a/inference-engine/src/mkldnn_plugin/utils/bfloat16.hpp b/inference-engine/src/mkldnn_plugin/utils/bfloat16.hpp index 58d2138fc7053b..d999e1ce453f01 100644 --- a/inference-engine/src/mkldnn_plugin/utils/bfloat16.hpp +++ b/inference-engine/src/mkldnn_plugin/utils/bfloat16.hpp @@ -84,8 +84,8 @@ class jit_emu_vcvtneps2bf16 : public jit_emitter { size_t get_inputs_num() override { return 1; }; private: - void emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs, - const std::vector& pool_vec_idxs, const std::vector& pool_gpr_idxs, const emitter_context *emit_context) override { + void emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs, + const std::vector& pool_vec_idxs, const std::vector& pool_gpr_idxs) override { if (host_isa_ == mkldnn::impl::cpu::x64::cpu_isa_t::avx512_common) { Xbyak::Zmm in = Xbyak::Zmm(in_vec_idxs[0]); Xbyak::Ymm out = Xbyak::Ymm(out_vec_idxs[0]);