Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CPU] Removed contexts from Load/Store emitters #12446

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/plugins/intel_cpu/src/emitters/jit_emitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ enum emitter_in_out_map {
gpr_to_gpr,
};

// structure for storage of emitter parameters to hash in map
struct emitter_params {
    // Polymorphic base: derived params (e.g. load/store emitter params) may be
    // owned and destroyed through an emitter_params pointer, so the destructor
    // must be virtual to avoid undefined behavior on delete-through-base.
    virtual ~emitter_params() = default;

    // Returns a hash of the concrete parameter set, used as a key when caching
    // emitters in a map.
    virtual size_t hash() const = 0;
};

// Polymorphic base for per-call emitter context objects passed to emit_impl().
// Carries no data itself; the virtual destructor allows derived contexts to be
// deleted through a base pointer. NOTE(review): this change set removes its use
// from the load/store emitters (replaced by emitter_params hashing), so it
// appears to remain only for other emitters still taking a context.
struct emitter_context {
    virtual ~emitter_context() = default;
};
Expand Down
952 changes: 496 additions & 456 deletions src/plugins/intel_cpu/src/emitters/jit_load_store_emitters.cpp

Large diffs are not rendered by default.

82 changes: 42 additions & 40 deletions src/plugins/intel_cpu/src/emitters/jit_load_store_emitters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,40 +15,37 @@ using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {

struct load_emitter_context : public emitter_context {
load_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), load_num_(8),
offset_byte_(0), is_fill_(false), fill_value_("zero") {}
struct load_emitter_params : public emitter_params {
load_emitter_params(Precision src_prc, Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero"):
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), is_fill_(is_fill), fill_value_(fill_value) {}

load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, int offset_byte = 0, bool is_fill = false, std::string fill_value = "zero"):
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), offset_byte_(offset_byte), is_fill_(is_fill), fill_value_(fill_value) {}
size_t hash() const override;

int offset_byte_;
int load_num_;
Precision src_prc_;
Precision dst_prc_;
int load_num_;
bool is_fill_;
std::string fill_value_;
};

struct store_emitter_context : public emitter_context {
store_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32),
store_num_(8), offset_byte_(0) {}
struct store_emitter_params : public emitter_params {
store_emitter_params(Precision src_prc, Precision dst_prc, int store_num):
src_prc_(src_prc), dst_prc_(dst_prc), store_num_(store_num) {}

store_emitter_context(Precision src_prc, Precision dst_prc, int store_num, int offset_byte = 0)
: src_prc_(src_prc), dst_prc_(dst_prc), store_num_(store_num), offset_byte_(offset_byte) {}
size_t hash() const override;

int offset_byte_;
int store_num_;
Precision src_prc_;
Precision dst_prc_;
int store_num_;
};

class jit_load_emitter : public jit_emitter {
public:
jit_load_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::gpr_to_vec);
jit_load_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, Precision src_prc, Precision dst_prc, int load_num,
Precision exec_prc = Precision::FP32, bool is_fill = false, std::string fill_value = "zero",
emitter_in_out_map in_out_type = emitter_in_out_map::gpr_to_vec);
/**
* load_num values with src_prc precision are loaded from ptr[Reg64(in_idxs[0]) + offset_byte] address to Vmm[out_idxs[0]] as dst_prc.
* load_num values with src_prc precision are loaded from ptr[Reg64(in_idxs[0]) + offset_byte] address to Vmm[out_idxs[0]] as dst_prc, where offset_byte is in_idxs[1]
* is_fill: when load_num can not fully fit in vector register, whether fill_value should be filled as default values.
* fill_value: when load_num can not fully fit in vector register, what values should be filled as default values.
* currently support "zero", "int_one", "float_one", "int32_min", "float_min", "int32_max" and "float_max".
Expand All @@ -66,27 +63,23 @@ class jit_load_emitter : public jit_emitter {
* dst_prc
*/
void emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;

size_t get_inputs_num() const override;

private:
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const Xbyak::Reg64 &reg_src, int offset_byte, InferenceEngine::Precision src_prc,
const int out_vec_idx, InferenceEngine::Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero") const;
void emit_isa(const Xbyak::Reg64 &reg_src, const int out_vec_idx, const int offset) const;

template <typename Vmm>
void load_bytes(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, int load_size,
bool is_fill = false, std::string fill_value = "zero") const;
void load_bytes(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, int load_size) const;

template <typename Vmm>
void load_bytes_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_signed, int load_size,
bool is_fill = false, std::string fill_value = "zero") const;
void load_bytes_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_signed, int load_size) const;

template <typename Vmm>
void load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_bf16, bool is_signed, int load_size,
bool is_fill = false, std::string fill_value = "zero") const;
void load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_bf16, bool is_signed, int load_size) const;

template <typename Vmm>
void fill_with_default(const Vmm &vmm, std::string fill_value, const int &load_num) const;
Expand All @@ -95,17 +88,23 @@ class jit_load_emitter : public jit_emitter {

size_t aux_gprs_count() const override;

std::string name;
int v_len_elt; // 4/8/16
std::string name_;
int v_len_elt_; // 4/8/16
int load_num_;
int load_size_;
Precision src_prc_;
Precision dst_prc_;
bool is_fill_;
std::string fill_value_;
};

class jit_store_emitter : public jit_emitter {
public:
jit_store_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_gpr);
jit_store_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, Precision src_prc, Precision dst_prc, int store_num,
Precision exec_prc = Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_gpr);

/**
* store_num values with src_prc in Vmm[in_vec_idx] is stored to ptr[reg_dst + offset_byte] address as dst_prc data.
* store_num values with src_prc in Vmm[in_vec_idx] is stored to ptr[reg_dst + offset_byte] address as dst_prc data, where offset_byte is in_idxs[1]
* supported src_prc and dst_prc pairs are as below(x indicate for support):
* FP32 I32 I16 U16 I8 U8 BF16 --> src_prc
* FP32 x x
Expand All @@ -120,21 +119,20 @@ class jit_store_emitter : public jit_emitter {
* note: FP32/I32-->BF16(x*) is supported only on at least avx512-core platform
*/
void emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;

size_t get_inputs_num() const override;

void emit_data() const override;

std::shared_ptr<jit_emu_vcvtneps2bf16> get_emu_vcvtneps2bf16() const {
return emu_vcvtneps2bf16;
return emu_vcvtneps2bf16_;
}

private:
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const int in_vec_idx, InferenceEngine::Precision src_prc,
const Xbyak::Reg64 &reg_dst, int offset_byte, InferenceEngine::Precision dst_prc, int store_num) const;
void emit_isa(const int in_vec_idx, const Xbyak::Reg64 &reg_dst, const int offset) const;

template <typename Vmm>
void store_bytes(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, int store_size) const;
Expand All @@ -148,9 +146,13 @@ class jit_store_emitter : public jit_emitter {
size_t aux_gprs_count() const override;
size_t aux_vecs_count() const override;

std::string name;
int v_len_elt; // 4/8/16
std::shared_ptr<jit_emu_vcvtneps2bf16> emu_vcvtneps2bf16;
std::string name_;
int v_len_elt_; // 4/8/16
int store_num_;
int store_size_;
Precision src_prc_;
Precision dst_prc_;
std::shared_ptr<jit_emu_vcvtneps2bf16> emu_vcvtneps2bf16_;
};

} // namespace intel_cpu
Expand Down
Loading