Skip to content

Commit

Permalink
Removed Contexts from load and store emitters
Browse files Browse the repository at this point in the history
  • Loading branch information
a-sidorova committed Aug 10, 2022
1 parent 1b5a130 commit 3232a90
Show file tree
Hide file tree
Showing 13 changed files with 985 additions and 835 deletions.
5 changes: 5 additions & 0 deletions src/plugins/intel_cpu/src/emitters/jit_emitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ enum emitter_in_out_map {
gpr_to_gpr,
};

// Structure for storage of emitter parameters to hash in map.
//
// Abstract base: derived parameter structs override hash() to produce the
// value used as the map key.
struct emitter_params {
    // Virtual destructor so that a derived parameter object can be destroyed
    // safely through a pointer or reference to emitter_params.
    virtual ~emitter_params() = default;

    // Returns the hash of the stored parameters; must be provided by every
    // derived parameter struct.
    virtual size_t hash() const = 0;
};

// Polymorphic base type for emitter contexts.
// It carries no data of its own; the defaulted virtual destructor lets
// derived contexts be destroyed through an emitter_context pointer.
struct emitter_context {
    virtual ~emitter_context() = default;
};
Expand Down
932 changes: 475 additions & 457 deletions src/plugins/intel_cpu/src/emitters/jit_load_store_emitters.cpp

Large diffs are not rendered by default.

80 changes: 41 additions & 39 deletions src/plugins/intel_cpu/src/emitters/jit_load_store_emitters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,40 +15,37 @@ using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {

// NOTE(review): this span looks like a rendered diff without +/- markers, so lines of
// load_emitter_context and of load_emitter_params are interleaved — confirm against the real header.
//
// load_emitter_context: context derived from emitter_context. Its default constructor sets
// FP32 -> FP32, load_num 8, byte offset 0, no fill and fill value "zero".
struct load_emitter_context : public emitter_context {
load_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), load_num_(8),
offset_byte_(0), is_fill_(false), fill_value_("zero") {}
// load_emitter_params: derived from emitter_params. The constructor stores the source and
// destination precisions, the number of values to load and the fill settings
// (defaults: is_fill = false, fill_value = "zero"). It takes no byte offset.
struct load_emitter_params : public emitter_params {
load_emitter_params(Precision src_prc, Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero"):
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), is_fill_(is_fill), fill_value_(fill_value) {}

// load_emitter_context overload that additionally accepts a byte offset (default 0).
load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, int offset_byte = 0, bool is_fill = false, std::string fill_value = "zero"):
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), offset_byte_(offset_byte), is_fill_(is_fill), fill_value_(fill_value) {}
// Override of emitter_params::hash(); declared only, the definition is not part of this span.
size_t hash() const override;

// Byte offset; initialised only by the load_emitter_context constructors.
int offset_byte_;
// Number of values to load (appears twice here because of the interleaved diff lines).
int load_num_;
// Source and destination precisions.
Precision src_prc_;
Precision dst_prc_;
int load_num_;
// Fill flag and the name of the fill value (e.g. "zero").
bool is_fill_;
std::string fill_value_;
};

// NOTE(review): this span looks like a rendered diff without +/- markers, so lines of
// store_emitter_context and of store_emitter_params are interleaved — confirm against the real header.
//
// store_emitter_context: context derived from emitter_context. Its default constructor sets
// FP32 -> FP32, store_num 8 and byte offset 0.
struct store_emitter_context : public emitter_context {
store_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32),
store_num_(8), offset_byte_(0) {}
// store_emitter_params: derived from emitter_params. The constructor stores the source and
// destination precisions and the number of values to store. It takes no byte offset.
struct store_emitter_params : public emitter_params {
store_emitter_params(Precision src_prc, Precision dst_prc, int store_num):
src_prc_(src_prc), dst_prc_(dst_prc), store_num_(store_num) {}

// store_emitter_context overload that additionally accepts a byte offset (default 0).
store_emitter_context(Precision src_prc, Precision dst_prc, int store_num, int offset_byte = 0)
: src_prc_(src_prc), dst_prc_(dst_prc), store_num_(store_num), offset_byte_(offset_byte) {}
// Override of emitter_params::hash(); declared only, the definition is not part of this span.
size_t hash() const override;

// Byte offset; initialised only by the store_emitter_context constructors.
int offset_byte_;
// Number of values to store.
int store_num_;
// Source and destination precisions.
Precision src_prc_;
Precision dst_prc_;
};

class jit_load_emitter : public jit_emitter {
public:
jit_load_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::gpr_to_vec);
jit_load_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, int load_num, Precision src_prc, Precision dst_prc,
Precision exec_prc = Precision::FP32, bool is_fill = false, std::string fill_value = "zero",
emitter_in_out_map in_out_type = emitter_in_out_map::gpr_to_vec);
/**
* load_num values with src_prc precision are loaded from ptr[Reg64(in_idxs[0]) + offset_byte] address to Vmm[out_idxs[0]] as dst_prc.
* load_num values with src_prc precision are loaded from ptr[Reg64(in_idxs[0]) + offset_byte] address to Vmm[out_idxs[0]] as dst_prc, where offset_byte is in_idxs[1]
* is_fill: when load_num can not fully fit in vector register, whether fill_value should be filled as default values.
* fill_value: when load_num can not fully fit in vector register, what values should be filled as default values.
* currently support "zero", "int_one", "float_one", "int32_min", "float_min", "int32_max" and "float_max".
Expand All @@ -66,27 +63,23 @@ class jit_load_emitter : public jit_emitter {
* dst_prc
*/
void emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;

size_t get_inputs_num() const override;

private:
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const Xbyak::Reg64 &reg_src, int offset_byte, InferenceEngine::Precision src_prc,
const int out_vec_idx, InferenceEngine::Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero") const;
void emit_isa(const Xbyak::Reg64 &reg_src, const int out_vec_idx, const int offset) const;

template <typename Vmm>
void load_bytes(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, int load_size,
bool is_fill = false, std::string fill_value = "zero") const;
void load_bytes(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, int load_size) const;

template <typename Vmm>
void load_bytes_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_signed, int load_size,
bool is_fill = false, std::string fill_value = "zero") const;
void load_bytes_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_signed, int load_size) const;

template <typename Vmm>
void load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_bf16, bool is_signed, int load_size,
bool is_fill = false, std::string fill_value = "zero") const;
void load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_bf16, bool is_signed, int load_size) const;

template <typename Vmm>
void fill_with_default(const Vmm &vmm, std::string fill_value, const int &load_num) const;
Expand All @@ -95,17 +88,23 @@ class jit_load_emitter : public jit_emitter {

size_t aux_gprs_count() const override;

std::string name;
int v_len_elt; // 4/8/16
std::string name_;
int v_len_elt_; // 4/8/16
int load_num_;
int load_size_;
Precision src_prc_;
Precision dst_prc_;
bool is_fill_;
std::string fill_value_;
};

class jit_store_emitter : public jit_emitter {
public:
jit_store_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_gpr);
jit_store_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, int size_num, Precision src_prc, Precision dst_prc,
Precision exec_prc = Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_gpr);

/**
* store_num values with src_prc in Vmm[in_vec_idx] is stored to ptr[reg_dst + offset_byte] address as dst_prc data.
* store_num values with src_prc in Vmm[in_vec_idx] is stored to ptr[reg_dst + offset_byte] address as dst_prc data, where offset_byte is in_idxs[1]
* supported src_prc and dst_prc pairs are as below(x indicate for support):
* FP32 I32 I16 U16 I8 U8 BF16 --> src_prc
* FP32 x x
Expand All @@ -120,21 +119,20 @@ class jit_store_emitter : public jit_emitter {
* note: FP32/I32-->BF16(x*) is supported only on at least avx512-core platform
*/
void emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;

size_t get_inputs_num() const override;

void emit_data() const override;

std::shared_ptr<jit_emu_vcvtneps2bf16> get_emu_vcvtneps2bf16() const {
return emu_vcvtneps2bf16;
return emu_vcvtneps2bf16_;
}

private:
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const int in_vec_idx, InferenceEngine::Precision src_prc,
const Xbyak::Reg64 &reg_dst, int offset_byte, InferenceEngine::Precision dst_prc, int store_num) const;
void emit_isa(const int in_vec_idx, const Xbyak::Reg64 &reg_dst, const int offset) const;

template <typename Vmm>
void store_bytes(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, int store_size) const;
Expand All @@ -148,9 +146,13 @@ class jit_store_emitter : public jit_emitter {
size_t aux_gprs_count() const override;
size_t aux_vecs_count() const override;

std::string name;
int v_len_elt; // 4/8/16
std::shared_ptr<jit_emu_vcvtneps2bf16> emu_vcvtneps2bf16;
std::string name_;
int v_len_elt_; // 4/8/16
int store_num_;
int store_size_;
Precision src_prc_;
Precision dst_prc_;
std::shared_ptr<jit_emu_vcvtneps2bf16> emu_vcvtneps2bf16_;
};

} // namespace intel_cpu
Expand Down
7 changes: 7 additions & 0 deletions src/plugins/intel_cpu/src/nodes/color_convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,9 @@ class JitConverter<T[N]> : public jit_uni_converter {

template<typename T, size_t N>
void JitConverter<T[N]>::generate() {
init_load<float, T>(N);
init_store<T, float>(N);

preamble();

// Get arguments addresses
Expand Down Expand Up @@ -776,6 +779,10 @@ class JitConverter<T[N]> : public jit_uni_converter {

template<typename T, size_t N>
void JitConverter<T[N]>::generate() {
init_load<float, T>(N);
init_load<float, T>(N / 2);
init_store<T, float>(N);

preamble();

// Get arguments addresses
Expand Down
Loading

0 comments on commit 3232a90

Please sign in to comment.