Commit
Removed Contexts from load and store emitters
a-sidorova committed Aug 10, 2022
1 parent 1b5a130 commit 958ab3f
Showing 13 changed files with 988 additions and 834 deletions.
5 changes: 5 additions & 0 deletions src/plugins/intel_cpu/src/emitters/jit_emitter.hpp
@@ -23,6 +23,11 @@ enum emitter_in_out_map {
gpr_to_gpr,
};

// Structure for storing emitter parameters to be hashed as a map key
struct emitter_params {
virtual size_t hash() const = 0;
};

struct emitter_context {
virtual ~emitter_context() = default;
};
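The new emitter_params base only declares the hash() interface; the derived structs (load_emitter_params and store_emitter_params below) implement the actual key computation in the .cpp file, which is not rendered on this page. As a rough sketch of the pattern — the hash_combine helper, the struct name and its fields are illustrative assumptions, not code from this commit:

#include <cstddef>
#include <functional>
#include <string>

// Mirrors the base struct added to jit_emitter.hpp above.
struct emitter_params {
    virtual size_t hash() const = 0;
};

// Hypothetical boost-style helper that folds one more value into the running hash.
template <typename T>
static void hash_combine(std::size_t &seed, const T &value) {
    seed ^= std::hash<T>()(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Illustrative derived struct: every field that affects code generation goes into the key,
// so two emitters that would generate identical code map to the same cache entry.
struct example_params : public emitter_params {
    int num_ = 0;
    bool flag_ = false;
    std::string mode_ = "zero";

    size_t hash() const override {
        std::size_t seed = 0;
        hash_combine(seed, num_);
        hash_combine(seed, flag_);
        hash_combine(seed, mode_);
        return seed;
    }
};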
934 changes: 478 additions & 456 deletions src/plugins/intel_cpu/src/emitters/jit_load_store_emitters.cpp

Large diffs are not rendered by default.

80 changes: 41 additions & 39 deletions src/plugins/intel_cpu/src/emitters/jit_load_store_emitters.hpp
@@ -15,40 +15,37 @@ using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {

struct load_emitter_context : public emitter_context {
load_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), load_num_(8),
offset_byte_(0), is_fill_(false), fill_value_("zero") {}
struct load_emitter_params : public emitter_params {
load_emitter_params(Precision src_prc, Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero"):
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), is_fill_(is_fill), fill_value_(fill_value) {}

load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, int offset_byte = 0, bool is_fill = false, std::string fill_value = "zero"):
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), offset_byte_(offset_byte), is_fill_(is_fill), fill_value_(fill_value) {}
size_t hash() const override;

int offset_byte_;
int load_num_;
Precision src_prc_;
Precision dst_prc_;
int load_num_;
bool is_fill_;
std::string fill_value_;
};

struct store_emitter_context : public emitter_context {
store_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32),
store_num_(8), offset_byte_(0) {}
struct store_emitter_params : public emitter_params {
store_emitter_params(Precision src_prc, Precision dst_prc, int store_num):
src_prc_(src_prc), dst_prc_(dst_prc), store_num_(store_num) {}

store_emitter_context(Precision src_prc, Precision dst_prc, int store_num, int offset_byte = 0)
: src_prc_(src_prc), dst_prc_(dst_prc), store_num_(store_num), offset_byte_(offset_byte) {}
size_t hash() const override;

int offset_byte_;
int store_num_;
Precision src_prc_;
Precision dst_prc_;
};

class jit_load_emitter : public jit_emitter {
public:
jit_load_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::gpr_to_vec);
jit_load_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, int load_num, Precision src_prc, Precision dst_prc,
Precision exec_prc = Precision::FP32, bool is_fill = false, std::string fill_value = "zero",
emitter_in_out_map in_out_type = emitter_in_out_map::gpr_to_vec);
/**
* load_num values with src_prc precision are loaded from ptr[Reg64(in_idxs[0]) + offset_byte] address to Vmm[out_idxs[0]] as dst_prc.
* load_num values with src_prc precision are loaded from ptr[Reg64(in_idxs[0]) + offset_byte] address to Vmm[out_idxs[0]] as dst_prc, where offset_byte is in_idxs[1]
* is_fill: when load_num cannot fully fill the vector register, whether the remaining elements should be filled with a default value.
* fill_value: the default value used for that filling.
* currently supported values are "zero", "int_one", "float_one", "int32_min", "float_min", "int32_max" and "float_max".
@@ -66,27 +63,23 @@ class jit_load_emitter : public jit_emitter {
* dst_prc
*/
void emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;

size_t get_inputs_num() const override;

private:
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const Xbyak::Reg64 &reg_src, int offset_byte, InferenceEngine::Precision src_prc,
const int out_vec_idx, InferenceEngine::Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero") const;
void emit_isa(const Xbyak::Reg64 &reg_src, const int out_vec_idx, const int offset) const;

template <typename Vmm>
void load_bytes(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, int load_size,
bool is_fill = false, std::string fill_value = "zero") const;
void load_bytes(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, int load_size) const;

template <typename Vmm>
void load_bytes_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_signed, int load_size,
bool is_fill = false, std::string fill_value = "zero") const;
void load_bytes_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_signed, int load_size) const;

template <typename Vmm>
void load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_bf16, bool is_signed, int load_size,
bool is_fill = false, std::string fill_value = "zero") const;
void load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_bf16, bool is_signed, int load_size) const;

template <typename Vmm>
void fill_with_default(const Vmm &vmm, std::string fill_value, const int &load_num) const;
@@ -95,17 +88,23 @@

size_t aux_gprs_count() const override;

std::string name;
int v_len_elt; // 4/8/16
std::string name_;
int v_len_elt_; // 4/8/16
int load_num_;
int load_size_;
Precision src_prc_;
Precision dst_prc_;
bool is_fill_;
std::string fill_value_;
};

class jit_store_emitter : public jit_emitter {
public:
jit_store_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_gpr);
jit_store_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, int size_num, Precision src_prc, Precision dst_prc,
Precision exec_prc = Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_gpr);

/**
* store_num values with src_prc in Vmm[in_vec_idx] are stored to the ptr[reg_dst + offset_byte] address as dst_prc data.
* store_num values with src_prc in Vmm[in_vec_idx] are stored to the ptr[reg_dst + offset_byte] address as dst_prc data, where offset_byte is in_idxs[1].
* supported src_prc and dst_prc pairs are as below (x indicates support):
* FP32 I32 I16 U16 I8 U8 BF16 --> src_prc
* FP32 x x
@@ -120,21 +119,20 @@
* note: FP32/I32-->BF16(x*) is supported only on at least avx512-core platform
*/
void emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;

size_t get_inputs_num() const override;

void emit_data() const override;

std::shared_ptr<jit_emu_vcvtneps2bf16> get_emu_vcvtneps2bf16() const {
return emu_vcvtneps2bf16;
return emu_vcvtneps2bf16_;
}

private:
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const int in_vec_idx, InferenceEngine::Precision src_prc,
const Xbyak::Reg64 &reg_dst, int offset_byte, InferenceEngine::Precision dst_prc, int store_num) const;
void emit_isa(const int in_vec_idx, const Xbyak::Reg64 &reg_dst, const int offset) const;

template <typename Vmm>
void store_bytes(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, int store_size) const;
Expand All @@ -148,9 +146,13 @@ class jit_store_emitter : public jit_emitter {
size_t aux_gprs_count() const override;
size_t aux_vecs_count() const override;

std::string name;
int v_len_elt; // 4/8/16
std::shared_ptr<jit_emu_vcvtneps2bf16> emu_vcvtneps2bf16;
std::string name_;
int v_len_elt_; // 4/8/16
int store_num_;
int store_size_;
Precision src_prc_;
Precision dst_prc_;
std::shared_ptr<jit_emu_vcvtneps2bf16> emu_vcvtneps2bf16_;
};

} // namespace intel_cpu
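With the context structs removed, a caller fixes the precisions and element count when the emitter is constructed and passes the byte offset through in_idxs[1], as the updated doc comments above describe. A minimal sketch of that calling convention, assuming the emit_code() signature inherited from jit_emitter and that the Xbyak/oneDNN types are pulled in by the header; the function and register names are illustrative, not part of this commit:

#include <memory>
#include "emitters/jit_load_store_emitters.hpp"   // the header shown above

void emit_f32_load(dnnl::impl::cpu::x64::jit_generator *host,
                   dnnl::impl::cpu::x64::cpu_isa_t isa,
                   const Xbyak::Reg64 &reg_src,
                   const Xbyak::Xmm &vmm_dst,
                   size_t offset_byte) {
    using InferenceEngine::Precision;
    // Precisions and load_num are now constructor arguments (previously they travelled
    // in a load_emitter_context object at emit time).
    auto load = std::make_unique<ov::intel_cpu::jit_load_emitter>(
        host, isa, /*load_num=*/4, Precision::FP32, Precision::FP32);
    // The byte offset rides in as the second input index, per the doc comment above.
    load->emit_code({static_cast<size_t>(reg_src.getIdx()), offset_byte},
                    {static_cast<size_t>(vmm_dst.getIdx())});
}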
7 changes: 7 additions & 0 deletions src/plugins/intel_cpu/src/nodes/color_convert.cpp
@@ -422,6 +422,9 @@ class JitConverter<T[N]> : public jit_uni_converter {

template<typename T, size_t N>
void JitConverter<T[N]>::generate() {
init_load<float, T>(N);
init_store<T, float>(N);

preamble();

// Get arguments addresses
@@ -776,6 +779,10 @@ class JitConverter<T[N]> : public jit_uni_converter {

template<typename T, size_t N>
void JitConverter<T[N]>::generate() {
init_load<float, T>(N);
init_load<float, T>(N / 2);
init_store<T, float>(N);

preamble();

// Get arguments addresses
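The init_load<float, T>(N) and init_store<T, float>(N) calls added at the top of generate() suggest the converter now creates its load/store emitters up front, since element counts and precisions are baked into the constructors after this change. Their definitions live in files not rendered on this page, so the following is only a guess at the shape of such a helper; the precision_of mapping, the function name and the template-parameter order are all assumptions:

#include <cstdint>
#include <memory>
#include <type_traits>
#include "emitters/jit_load_store_emitters.hpp"

// Hypothetical mapping from a C++ element type to an InferenceEngine precision.
template <typename T>
InferenceEngine::Precision precision_of() {
    if (std::is_same<T, float>::value)   return InferenceEngine::Precision::FP32;
    if (std::is_same<T, uint8_t>::value) return InferenceEngine::Precision::U8;
    return InferenceEngine::Precision::UNSPECIFIED;
}

// Sketch of an init_load-style helper: it only forwards the deduced precisions and the
// element count to the reworked jit_load_emitter constructor.
template <typename SrcT, typename DstT>
std::shared_ptr<ov::intel_cpu::jit_load_emitter> init_load_sketch(
        dnnl::impl::cpu::x64::jit_generator *host,
        dnnl::impl::cpu::x64::cpu_isa_t isa,
        size_t n) {
    return std::make_shared<ov::intel_cpu::jit_load_emitter>(
        host, isa, static_cast<int>(n), precision_of<SrcT>(), precision_of<DstT>());
}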