From 9a140cea89354fd7a9a9aa60e1c3875aef1cf0b4 Mon Sep 17 00:00:00 2001 From: geeky33 Date: Fri, 22 Nov 2024 21:31:02 +0530 Subject: [PATCH 1/7] [CPU][ARM] JIT Floor Mod Operation --- .../plugin/aarch64/jit_eltwise_emitters.cpp | 43 +++++++++++++++++++ .../plugin/aarch64/jit_eltwise_emitters.hpp | 21 +++++++++ .../nodes/executors/aarch64/jit_eltwise.cpp | 1 + 3 files changed, 65 insertions(+) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 355c8fb7f4c4d7..e79052afea825a 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -515,7 +515,50 @@ void jit_floor_emitter::emit_isa(const std::vector &in_vec_idxs, const s std::set> jit_floor_emitter::get_supported_precisions(const std::shared_ptr& node) { return {{element::f32}}; } +/// FLOOR_MOD /// +jit_floor_mod_emitter::jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { +} + +jit_floor_mod_emitter::jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc): jit_emitter(host, host_isa, exec_prc) { +} + +size_t jit_floor_mod_emitter::get_inputs_count() const { return 2; } + +size_t jit_floor_mod_emitter::get_aux_vecs_count() const { return 1; } +void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_floor_mod_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + + TReg dividend = TReg(in_vec_idxs[0]); + TReg divisor = TReg(in_vec_idxs[1]); + TReg r = TReg(out_vec_idxs[0]); + TReg aux = TReg(aux_vec_idxs[0]); + + h->fdiv(aux.s, dividend.s, divisor.s); + h->frintm(aux.s, aux.s); + h->fmul(aux.s, aux.s, divisor.s); + h->fsub(r.s, dividend.s, aux.s); +} + +std::set> jit_floor_mod_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} /// CEILING /// //Initialization of the emitter, taking node as input jit_ceiling_emitter::jit_ceiling_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index a99e016c9c834a..69bf33f898f0b6 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -213,7 +213,28 @@ class jit_floor_emitter : public jit_emitter { template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_floor_mod_emitter : public jit_emitter { +public: + jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; +}; class jit_ceiling_emitter : public jit_emitter { public: // Constructor with explicit precision diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 6da6b63eb94a72..189cd44efa2f26 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -26,6 +26,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseEqual, Algorithm::EltwiseExp, Algorithm::EltwiseFloor, + Algorithm::EltwiseFloorMod, Algorithm::EltwiseCeiling, Algorithm::EltwiseGeluErf, Algorithm::EltwiseGeluTanh, From 2e2d0367303ba7683a3aa914dc6eeca37d8630f5 Mon Sep 17 00:00:00 2001 From: geeky33 Date: Sat, 23 Nov 2024 00:42:06 +0530 Subject: [PATCH 2/7] Edited the required kernel files --- .../src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index b7fbfaf16e1587..f9a94a1e961a61 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -648,6 +648,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseEqual, ov::intel_cpu::aarch64::jit_equal_emitter), OV_CASE(Algorithm::EltwiseExp, ov::intel_cpu::aarch64::jit_exp_emitter), OV_CASE(Algorithm::EltwiseFloor, ov::intel_cpu::aarch64::jit_floor_emitter), + OV_CASE(Algorithm::EltwiseFloorMod, ov::intel_cpu::aarch64::jit_floor_mod_emitter), OV_CASE(Algorithm::EltwiseCeiling, ov::intel_cpu::aarch64::jit_ceiling_emitter), OV_CASE(Algorithm::EltwiseHswish, ov::intel_cpu::aarch64::jit_hswish_emitter), OV_CASE(Algorithm::EltwiseIsFinite, ov::intel_cpu::aarch64::jit_is_finite_emitter), @@ -829,6 +830,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), OV_CASE(Algorithm::EltwiseExp, jit_exp_emitter), OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter), + OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter), OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter), OV_CASE(Algorithm::EltwiseGeluErf, jit_gelu_erf_emitter), OV_CASE(Algorithm::EltwiseGeluTanh, jit_gelu_tanh_emitter), From 94819d10786af2eaaf68993709216daffef40060 Mon Sep 17 00:00:00 2001 From: geeky33 Date: Thu, 28 Nov 2024 16:13:19 +0530 Subject: [PATCH 3/7] All the test cases have passed and removed the aux register --- .../emitters/plugin/aarch64/jit_eltwise_emitters.cpp | 11 ++++------- .../emitters/plugin/aarch64/jit_eltwise_emitters.hpp | 2 -- .../custom/single_layer_tests/classes/eltwise.cpp | 3 +++ 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 3011bc6ec2ebc3..5af4a4040dd601 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -529,8 +529,6 @@ jit_floor_mod_emitter::jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_gener size_t jit_floor_mod_emitter::get_inputs_count() const { return 2; } -size_t jit_floor_mod_emitter::get_aux_vecs_count() const { return 1; } - void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -548,12 +546,11 @@ void jit_floor_mod_emitter::emit_isa(const std::vector &in_vec_idxs, con TReg dividend = TReg(in_vec_idxs[0]); TReg divisor = TReg(in_vec_idxs[1]); TReg r = TReg(out_vec_idxs[0]); - TReg aux = TReg(aux_vec_idxs[0]); - h->fdiv(aux.s, dividend.s, divisor.s); - h->frintm(aux.s, aux.s); - h->fmul(aux.s, aux.s, divisor.s); - h->fsub(r.s, dividend.s, aux.s); + h->fdiv(r.s, dividend.s, divisor.s); + h->frintm(r.s, r.s); + h->fmul(r.s, r.s, divisor.s); + h->fsub(r.s, dividend.s, r.s); } std::set> jit_floor_mod_emitter::get_supported_precisions(const std::shared_ptr& node) { diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index 2cb7e6928ade3e..ad4af6b03038da 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -225,8 +225,6 @@ class jit_floor_mod_emitter : public jit_emitter { size_t get_inputs_count() const override; - size_t get_aux_vecs_count() const override; - static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp index d7cfe80d22f617..e77df2b55c75e6 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp @@ -259,6 +259,7 @@ std::string EltwiseLayerCPUTest::getPrimitiveType(const utils::EltwiseTypes& elt (eltwise_type == utils::EltwiseTypes::MULTIPLY) || (eltwise_type == utils::EltwiseTypes::SUBTRACT) || (eltwise_type == utils::EltwiseTypes::DIVIDE) || + (eltwise_type == utils::EltwiseTypes::FLOOR_MOD) || (eltwise_type == utils::EltwiseTypes::MOD)) { return "jit"; } @@ -317,6 +318,8 @@ const std::vector& eltwiseOpTypesBinInp() { utils::EltwiseTypes::SUBTRACT, // TODO: Fix CVS-105430 utils::EltwiseTypes::DIVIDE, // TODO: Fix CVS-105430 utils::EltwiseTypes::FLOOR_MOD, // TODO: Fix CVS-111875 +#else if defined(OPENVINO_ARCH_ARM64) + utils::EltwiseTypes::FLOOR_MOD, #endif utils::EltwiseTypes::SQUARED_DIFF, utils::EltwiseTypes::MOD, From c31bf84600c4e3abb5be82833e41e9c479c405d1 Mon Sep 17 00:00:00 2001 From: "ayraa.ai" <141430616+geeky33@users.noreply.github.com> Date: Thu, 28 Nov 2024 16:42:21 +0530 Subject: [PATCH 4/7] Update jit_eltwise_emitters.cpp added the aux register --- .../plugin/aarch64/jit_eltwise_emitters.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 5af4a4040dd601..f830d0b0aae6f9 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -515,6 +515,7 @@ void jit_floor_emitter::emit_isa(const std::vector &in_vec_idxs, const s std::set> jit_floor_emitter::get_supported_precisions(const std::shared_ptr& node) { return {{element::f32}}; } + /// FLOOR_MOD /// jit_floor_mod_emitter::jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, @@ -529,6 +530,8 @@ jit_floor_mod_emitter::jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_gener size_t jit_floor_mod_emitter::get_inputs_count() const { return 2; } +size_t jit_mod_emitter::get_aux_vecs_count() const { return 1; } + void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -546,16 +549,18 @@ void jit_floor_mod_emitter::emit_isa(const std::vector &in_vec_idxs, con TReg dividend = TReg(in_vec_idxs[0]); TReg divisor = TReg(in_vec_idxs[1]); TReg r = TReg(out_vec_idxs[0]); + TReg aux = TReg(aux_vec_idxs[0]); - h->fdiv(r.s, dividend.s, divisor.s); - h->frintm(r.s, r.s); - h->fmul(r.s, r.s, divisor.s); - h->fsub(r.s, dividend.s, r.s); + h->fdiv(aux.s, dividend.s, divisor.s); + h->frintm(aux.s, aux.s); + h->fmul(aux.s, aux.s, divisor.s); + h->fsub(r.s, dividend.s, aux.s); } std::set> jit_floor_mod_emitter::get_supported_precisions(const std::shared_ptr& node) { return {{element::f32, element::f32}}; } + /// CEILING /// //Initialization of the emitter, taking node as input jit_ceiling_emitter::jit_ceiling_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, From 0624235c7b943266ac6ba5f807e3120b436e2516 Mon Sep 17 00:00:00 2001 From: "ayraa.ai" <141430616+geeky33@users.noreply.github.com> Date: Thu, 28 Nov 2024 16:43:37 +0530 Subject: [PATCH 5/7] Update jit_eltwise_emitters.hpp --- .../src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index ad4af6b03038da..2cb7e6928ade3e 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -225,6 +225,8 @@ class jit_floor_mod_emitter : public jit_emitter { size_t get_inputs_count() const override; + size_t get_aux_vecs_count() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: From 2eade4854f1b91b599d771f9dc785fc3e15dcad7 Mon Sep 17 00:00:00 2001 From: "ayraa.ai" <141430616+geeky33@users.noreply.github.com> Date: Thu, 28 Nov 2024 17:08:08 +0530 Subject: [PATCH 6/7] Update jit_eltwise_emitters.cpp --- .../src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index f830d0b0aae6f9..4aec56d98873fa 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -530,7 +530,7 @@ jit_floor_mod_emitter::jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_gener size_t jit_floor_mod_emitter::get_inputs_count() const { return 2; } -size_t jit_mod_emitter::get_aux_vecs_count() const { return 1; } +size_t jit_floor_mod_emitter::get_aux_vecs_count() const { return 1; } void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { From 03dce8fbd2f433d783ad7267142cca545a7502eb Mon Sep 17 00:00:00 2001 From: "ayraa.ai" <141430616+geeky33@users.noreply.github.com> Date: Thu, 28 Nov 2024 17:31:21 +0530 Subject: [PATCH 7/7] Update eltwise.cpp --- .../functional/custom/single_layer_tests/classes/eltwise.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp index e77df2b55c75e6..1696f35fc1bc4a 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp @@ -318,7 +318,7 @@ const std::vector& eltwiseOpTypesBinInp() { utils::EltwiseTypes::SUBTRACT, // TODO: Fix CVS-105430 utils::EltwiseTypes::DIVIDE, // TODO: Fix CVS-105430 utils::EltwiseTypes::FLOOR_MOD, // TODO: Fix CVS-111875 -#else if defined(OPENVINO_ARCH_ARM64) +#elif defined(OPENVINO_ARCH_ARM64) utils::EltwiseTypes::FLOOR_MOD, #endif utils::EltwiseTypes::SQUARED_DIFF,