From d6ed1f4673a8e84a8ca7df3ee24ffd63c033c3f7 Mon Sep 17 00:00:00 2001 From: Nashez Zubair Date: Sun, 20 Oct 2024 12:30:53 +0530 Subject: [PATCH] [CPU][ARM64] Add a JIT emitter for SoftPlus operation - Add a jit_soft_plus_emitter class derived from jit_emitter in aarch64/jit_eltwise_emitters - Add the Algorithm::EltwiseSoftPlus entry to the emitter factory and to get_supported_precisions in nodes/kernels/aarch64 - Add the EltwiseSoftPlus entry to the aarch64 executor's supported algorithms - Add ActivationTypes::SoftPlus to getPrimitiveType in the activation tests Closes: #24109 Signed-off-by: Nashez Zubair --- .../plugin/aarch64/jit_eltwise_emitters.cpp | 69 +++++++++++++++++++ .../plugin/aarch64/jit_eltwise_emitters.hpp | 29 ++++++++ .../nodes/executors/aarch64/jit_eltwise.cpp | 1 + .../aarch64/jit_uni_eltwise_generic.cpp | 2 + .../single_layer_tests/classes/activation.cpp | 1 + 5 files changed, 102 insertions(+) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 355c8fb7f4c4d7..d0b761f9a76f7a 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -2024,6 +2024,75 @@ std::set> jit_sigmoid_emitter::get_supported_precisio return {{element::f32}}; } +/// SOFT_PLUS /// +jit_soft_plus_emitter::jit_soft_plus_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { + prepare_table(); + exp_emitter = std::make_unique(h, host_isa, exec_prc); +} + +jit_soft_plus_emitter::jit_soft_plus_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) : jit_emitter(host, host_isa, exec_prc) { + prepare_table(); + exp_emitter = std::make_unique(h, host_isa, exec_prc); +} 
+ +size_t jit_soft_plus_emitter::get_inputs_count() const { return 1; } + +size_t jit_soft_plus_emitter::get_aux_vecs_count() const { return exp_emitter->get_aux_vecs_count() + 2; } + +size_t jit_soft_plus_emitter::get_aux_gprs_count() const { return exp_emitter->get_aux_gprs_count() + 1; } + +void jit_soft_plus_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OPENVINO_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_soft_plus_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (exec_prc_ != ov::element::f32) { + OPENVINO_THROW("unsupported precision: " + exec_prc_.to_string()); + } + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + const TReg src(in_vec_idxs[0]); + const TReg dst(out_vec_idxs[0]); + const TReg aux1(aux_vec_idxs[exp_emitter->get_aux_vecs_count()]); + const TReg aux2(aux_vec_idxs[exp_emitter->get_aux_vecs_count() + 1]); + + exp_emitter->emit_code( + { src.getIdx() }, + out_vec_idxs, + aux_vec_idxs, + aux_gpr_idxs); + h->ld1r(aux1.s, table_val2("one")); + h->fadd(dst.s, dst.s, aux1.s); + h->fcvtzs(aux2.s, dst.s); + h->cls(aux1.s, aux2.s); + h->ld1r(aux2.s, table_val("bit_count")); + h->fsub(aux1.s, aux2.s, aux1.s); + // aux1.s contains nearest power of 2 for e^x + 1 + h->ld1r(aux2.s, table_val("ln2f")); + h->fmul(aux2.s, aux1.s, aux2.s); // Computed n*ln2 in aux2.s + h->fsub(dst.s, dst.s); +} + +void jit_soft_plus_emitter::register_table_entries() { + push_arg_entry_of("one", 0x3f800000, true); + push_arg_entry_of("threshold", 0x41a00000, true); // Threshold set to 20 + push_arg_entry_of("ln2f", 0x3f317218, true); // Natural log of 2 +} + +std::set> jit_soft_plus_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + /// SOFT_SIGN /// 
jit_soft_sign_emitter::jit_soft_sign_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index a99e016c9c834a..88fdc13ab30f02 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -837,6 +837,35 @@ class jit_sigmoid_emitter : public jit_emitter { void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_soft_plus_emitter : public jit_emitter { +public: + jit_soft_plus_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_soft_plus_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + size_t get_aux_gprs_count() const override; + + void register_table_entries() override; + + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + +private: + std::unique_ptr exp_emitter; + + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; +}; + class jit_soft_sign_emitter : public jit_emitter { public: jit_soft_sign_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 6da6b63eb94a72..b0a23458b49fc6 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -49,6 +49,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseRelu, Algorithm::EltwiseSelect, Algorithm::EltwiseSigmoid, + Algorithm::EltwiseSoftPlus, Algorithm::EltwiseSoftSign, Algorithm::EltwiseSqrt, Algorithm::EltwiseSubtract, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index b7fbfaf16e1587..6548bcca38df79 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -671,6 +671,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseRelu, ov::intel_cpu::aarch64::jit_relu_emitter), OV_CASE(Algorithm::EltwiseSelect, ov::intel_cpu::aarch64::jit_select_emitter), OV_CASE(Algorithm::EltwiseSigmoid, ov::intel_cpu::aarch64::jit_sigmoid_emitter), + OV_CASE(Algorithm::EltwiseSoftPlus, ov::intel_cpu::aarch64::jit_soft_plus_emitter), OV_CASE(Algorithm::EltwiseSoftSign, ov::intel_cpu::aarch64::jit_soft_sign_emitter), OV_CASE(Algorithm::EltwiseSqrt, ov::intel_cpu::aarch64::jit_sqrt_emitter), OV_CASE(Algorithm::EltwiseSubtract, ov::intel_cpu::aarch64::jit_subtract_emitter), @@ -851,6 +852,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter), OV_CASE(Algorithm::EltwiseSelect, jit_select_emitter), OV_CASE(Algorithm::EltwiseSigmoid, jit_sigmoid_emitter), + OV_CASE(Algorithm::EltwiseSoftPlus, jit_soft_plus_emitter), OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter), OV_CASE(Algorithm::EltwiseSqrt, jit_sqrt_emitter), OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp 
b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp index bd81bcf1a41c63..7804256c85f4ab 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp @@ -193,6 +193,7 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType (activation_type == utils::ActivationTypes::GeluTanh) || (activation_type == utils::ActivationTypes::Relu) || (activation_type == utils::ActivationTypes::Sigmoid) || + (activation_type == utils::ActivationTypes::SoftPlus) || (activation_type == utils::ActivationTypes::SoftSign) || (activation_type == utils::ActivationTypes::Sqrt) || (activation_type == utils::ActivationTypes::Swish) ||