Convert: fuse refactoring + tests

openvinotoolkit · Jan 28, 2024 · 2b03e32 · 2b03e32
1 parent ac50235
commit 2b03e32
Show file tree

Hide file tree

Showing 12 changed files with 166 additions and 75 deletions.
diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -841,36 +841,35 @@ void GraphOptimizer::FuseMultiplyAndAdd(Graph &graph) {
 void GraphOptimizer::MergeConvertAndScaleShift(Graph& graph) {
     auto& graphNodes = graph.GetNodes();
 
-    auto isSuitableParentNode = [](NodePtr parentNode) {
-        return parentNode->getType() == Type::Convert && parentNode->getChildEdges().size() == 1 &&
-               (parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::u8 ||
-                parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::i8) &&
-               parentNode->getOriginalOutputPrecisionAtPort(0) == ov::element::f32;
-    };
-
-    auto isSuitableChildNode = [](NodePtr childNode) {
-        return childNode->getType() == Type::Eltwise && childNode->getParentEdges().size() != 2;
-    };
-
     auto parent = graphNodes.begin();
     while (parent != graphNodes.end()) {
+        CPU_GRAPH_OPTIMIZER_SCOPE(MergeConvertAndScaleShift);
         auto parentNode = *parent;
-        if (!isSuitableParentNode(parentNode)) {
+        if (parentNode->getType() != Type::Convert) {
             parent++;
             continue;
         }
 
-        CPU_GRAPH_OPTIMIZER_SCOPE(MergeConvertAndScaleShift_ParentNode);
+        const auto& childEdges = parentNode->getChildEdges();
+        if (childEdges.size() != 1) {
+            parent++;
+            continue;
+        }
 
-        auto childNode = parentNode->getChildEdgeAt(0)->getChild();
-        if (!isSuitableChildNode(childNode)) {
+        const auto edge = childEdges[0].lock();
+        auto childNode = edge->getChild();
+        if (childNode->getType() != Type::Eltwise) {
             parent++;
             continue;
         }
 
-        CPU_GRAPH_OPTIMIZER_SCOPE(MergeConvertAndScaleShift_ChildNode);
+        const auto eltwise = dynamic_cast<ov::intel_cpu::node::Eltwise*>(childNode.get());
+        if (!eltwise->canFuseParent(parentNode)) {
+            parent++;
+            continue;
+        }
 
-        auto parents = parentNode->parentEdges;
+        const auto parents = parentNode->parentEdges;
         for (size_t i = 0; i < parents.size(); i++) {
             auto p_edge = parents[i].lock();
             if (!p_edge) continue;

diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
@@ -38,7 +38,6 @@
 #include "utils/cpu_utils.hpp"
 #include <common/primitive_hashing_utils.hpp>
 
-#include <ngraph/opsets/opset1.hpp>
 #include <openvino/op/bitwise_and.hpp>
 #include <openvino/op/bitwise_not.hpp>
 #include <openvino/op/bitwise_or.hpp>
@@ -83,6 +82,68 @@ namespace ov {
 namespace intel_cpu {
 namespace node {
 
+#if defined(OPENVINO_ARCH_ARM64)
+namespace {
+bool jitIsSupported(const Node* node,
+                    const float alpha,
+                    const float beta,
+                    const float gamma,
+                    const std::vector<ov::element::Type>& input_precisions = {}) {
+    const Algorithm& algorithm = node->getAlgorithm();
+    const auto is_supported = one_of(algorithm,
+                                     Algorithm::EltwiseAdd,
+                                     Algorithm::EltwiseMultiply,
+                                     Algorithm::EltwiseMulAdd,
+                                     Algorithm::EltwisePowerStatic,
+                                     Algorithm::EltwiseRelu);
+    if (!is_supported) {
+        return false;
+    }
+
+    const auto check_precisions = [&node](
+            const std::vector<ov::element::Type>& precisions,
+            const std::set<ov::element::Type>& supported_precisions) {
+        const auto& input_precisions = precisions.size() == 0 ? node->getOriginalInputPrecisions() : precisions;
+        if (std::any_of(input_precisions.begin(),
+                        input_precisions.end(),
+                        [&supported_precisions](const ov::element::Type& precision) {
+                            return supported_precisions.find(precision) == supported_precisions.end();
+                        })) {
+            return false;
+        }
+
+        const auto& output_precisions = node->getOriginalOutputPrecisions();
+        if (std::any_of(output_precisions.begin(),
+                        output_precisions.end(),
+                        [&supported_precisions](const ov::element::Type& precision) {
+                            return supported_precisions.find(precision) == supported_precisions.end();
+                        })) {
+            return false;
+        }
+
+        return true;
+    };
+
+    const std::set<ov::element::Type> supported_precisions = {
+        ov::element::f16,
+        ov::element::f32,
+        ov::element::i32,
+        ov::element::u32
+    };
+
+    if (!check_precisions(input_precisions, supported_precisions)) {
+        return false;
+    }
+
+    if ((algorithm == Algorithm::EltwiseRelu) && ((alpha != 0.f) || (beta != 0.f) || (gamma != 0.f))) {
+        return false;
+    }
+
+    return true;
+}
+} // namespace
+#endif
+
 #if defined(OPENVINO_ARCH_X86_64)
 
 template<typename T>
@@ -2253,7 +2314,8 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
 #endif
 #elif defined(OPENVINO_ARCH_ARM64)
     const bool useJit = canUseOptimizedImpl &&
-                        executors::aarch64::JitEltwiseExecutor::isSupported(this, getAlpha(), getBeta(), getGamma());
+                        jitIsSupported(this, getAlpha(), getBeta(), getGamma()) &&
+                        executors::aarch64::JitEltwiseExecutor::isSupported(getAlgorithm(), getAlpha(), getBeta(), getGamma());
     if (!useJit) {
         canUseOptimizedImpl = false;
     }
@@ -2980,6 +3042,35 @@ bool Eltwise::appendAttrPostOps(DnnlPostOpsComposer& dnnlpoc, bool isLastPostOp,
     return true;
 }
 
+bool Eltwise::canFuseParent(const NodePtr& parentNode) const {
+#if defined(OPENVINO_ARCH_ARM64)
+    if (parentNode->getType() != Type::Convert) {
+        return false;
+    }
+    const auto& input_precisions = parentNode->getOriginalInputPrecisions();
+    if (!jitIsSupported(this, getAlpha(), getBeta(), getGamma(), input_precisions)) {
+        return false;
+    }
+#else
+    const auto isSuitableParentNode = [](const Node* parentNode) {
+        return parentNode->getType() == Type::Convert &&
+               (parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::u8 ||
+                parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::i8) &&
+               parentNode->getOriginalOutputPrecisionAtPort(0) == ov::element::f32;
+    };
+
+    auto isSuitableChildNode = [](const Node* childNode) {
+        return childNode->getParentEdges().size() != 2;
+    };
+
+    if (!isSuitableParentNode(parentNode.get()) || !isSuitableChildNode(this)) {
+        return false;
+    }
+#endif
+
+    return true;
+}
+
 bool Eltwise::canFuse(const NodePtr& node) const {
     auto isIntegerComputeSupported = [](const Node* node) {
         if (!one_of(node->getAlgorithm(), Algorithm::EltwiseAdd,
@@ -3004,14 +3095,14 @@ bool Eltwise::canFuse(const NodePtr& node) const {
     if (!mayiuse(dnnl::impl::cpu::aarch64::asimd) || (getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK))
         return false;
 
-    if (!executors::aarch64::JitEltwiseExecutor::isSupported(this, getAlpha(), getBeta(), getGamma())) {
+    if (!jitIsSupported(this, getAlpha(), getBeta(), getGamma())) {
         return false;
     }
     const auto eltwise = dynamic_cast<const Eltwise*>(node.get());
-    if ((eltwise == nullptr) || (!executors::aarch64::JitEltwiseExecutor::isSupported(eltwise,
-                                                                                      eltwise->getAlpha(),
-                                                                                      eltwise->getBeta(),
-                                                                                      eltwise->getGamma()))) {
+    if ((eltwise == nullptr) || (!jitIsSupported(eltwise,
+                                                 eltwise->getAlpha(),
+                                                 eltwise->getBeta(),
+                                                 eltwise->getGamma()))) {
         return false;
     }
 #else

diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.h b/src/plugins/intel_cpu/src/nodes/eltwise.h
@@ -108,6 +108,7 @@ class Eltwise : public Node {
     void execute(dnnl::stream strm) override;
     bool created() const override;
     bool canBeInPlace() const override;
+    bool canFuseParent(const NodePtr& parentNode) const;
     bool canFuse(const NodePtr& node) const override;
     void appendPostOps(dnnl::post_ops& ops, const VectorDims &postOpDims, std::unordered_map<int, MemoryPtr>& postOpsMem, const int channelAxis = 1) override;
     void appendPostOps(dnnl::post_ops& ops, const VectorDims &postOpDims, std::vector<const void*>& postOpsMem, const int channelAxis = 1) override;

diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp
@@ -11,11 +11,10 @@ namespace executors {
 namespace aarch64 {
 
 bool JitEltwiseExecutor::isSupported(
-    const Node* node,
+    const Algorithm& algorithm,
     const float alpha,
     const float beta,
     const float gamma) {
-    const Algorithm& algorithm = node->getAlgorithm();
     const auto is_supported = one_of(algorithm,
                                     Algorithm::EltwiseAdd,
                                     Algorithm::EltwiseMultiply,
@@ -26,51 +25,6 @@ bool JitEltwiseExecutor::isSupported(
         return false;
     }
 
-    const auto check_precisions = [&node](const std::set<ov::element::Type>& precisions) {
-        const auto& input_precisions = node->getOriginalInputPrecisions();
-        if (std::any_of(input_precisions.begin(),
-                        input_precisions.end(),
-                        [&precisions](const ov::element::Type& precision) {
-                            return precisions.find(precision) == precisions.end();
-                        })) {
-            return false;
-        }
-
-        const auto& output_precisions = node->getOriginalOutputPrecisions();
-        if (std::any_of(output_precisions.begin(),
-                        output_precisions.end(),
-                        [&precisions](const ov::element::Type& precision) {
-                            return precisions.find(precision) == precisions.end();
-                        })) {
-            return false;
-        }
-
-        return true;
-    };
-
-    const std::set<ov::element::Type> supported_precisions = {
-        ov::element::f16,
-        ov::element::f32,
-        ov::element::i32,
-        ov::element::u32
-    };
-
-    const auto parent = node->getParentEdgeAt(0)->getParent();
-    if (parent->getType() == ov::intel_cpu::Type::Convert) {
-        const auto& input_precisions = parent->getOriginalInputPrecisions();
-        if (input_precisions.size() != 1ull) {
-            return false;
-        }
-        // input precision will be changed after fuse
-        if (supported_precisions.find(input_precisions[0]) == supported_precisions.end()) {
-            return false;
-        }
-    }
-
-    if (!check_precisions(supported_precisions)) {
-        return false;
-    }
-
     if ((algorithm == Algorithm::EltwiseRelu) && ((alpha != 0.f) || (beta != 0.f) || (gamma != 0.f))) {
         return false;
     }

diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.hpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.hpp
@@ -21,7 +21,7 @@ class JitEltwiseExecutor : public EltwiseExecutor {
     explicit JitEltwiseExecutor(const ExecutorContext::CPtr context);
 
     static bool isSupported(
-        const Node* node,
+        const Algorithm& algorithm,
         const float alpha,
         const float beta,
         const float gamma);

diff --git a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt
@@ -38,6 +38,8 @@ else()
     file(GLOB_RECURSE TMP_LIST_OF_COMMON_TEST_INSTANCES ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/instances/common/*.cpp)
     file(GLOB_RECURSE TMP_LIST_OF_ARM_TEST_INSTANCES    ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/instances/arm/*.cpp)
     file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS    ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests/arm/*.cpp)
+    # will be done in PR 19856
+    file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS    ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests/src/common/*.cpp)
     list(APPEND TMP_LIST_OF_EXPLICITLY_ENABLED_TESTS
         ${TMP_LIST_OF_TEST_CLASSES} ${TMP_LIST_OF_COMMON_TEST_INSTANCES} ${TMP_LIST_OF_ARM_TEST_INSTANCES} ${TMP_LIST_OF_ARM_SUBGRAPH_TESTS})
     set(TMP_EXPLICITLY_ENABLED_TESTS "${TMP_LIST_OF_EXPLICITLY_ENABLED_TESTS}")

diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -295,6 +295,10 @@ std::vector<std::string> disabledTestPatterns() {
 #if defined(OPENVINO_ARCH_ARM)
     // TODO: rounding errors
     retVector.emplace_back(R"(.*iv_secondaryInputType=PARAMETER_opType=VECTOR_NetType=i32.*)");
+    // not supported
+    retVector.emplace_back(R"(.*fma.*EltwiseLayerCPUTest.*)");
+    retVector.emplace_back(R"(.*int_jit.*EltwiseLayerCPUTest.*)");
+    retVector.emplace_back(R"(.*dyn.*EltwiseChainTest.*)");
 #endif
 
 #if !defined(OPENVINO_ARCH_X86_64)

diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp
@@ -273,7 +273,6 @@ const std::vector<ElementType>& netType() {
 const std::vector<ElementType>& netTypeJit() {
     static const std::vector<ElementType> netType = {
         ElementType::i32,
-        ElementType::u32,
         ElementType::f32};
     return netType;
 }

diff --git a/...onal/subgraph_tests/src/eltwise_chain.cpp → ...bgraph_tests/src/common/eltwise_chain.cpp b/...onal/subgraph_tests/src/eltwise_chain.cpp → ...bgraph_tests/src/common/eltwise_chain.cpp
@@ -165,7 +165,7 @@ std::vector<std::vector<ElementType>> inputPrecisions = {
 
 std::vector<std::vector<EltwiseTypes>> eltwiseOps = {
         { EltwiseTypes::ADD, EltwiseTypes::MULTIPLY, EltwiseTypes::SUBTRACT },
-        { EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD },
+        { EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD }
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain, EltwiseChainTest,
@@ -178,6 +178,40 @@ INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain, EltwiseChainTest,
                                 ::testing::Values(ov::test::utils::DEVICE_CPU)),
                         EltwiseChainTest::getTestCaseName);
 
+
+    std::vector<std::vector<ov::Shape>> inputShapesConvert = {
+            {{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}}
+    };
+
+    std::vector<std::vector<ElementType>> inputPrecisionsConvert = {
+            { ElementType::i8, ElementType::f32, ElementType::f32 },
+            { ElementType::u8, ElementType::f32, ElementType::f32 },
+            { ElementType::i16, ElementType::f32, ElementType::f32 },
+            { ElementType::u16, ElementType::f32, ElementType::f32 },
+            { ElementType::i32, ElementType::f32, ElementType::f32 },
+            // { ElementType::u32, ElementType::f32, ElementType::f32 }, // plugin doesn't support
+            { ElementType::f16, ElementType::f32, ElementType::f32 },
+            { ElementType::f32, ElementType::f32, ElementType::f32 },
+    };
+
+    std::vector<std::vector<EltwiseTypes>> eltwiseOpsConvert = {
+            { EltwiseTypes::CONVERT_OP, EltwiseTypes::MULTIPLY },
+            { EltwiseTypes::CONVERT_OP, EltwiseTypes::ADD },
+            { EltwiseTypes::CONVERT_OP, EltwiseTypes::DIVIDE },
+            { EltwiseTypes::CONVERT_OP, EltwiseTypes::SUBTRACT },
+            { EltwiseTypes::CONVERT_OP, EltwiseTypes::POWER },
+    };
+
+    INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain_MergeConvert, EltwiseChainTest,
+                             ::testing::Combine(
+                                     ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesConvert)),
+                                     ::testing::Values(InputLayerType::CONSTANT),
+                                     ::testing::ValuesIn(inputPrecisionsConvert),
+                                     ::testing::ValuesIn(eltwiseOpsConvert),
+                                     ::testing::Values(false),
+                                     ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                             EltwiseChainTest::getTestCaseName);
+
 std::vector<std::vector<ov::Shape>> inputShapesFQ = {
     {{1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}},
     {{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}},

diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp
@@ -60,7 +60,8 @@ enum EltwiseTypes {
     BITWISE_AND,
     BITWISE_NOT,
     BITWISE_OR,
-    BITWISE_XOR
+    BITWISE_XOR,
+    CONVERT_OP
 };
 
 enum SqueezeOpType {

diff --git a/src/tests/test_utils/common_test_utils/src/node_builders/eltwise.cpp b/src/tests/test_utils/common_test_utils/src/node_builders/eltwise.cpp
@@ -9,6 +9,7 @@
 #include "openvino/op/bitwise_not.hpp"
 #include "openvino/op/bitwise_or.hpp"
 #include "openvino/op/bitwise_xor.hpp"
+#include "openvino/op/convert.hpp"
 #include "openvino/op/divide.hpp"
 #include "openvino/op/erf.hpp"
 #include "openvino/op/floor_mod.hpp"
@@ -51,6 +52,8 @@ std::shared_ptr<ov::Node> make_eltwise(const ov::Output<Node>& in0,
         return std::make_shared<ov::op::v13::BitwiseOr>(in0, in1);
     case ov::test::utils::EltwiseTypes::BITWISE_XOR:
         return std::make_shared<ov::op::v13::BitwiseXor>(in0, in1);
+    case ov::test::utils::EltwiseTypes::CONVERT_OP:
+        return std::make_shared<ov::op::v0::Convert>(in0, ov::element::f32);
     default: {
         OPENVINO_THROW("Incorrect type of Eltwise operation");
     }

diff --git a/src/tests/test_utils/common_test_utils/src/test_enums.cpp b/src/tests/test_utils/common_test_utils/src/test_enums.cpp
@@ -82,6 +82,9 @@ std::ostream& operator<<(std::ostream& os, const ov::test::utils::EltwiseTypes t
     case ov::test::utils::EltwiseTypes::BITWISE_XOR:
         os << "BitwiseXor";
         break;
+    case ov::test::utils::EltwiseTypes::CONVERT_OP:
+        os << "Convert";
+        break;
     default:
         throw std::runtime_error("NOT_SUPPORTED_OP_TYPE");
     }