From 8713ca2475adc138f2ce0b0de28fa1d1ceea04ef Mon Sep 17 00:00:00 2001 From: Egor Duplenskii Date: Fri, 16 Aug 2024 07:52:42 +0200 Subject: [PATCH] [CPU] Fix resolve edge conflicts Convert insertion (#25643) ### Details - The current version does not clean up the extra edge in case both Convert and Reorder are inserted. In this case we first need to remove the edge which is replaced with the Convert node, and then, after the Reorder is inserted, remove the childEdge of the newly inserted Convert node. - The idea is to simplify this logic and do one thing at a time: 1. Insert Convert. 2. If a Reorder is still necessary for the newly inserted edge, it will be handled later in the loop. It seems to be impossible to write a test for the x86 platform, since there is no reorder that does not support precision conversion. --- src/plugins/intel_cpu/src/graph.cpp | 94 +++++++++++++++-------------- src/plugins/intel_cpu/src/graph.h | 1 + 2 files changed, 50 insertions(+), 45 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 66be31c858cc76..bfb28f769b95e7 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -5,6 +5,7 @@ #include "graph.h" #include +#include #include #include #include @@ -557,59 +558,65 @@ void Graph::insertReorder(EdgePtr& edge, bool isOptimized, std::unordered_setgetInputDesc(), edge->getOutputDesc(), isOptimized); } -void Graph::ResolveEdgeConflicts() { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ResolveEdgeConflicts"); +void Graph::insertConvert(EdgePtr& edge) { + const auto& inDesc = edge->getInputDesc(); + const auto& outDesc = edge->getOutputDesc(); - ptrdiff_t numberOfEdges = static_cast(graphEdges.size()); + std::string convertName = edge->getParent()->getName() + "_" + + inDesc.getPrecision().get_type_name() + "_" + outDesc.getPrecision().get_type_name(); + auto convertNode = std::make_shared(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(), 
convertName, context); + convertNode->setDescs(inDesc, outDesc); + InsertNode(edge, convertNode, true); +} + +static std::unordered_set getUniqueLayerNames(const std::vector& graphNodes) { std::unordered_set uniqueLayerNames; + uniqueLayerNames.reserve(graphNodes.size()); + for (auto node : graphNodes) { uniqueLayerNames.insert(node->getName()); } - auto updateEdge = [&](ptrdiff_t& i) { - graphEdges.erase(graphEdges.begin() + i); - i--; - numberOfEdges--; - }; + return uniqueLayerNames; +} - for (ptrdiff_t i = 0; i < numberOfEdges; i++) { - auto edge = graphEdges[i]; - auto reorderStatus = graphEdges[i]->needReorder(); - DEBUG_LOG(graphEdges[i]->name(), " reorderStatus = ", reorderStatus); - if (reorderStatus == Edge::ReorderStatus::Regular) { - Edge::ReorderStatus reorderStatusInternal = Edge::ReorderStatus::Regular; - // Check if there is a reorder that needs the precision conversion - if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() && - !isReorderAvailable(edge->getInputPortDesc()->getMemDesc(), - edge->getOutputPortDesc()->getMemDesc(), - this->getEngine())) { - // If we are here, then we need to insert Convert, because there are no reorders that support such type conversion - const auto& inDesc = edge->getInputDesc(); - const auto& outDesc = edge->getOutputDesc(); - - std::string convertName = edge->getParent()->getName() + "_" + - inDesc.getPrecision().get_type_name() + "_" + outDesc.getPrecision().get_type_name(); - - auto convertNode = std::make_shared(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(), - convertName, context); - convertNode->setDescs(inDesc, outDesc); - InsertNode(edge, convertNode, true); - - //Check if reorder is still needed - reorderStatusInternal = convertNode->getChildEdgeAt(0)->needReorder(); - if (reorderStatusInternal != Edge::ReorderStatus::No) - edge = convertNode->getChildEdgeAt(0); - } - if (reorderStatusInternal != Edge::ReorderStatus::No) { - insertReorder(edge, 
reorderStatusInternal == Edge::ReorderStatus::Optimized, uniqueLayerNames); +void Graph::ResolveEdgeConflicts() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ResolveEdgeConflicts"); + + std::unordered_set uniqueLayerNames = getUniqueLayerNames(graphNodes); + + /* When inserting convert / reorder, two new edges are added (pushed to the end) to the graphEdges. + So use a plain for loop, to handle newly inserted edges as well */ + for (size_t i = 0; i < graphEdges.size(); i++) { + auto& edge = graphEdges[i]; + auto reorderStatus = edge->needReorder(); + DEBUG_LOG(*edge, " reorderStatus = ", reorderStatus); + + switch (reorderStatus) { + case Edge::ReorderStatus::Regular: { + if (reorderStatus == Edge::ReorderStatus::Regular && + edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() && + !isReorderAvailable(edge->getInputPortDesc()->getMemDesc(), + edge->getOutputPortDesc()->getMemDesc(), + this->getEngine())) { + // just insert convert. If layout reorder is still needed, it will be inserted later in the traverse + insertConvert(edge); + } else { + insertReorder(edge, false, uniqueLayerNames); } - updateEdge(i); - } else if (reorderStatus == Edge::ReorderStatus::Optimized) { + break; + } + case Edge::ReorderStatus::Optimized: insertReorder(edge, true, uniqueLayerNames); - updateEdge(i); + break; + case Edge::ReorderStatus::No: + break; } } + + RemoveDroppedEdges(); } void Graph::ResolveComplexInplaceConflicts() { @@ -617,10 +624,7 @@ void Graph::ResolveComplexInplaceConflicts() { ptrdiff_t numberOfEdges = static_cast(graphEdges.size()); - std::unordered_set uniqueLayerNames; - for (auto node : graphNodes) { - uniqueLayerNames.insert(node->getName()); - } + std::unordered_set uniqueLayerNames = getUniqueLayerNames(graphNodes); auto updateEdge = [&](ptrdiff_t& i) { graphEdges.erase(graphEdges.begin() + i); diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 4e6d6e6f3beca6..6816dcc75c1a67 
100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -258,6 +258,7 @@ class Graph { void EnforceInferencePrecision(); void EnforceBF16(); void insertReorder(EdgePtr& edge, bool isOptimized, std::unordered_set& uniqueLayerNames); + void insertConvert(EdgePtr& edge); }; using GraphPtr = std::shared_ptr;