From 4ee44d8e6de3191f42c2bcbd4cbe4af147b5b5f6 Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Fri, 19 Jul 2024 15:25:46 +0200 Subject: [PATCH] [CPU] Fix resolve edge conflicts Convert insertion --- src/plugins/intel_cpu/src/graph.cpp | 94 +++++++++++++++-------------- src/plugins/intel_cpu/src/graph.h | 1 + 2 files changed, 50 insertions(+), 45 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 490c15fceb2ec4..b5ffbc6da96603 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -5,6 +5,7 @@ #include "graph.h" #include +#include #include #include #include @@ -537,59 +538,65 @@ void Graph::insertReorder(EdgePtr& edge, bool isOptimized, std::unordered_setgetInputDesc(), edge->getOutputDesc(), isOptimized); } -void Graph::ResolveEdgeConflicts() { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ResolveEdgeConflicts"); +void Graph::insertConvert(EdgePtr& edge) { + const auto& inDesc = edge->getInputDesc(); + const auto& outDesc = edge->getOutputDesc(); - ptrdiff_t numberOfEdges = static_cast(graphEdges.size()); + std::string convertName = edge->getParent()->getName() + "_" + + inDesc.getPrecision().get_type_name() + "_" + outDesc.getPrecision().get_type_name(); + auto convertNode = std::make_shared(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(), + convertName, context); + convertNode->setDescs(inDesc, outDesc); + InsertNode(edge, convertNode, true); +} + +static std::unordered_set getUniqueLayerNames(const std::vector& graphNodes) { std::unordered_set uniqueLayerNames; + uniqueLayerNames.reserve(graphNodes.size()); + for (auto node : graphNodes) { uniqueLayerNames.insert(node->getName()); } - auto updateEdge = [&](ptrdiff_t& i) { - graphEdges.erase(graphEdges.begin() + i); - i--; - numberOfEdges--; - }; + return uniqueLayerNames; +} - for (ptrdiff_t i = 0; i < numberOfEdges; i++) { - auto edge = graphEdges[i]; - auto reorderStatus = graphEdges[i]->needReorder(); - DEBUG_LOG(graphEdges[i]->name(), " reorderStatus = ", reorderStatus); - if (reorderStatus == Edge::ReorderStatus::Regular) { - Edge::ReorderStatus reorderStatusInternal = Edge::ReorderStatus::Regular; - // Check if there is a reorder that needs the precision conversion - if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() && - !isReorderAvailable(edge->getInputPortDesc()->getMemDesc(), - edge->getOutputPortDesc()->getMemDesc(), - this->getEngine())) { - // If we are here, then we need to insert Convert, because there are no reorders that support such type conversion - const auto& inDesc = edge->getInputDesc(); - const auto& outDesc = edge->getOutputDesc(); - - std::string convertName = edge->getParent()->getName() + "_" + - inDesc.getPrecision().get_type_name() + "_" + outDesc.getPrecision().get_type_name(); - - auto convertNode = std::make_shared(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(), - convertName, context); - convertNode->setDescs(inDesc, outDesc); - InsertNode(edge, convertNode, true); - - //Check if reorder is still needed - reorderStatusInternal = convertNode->getChildEdgeAt(0)->needReorder(); - if (reorderStatusInternal != Edge::ReorderStatus::No) - edge = convertNode->getChildEdgeAt(0); - } - if (reorderStatusInternal != Edge::ReorderStatus::No) { - insertReorder(edge, reorderStatusInternal == Edge::ReorderStatus::Optimized, uniqueLayerNames); +void Graph::ResolveEdgeConflicts() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ResolveEdgeConflicts"); + + std::unordered_set uniqueLayerNames = getUniqueLayerNames(graphNodes); + + /* When inserting convert / reorder, two new edges are added (pushed to the end) to the graphEdges. + So use a plain for loop, to handle newly inserted edges as well */ + for (size_t i = 0; i < graphEdges.size(); i++) { + auto& edge = graphEdges[i]; + auto reorderStatus = edge->needReorder(); + DEBUG_LOG(*edge, " reorderStatus = ", reorderStatus); + + switch (reorderStatus) { + case Edge::ReorderStatus::Regular: { + if (reorderStatus == Edge::ReorderStatus::Regular && + edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() && + !isReorderAvailable(edge->getInputPortDesc()->getMemDesc(), + edge->getOutputPortDesc()->getMemDesc(), + this->getEngine())) { + // just insert convert. If layout reorder is still needed, it will be inserted later in the traverse + insertConvert(edge); + } else { + insertReorder(edge, false, uniqueLayerNames); } - updateEdge(i); - } else if (reorderStatus == Edge::ReorderStatus::Optimized) { + break; + } + case Edge::ReorderStatus::Optimized: insertReorder(edge, true, uniqueLayerNames); - updateEdge(i); + break; + case Edge::ReorderStatus::No: + break; } } + + RemoveDroppedEdges(); } void Graph::ResolveComplexInplaceConflicts() { @@ -597,10 +604,7 @@ void Graph::ResolveComplexInplaceConflicts() { ptrdiff_t numberOfEdges = static_cast(graphEdges.size()); - std::unordered_set uniqueLayerNames; - for (auto node : graphNodes) { - uniqueLayerNames.insert(node->getName()); - } + std::unordered_set uniqueLayerNames = getUniqueLayerNames(graphNodes); auto updateEdge = [&](ptrdiff_t& i) { graphEdges.erase(graphEdges.begin() + i); diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 4e6d6e6f3beca6..6816dcc75c1a67 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -258,6 +258,7 @@ class Graph { void EnforceInferencePrecision(); void EnforceBF16(); void insertReorder(EdgePtr& edge, bool isOptimized, std::unordered_set& uniqueLayerNames); + void insertConvert(EdgePtr& edge); }; using GraphPtr = std::shared_ptr;