Commit ec8a23e: Input, Reference nodes enabled + memory allocation with undef bound (openvinotoolkit#19)

* Input and Reference node enabled

* upper bound allocation enabled

* fixes after first review

* fixes after second review
Maxim Andronov committed Aug 24, 2021
1 parent a627b49 commit ec8a23e
Showing 24 changed files with 384 additions and 85 deletions.
@@ -177,13 +177,23 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const CNNNetwork& network) {
     }
     for (const auto& inputInfo : inputs) {
         InputInfo::Ptr info = std::make_shared<InputInfo>();
-        const auto& name = inputInfo.second->getInputData()->getName();
-        DataPtr input = std::make_shared<Data>(name, inputInfo.second->getInputData()->getTensorDesc());
+        const auto inData = inputInfo.second->getInputData();
+        const auto& name = inData->getName();
+
+        DataPtr input;
+        if (inData->isDynamic()) {
+            input = std::make_shared<Data>(name, inData->getPrecision(), inData->getPartialShape(),
+                                           TensorDesc::getLayoutByRank(inData->getPartialShape().rank().get_length()));
+        } else {
+            input = std::make_shared<Data>(name, inputInfo.second->getInputData()->getTensorDesc());
+        }
         _data[name] = input;
         info->setInputData(input);
         info->getPreProcess() = inputInfo.second->getPreProcess();
         info->setPrecision(inputInfo.second->getPrecision());
-        info->setLayout(inputInfo.second->getLayout());
+        // TODO [DS]: can we set layout for dynamic shapes? need to fix TensorDesc::setLayout
+        if (!inData->isDynamic())
+            info->setLayout(inputInfo.second->getLayout());
         _inputData[name] = info;
     }
 }
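For a dynamic input there is no fixed-dimension TensorDesc to copy, so the Data object above is built from the precision, the PartialShape and a default layout picked by rank via TensorDesc::getLayoutByRank; the same rank question is why setLayout() is skipped for dynamic inputs (see the TODO in the hunk). A standalone sketch of a rank-based layout choice; the mapping below is only an assumption for illustration, the authoritative table lives in TensorDesc::getLayoutByRank:

    #include <cstddef>
    #include <iostream>
    #include <string>

    // Hypothetical rank -> default layout table, for intuition only; do not rely on it
    // matching InferenceEngine::TensorDesc::getLayoutByRank exactly.
    std::string defaultLayoutByRank(size_t rank) {
        switch (rank) {
            case 1:  return "C";
            case 2:  return "NC";
            case 3:  return "CHW";
            case 4:  return "NCHW";
            case 5:  return "NCDHW";
            default: return "BLOCKED";
        }
    }

    int main() {
        std::cout << defaultLayoutByRank(4) << "\n";  // e.g. a rank-4 dynamic input gets NCHW
    }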
10 changes: 10 additions & 0 deletions inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp
@@ -263,6 +263,16 @@ MemoryDescPtr MemoryDescUtils::resetOffset(const MemoryDesc* desc) {
     }
 }
 
+InferenceEngine::Blob::Ptr MemoryDescUtils::createBlob(const MemoryDesc &memDesc) {
+    // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor
+    InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc);
+
+    desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc());
+    InferenceEngine::Blob::Ptr blob = make_blob_with_precision(desc);
+    blob->allocate();
+    return blob;
+}
+
 InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) {
     // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor
     auto& memDesc = mem.GetDesc();
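The new createBlob() complements the existing interpretAsBlob(): interpretAsBlob wraps memory that already exists (with memory reuse, per its doc comment), while createBlob derives a TensorDesc from the given MemoryDesc and allocates a fresh Blob. A hedged usage sketch in the spirit of the PullOutputData() change further down; out, name and intr_blob are assumed to be the output BlobMap, the output name and the node's internal MKLDNNMemory, and the fragment presumes the plugin headers rather than being a standalone program:

    // Assumed context: output handling similar to MKLDNNGraph::PullOutputData.
    if (out.find(name) == out.end()) {
        // No user-provided blob for this output yet: allocate one whose descriptor
        // matches the internal memory, instead of aliasing that memory directly.
        out[name] = MemoryDescUtils::createBlob(intr_blob.GetDesc());
    }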
9 changes: 8 additions & 1 deletion inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h
@@ -78,7 +78,14 @@ class MemoryDescUtils {
     static MemoryDescPtr resetOffset(const MemoryDesc* desc);
 
     /**
-     * @brief Creates InferenceEngine::Blob from MKLDNNMemory
+     * @brief Creates InferenceEngine::Blob from MemoryDesc
+     * @param desc MemoryDesc from which will be created InferenceEngine::Blob
+     * @return pointer to InferenceEngine::Blob
+     */
+    static InferenceEngine::Blob::Ptr createBlob(const MemoryDesc& memDesc);
+
+    /**
+     * @brief Creates InferenceEngine::Blob from MKLDNNMemory with the memory reuse
      * @param desc MKLDNNMemory from which will be created InferenceEngine::Blob
      * @return pointer to InferenceEngine::Blob
      */
9 changes: 9 additions & 0 deletions inference-engine/src/mkldnn_plugin/cpu_shape.h
@@ -97,10 +97,15 @@ class Shape {
     const std::vector<size_t>& getDims() const {
         return dims;
     }
 
     bool isStatic() const {
         return type == ShapeType::Static;
     }
+
+    bool isDynamic() const {
+        return type == ShapeType::Dynamic;
+    }
+
     size_t getRank() const {
         return minDims.size();
     }
@@ -143,6 +148,10 @@
         return !(*this == rhs);
     }
 
+    bool hasDefinedUpperBounds() const {
+        return std::all_of(maxDims.begin(), maxDims.end(), [](size_t dim){ return dim != UNDEFINED_DIM; });
+    }
+
     enum : size_t {
         UNDEFINED_DIM = 0xffffffffffffffff
     };
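hasDefinedUpperBounds() answers the allocator's key question: does every dimension have a finite upper bound, or is at least one of them UNDEFINED_DIM, in which case no worst-case allocation size exists? A minimal standalone model of that check, using a plain vector in place of the Shape class:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <limits>
    #include <vector>

    // Toy stand-in for Shape::UNDEFINED_DIM (the real class spells it 0xffffffffffffffff).
    constexpr size_t UNDEFINED_DIM = std::numeric_limits<size_t>::max();

    bool hasDefinedUpperBounds(const std::vector<size_t>& maxDims) {
        // Every dimension must have a finite upper bound for an upper-bound allocation to exist.
        return std::all_of(maxDims.begin(), maxDims.end(),
                           [](size_t dim) { return dim != UNDEFINED_DIM; });
    }

    int main() {
        std::vector<size_t> bounded   = {8, 3, 224, 224};              // batch bounded by 8
        std::vector<size_t> unbounded = {UNDEFINED_DIM, 3, 224, 224};  // batch has no upper bound
        std::cout << hasDefinedUpperBounds(bounded) << "\n";    // prints 1
        std::cout << hasDefinedUpperBounds(unbounded) << "\n";  // prints 0
    }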
4 changes: 4 additions & 0 deletions inference-engine/src/mkldnn_plugin/mkldnn_edge.h
@@ -68,6 +68,10 @@ class MKLDNNEdge {
     MKLDNNEdgePtr getSharedEdge() const;
     MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const;
 
+    bool canProvideMaxSize() {
+        return getDesc().getMaxMemSize() != MemoryDesc::UNDEFINED_SIZE;
+    }
+
 private:
     std::string name() const;
 
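canProvideMaxSize() is the gate used by the allocation changes in mkldnn_graph.cpp below: only edges whose descriptor can report a worst-case byte size take part in the shared-memory reuse planning, the rest are allocated separately. A rough standalone model of that split, with a toy Edge type and a stand-in for MemoryDesc::UNDEFINED_SIZE:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    constexpr int64_t UNDEFINED_SIZE = -1;  // stand-in; the real constant is MemoryDesc::UNDEFINED_SIZE

    struct Edge {
        int64_t maxMemSize;  // worst-case size in bytes, or UNDEFINED_SIZE when it cannot be computed
        bool canProvideMaxSize() const { return maxMemSize != UNDEFINED_SIZE; }
    };

    int main() {
        std::vector<Edge> edges = {{1024}, {UNDEFINED_SIZE}, {4096}};

        // Mirror of the allocator's split: edges with a known upper bound can be planned into
        // the shared reuse arena up front; the rest have to be allocated individually later.
        std::vector<Edge> reusable, allocateLater;
        for (const auto& e : edges)
            (e.canProvideMaxSize() ? reusable : allocateLater).push_back(e);

        std::cout << reusable.size() << " reusable, " << allocateLater.size() << " deferred\n";  // 2 reusable, 1 deferred
    }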
30 changes: 22 additions & 8 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -522,6 +522,9 @@ static edge_clusters_t findEdgeClusters(const std::vector<MKLDNNEdgePtr> & graph
     edge_cluster_idx_map_t edge_cluster_indices;
 
     for (auto &edge : graphEdges) {
+        if (!edge->canProvideMaxSize())
+            continue;
+
         auto edge_it = edge_cluster_indices.find(edge);
 
         if (edge_it != edge_cluster_indices.end())
@@ -603,7 +606,6 @@ void MKLDNNGraph::AllocateWithReuse() {
         int e_finish = edge->getChild()->execIndex;
 
         int64_t e_size = edge->getDesc().getMaxMemSize();  // size in bytes (from the beginning of data to the last element)
-        //TODO [DS]: phase 2: remove this restriction
         if (e_size == MemoryDesc::UNDEFINED_SIZE) {
             IE_THROW() << "Can not allocate memory since the size is undefined.";
         }
@@ -683,6 +685,9 @@ void MKLDNNGraph::Allocate() {
     // Resolve all other edges with status NotAllocated or in-place
     for (auto& node : graphNodes) node->resolveNotAllocatedEdges();
 
+    // Create dummy memory with undefined desc
+    for (auto& edge : graphEdges) edge->allocate();
+
     // Check all getters. Should work.
     for (auto& edge : graphEdges) edge->validate();
 }
@@ -738,7 +743,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
 
         // TODO [DS]: phase 2: remove this blob allocation when possible, i.e. when dynamic ie blob representation becomes available
         if (out.find(name) == out.end()) {
-            out[name] = MemoryDescUtils::interpretAsBlob(intr_blob);
+            out[name] = MemoryDescUtils::createBlob(intr_blob.GetDesc());
         }
 
         // TODO [DS]: is it sill true for the new paradigm?
@@ -750,7 +755,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
             // TODO [DS]: phase 2: rewrite when dynamic ie blob representation becomes available
            // IE_THROW() << "Output blob number of elements is not equal network output number of elements ("
            //            << ext_blob->size() << "!=" << intr_blob.GetElementsCount() << ").";
-            out[name] = MemoryDescUtils::interpretAsBlob(intr_blob);
+            out[name] = MemoryDescUtils::createBlob(intr_blob.GetDesc());
         }
 
         auto ext_blob = out.at(name);
@@ -769,14 +774,19 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
         if (ext_blob_ptr == intr_blob_ptr) continue;
 
         int MB = intr_blob.GetDims()[0];
-        int MB_to_process = node->batchToProcess();
+        int MB_to_process = MB;
         // TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
-        if (config.batchLimit)
-            MB_to_process = std::min<int>(config.batchLimit, MB_to_process);
+        // TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
+        if (config.batchLimit) {
+            if (node->isDynamicNode()) {
+                IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
+            }
+            MB_to_process = node->batchToProcess();
+        }
 
         size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB;
 
-        const auto actualDesc = MemoryDescUtils::convertToTensorDesc(node->getParentEdgeAt(0)->getDesc());
+        const auto actualDesc = MemoryDescUtils::convertToTensorDesc(node->getParentEdgeAt(0)->getMemory().GetDesc());
         const auto expectedDesc = ext_blob->getTensorDesc();
 
         // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it
@@ -829,7 +839,11 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
         ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node));
 
         OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, node->profiling.execute);
-        node->execute(stream);
+        if (node->isDynamicNode()) {
+            node->executeDynamic(stream);
+        } else {
+            node->execute(stream);
+        }
 
         ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node));
     }
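The Infer() hunk above introduces a per-node dispatch: nodes that report isDynamicNode() go through executeDynamic(), everything else keeps the existing execute() path. A toy model of that dispatch (the Node and Stream types here are stand-ins, not the plugin's MKLDNNNode API, and what executeDynamic does internally is not modelled):

    #include <iostream>
    #include <memory>
    #include <vector>

    // Toy stand-ins: the real code dispatches on MKLDNNNode and passes an mkldnn::stream.
    struct Stream {};

    struct Node {
        virtual ~Node() = default;
        virtual bool isDynamicNode() const { return false; }
        void execute(Stream&) { std::cout << "static execute\n"; }
        void executeDynamic(Stream&) { std::cout << "dynamic execute\n"; }
    };

    struct DynamicNode : Node {
        bool isDynamicNode() const override { return true; }
    };

    int main() {
        Stream stream;
        std::vector<std::unique_ptr<Node>> nodes;
        nodes.push_back(std::make_unique<Node>());
        nodes.push_back(std::make_unique<DynamicNode>());

        for (auto& node : nodes) {
            if (node->isDynamicNode()) {
                node->executeDynamic(stream);  // path added by this commit for dynamic-shape nodes
            } else {
                node->execute(stream);         // unchanged static path
            }
        }
    }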
18 changes: 16 additions & 2 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph.h
@@ -86,6 +86,20 @@ class MKLDNNGraph {
         return outputNodesMap;
     }
 
+    MKLDNNNodePtr GetInputNodeByName(const std::string &name) {
+        auto input = inputNodesMap.find(name);
+        if (input == inputNodesMap.end())
+            IE_THROW() << "CPU execution graph doesn't contain input node with name: " << name;
+        return input->second;
+    }
+
+    MKLDNNNodePtr GetOutputNodeByName(const std::string &name) {
+        auto output = outputNodesMap.find(name);
+        if (output == outputNodesMap.end())
+            IE_THROW() << "CPU execution graph doesn't contain output node with name: " << name;
+        return output->second;
+    }
+
     bool hasInputWithName(const std::string& name) const {
         return inputNodesMap.count(name);
     }
@@ -197,8 +211,6 @@
 
     MKLDNNMemoryPtr memWorkspace;
 
-    std::map<std::string, MKLDNNNodePtr> inputNodesMap;
-    std::map<std::string, MKLDNNNodePtr> outputNodesMap;
     std::vector<MKLDNNNodePtr> graphNodes;
     std::vector<MKLDNNEdgePtr> graphEdges;
 
@@ -227,6 +239,8 @@ class MKLDNNGraph {
     friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph);
 
 private:
+    std::map<std::string, MKLDNNNodePtr> inputNodesMap;
+    std::map<std::string, MKLDNNNodePtr> outputNodesMap;
     // these node pointers (from graphNodes) are to avoid regular checking for
     // constant node in ExecuteConstantNodesOnly and Infer methods
     std::vector<MKLDNNNodePtr> constantGraphNodes;
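With inputNodesMap and outputNodesMap moved into the private section, code outside MKLDNNGraph has to go through the new named accessors, which throw a descriptive error for an unknown name instead of silently inserting an empty entry the way map::operator[] would. A hedged caller-side sketch; the graph object and the node names are illustrative assumptions, not taken from the commit:

    // Assumed context: code owning a configured MKLDNNGraph named 'graph'.
    MKLDNNNodePtr inputNode  = graph.GetInputNodeByName("data");   // throws if "data" is unknown
    MKLDNNNodePtr outputNode = graph.GetOutputNodeByName("prob");  // throws if "prob" is unknown

    // hasInputWithName() stays available for the case where a missing input is not an error.
    if (graph.hasInputWithName("aux_input")) {
        // ... optional input handling ...
    }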