
Commit

Input, Reference nodes enabled + memory allocation with undef bound (openvinotoolkit#19)

* Input and Reference nodes enabled

* upper bound allocation enabled

* fixes after first review

* fixes after second review
Maxim Andronov committed Sep 1, 2021
1 parent 75de6cc commit 6726175
Showing 23 changed files with 371 additions and 82 deletions.
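
Before the per-file diffs, a brief caller-side sketch of the dynamic-shape flow these changes enable. It is illustrative only and not part of the commit: the request object and the input name "data" are assumptions; only the member functions that appear in the diffs below are taken from the code.

// Hypothetical usage: "request" is an MKLDNNInferRequest for a network whose
// input "data" has a partially defined (dynamic) shape.
InferenceEngine::SizeVector realDims = {1, 3, 224, 224};

// 1. Pin the concrete shape of the dynamic input. SetShape() (added in
//    mkldnn_infer_request.cpp) validates it against the ngraph PartialShape,
//    stores it in m_realShapes and calls resetOutputShape() on the Input node.
request.SetShape("data", realDims);

// 2. GetBlob()/createInputBlob() can now build an input blob whose dims come
//    from m_realShapes rather than from the static network TensorDesc.
auto input = request.GetBlob("data");
// ... fill "input" ...

// 3. During inference, nodes reporting isDynamicNode() are dispatched to
//    executeDynamic(stream); all other nodes keep the execute(stream) path.
request.Infer();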
10 changes: 10 additions & 0 deletions inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp
@@ -263,6 +263,16 @@ MemoryDescPtr MemoryDescUtils::resetOffset(const MemoryDesc* desc) {
}
}

InferenceEngine::Blob::Ptr MemoryDescUtils::createBlob(const MemoryDesc &memDesc) {
// TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor
InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc);

desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc());
InferenceEngine::Blob::Ptr blob = make_blob_with_precision(desc);
blob->allocate();
return blob;
}

InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) {
// TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor
auto& memDesc = mem.GetDesc();
9 changes: 8 additions & 1 deletion inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h
@@ -78,7 +78,14 @@ class MemoryDescUtils {
static MemoryDescPtr resetOffset(const MemoryDesc* desc);

/**
* @brief Creates InferenceEngine::Blob from MKLDNNMemory
* @brief Creates InferenceEngine::Blob from MemoryDesc
* @param memDesc MemoryDesc from which the InferenceEngine::Blob will be created
* @return pointer to InferenceEngine::Blob
*/
static InferenceEngine::Blob::Ptr createBlob(const MemoryDesc& memDesc);

/**
* @brief Creates InferenceEngine::Blob from MKLDNNMemory, reusing its memory
* @param mem MKLDNNMemory from which the InferenceEngine::Blob will be created
* @return pointer to InferenceEngine::Blob
*/
9 changes: 9 additions & 0 deletions inference-engine/src/mkldnn_plugin/cpu_shape.h
@@ -97,10 +97,15 @@ class Shape {
const std::vector<size_t>& getDims() const {
return dims;
}

bool isStatic() const {
return type == ShapeType::Static;
}

bool isDynamic() const {
return type == ShapeType::Dynamic;
}

size_t getRank() const {
return minDims.size();
}
@@ -143,6 +148,10 @@
return !(*this == rhs);
}

bool hasDefinedUpperBounds() const {
return std::all_of(maxDims.begin(), maxDims.end(), [](size_t dim){ return dim != UNDEFINED_DIM; });
}

enum : size_t {
UNDEFINED_DIM = 0xffffffffffffffff
};
4 changes: 4 additions & 0 deletions inference-engine/src/mkldnn_plugin/mkldnn_edge.h
@@ -68,6 +68,10 @@ class MKLDNNEdge {
MKLDNNEdgePtr getSharedEdge() const;
MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const;

bool canProvideMaxSize() {
return getDesc().getMaxMemSize() != MemoryDesc::UNDEFINED_SIZE;
}

private:
std::string name() const;

30 changes: 22 additions & 8 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -522,6 +522,9 @@ static edge_clusters_t findEdgeClusters(const std::vector<MKLDNNEdgePtr> & graph
edge_cluster_idx_map_t edge_cluster_indices;

for (auto &edge : graphEdges) {
if (!edge->canProvideMaxSize())
continue;

auto edge_it = edge_cluster_indices.find(edge);

if (edge_it != edge_cluster_indices.end())
@@ -603,7 +606,6 @@ void MKLDNNGraph::AllocateWithReuse() {
int e_finish = edge->getChild()->execIndex;

int64_t e_size = edge->getDesc().getMaxMemSize(); // size in bytes (from the beginning of data to the last element)
//TODO [DS]: phase 2: remove this restriction
if (e_size == MemoryDesc::UNDEFINED_SIZE) {
IE_THROW() << "Can not allocate memory since the size is undefined.";
}
@@ -683,6 +685,9 @@ void MKLDNNGraph::Allocate() {
// Resolve all other edges with status NotAllocated or in-place
for (auto& node : graphNodes) node->resolveNotAllocatedEdges();

// Create dummy memory with undefined desc
for (auto& edge : graphEdges) edge->allocate();

// Check all getters. Should work.
for (auto& edge : graphEdges) edge->validate();
}
@@ -738,7 +743,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {

// TODO [DS]: phase 2: remove this blob allocation when possible, i.e. when dynamic ie blob representation becomes available
if (out.find(name) == out.end()) {
out[name] = MemoryDescUtils::interpretAsBlob(intr_blob);
out[name] = MemoryDescUtils::createBlob(intr_blob.GetDesc());
}

// TODO [DS]: is it still true for the new paradigm?
@@ -750,7 +755,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
// TODO [DS]: phase 2: rewrite when dynamic ie blob representation becomes available
// IE_THROW() << "Output blob number of elements is not equal network output number of elements ("
// << ext_blob->size() << "!=" << intr_blob.GetElementsCount() << ").";
out[name] = MemoryDescUtils::interpretAsBlob(intr_blob);
out[name] = MemoryDescUtils::createBlob(intr_blob.GetDesc());
}

auto ext_blob = out.at(name);
@@ -769,14 +774,19 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
if (ext_blob_ptr == intr_blob_ptr) continue;

int MB = intr_blob.GetDims()[0];
int MB_to_process = node->batchToProcess();
int MB_to_process = MB;
// TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
// TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
if (config.batchLimit)
MB_to_process = std::min<int>(config.batchLimit, MB_to_process);
if (config.batchLimit) {
if (node->isDynamicNode()) {
IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
}
MB_to_process = node->batchToProcess();
}

size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB;

const auto actualDesc = MemoryDescUtils::convertToTensorDesc(node->getParentEdgeAt(0)->getDesc());
const auto actualDesc = MemoryDescUtils::convertToTensorDesc(node->getParentEdgeAt(0)->getMemory().GetDesc());
const auto expectedDesc = ext_blob->getTensorDesc();

// TODO [NM]: need to create universal reorder which will detect cases when we really need to use it
@@ -829,7 +839,11 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node));

OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, node->profiling.execute);
node->execute(stream);
if (node->isDynamicNode()) {
node->executeDynamic(stream);
} else {
node->execute(stream);
}

ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node));
}
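
Taken together, the graph changes split edges into two groups during allocation. A simplified sketch of the idea (an illustration, not the literal code; planReuse() is a hypothetical stand-in for the box-packing solver in AllocateWithReuse()):

// Edges that can report an upper-bound memory size take part in the static
// reuse planning; edges whose descriptors have no defined upper bound are
// skipped there and only get "dummy" memory with an undefined desc, to be
// redefined once the real shape is known at execution time.
for (auto& edge : graphEdges) {
    if (!edge->canProvideMaxSize())        // getMaxMemSize() == UNDEFINED_SIZE
        continue;                          // excluded from findEdgeClusters()
    planReuse(edge, edge->getDesc().getMaxMemSize());   // hypothetical helper
}
for (auto& edge : graphEdges)
    edge->allocate();                      // creates dummy memory where needed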
18 changes: 16 additions & 2 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph.h
@@ -86,6 +86,20 @@ class MKLDNNGraph {
return outputNodesMap;
}

MKLDNNNodePtr GetInputNodeByName(const std::string &name) {
auto input = inputNodesMap.find(name);
if (input == inputNodesMap.end())
IE_THROW() << "CPU execution graph doesn't contain input node with name: " << name;
return input->second;
}

MKLDNNNodePtr GetOutputNodeByName(const std::string &name) {
auto output = outputNodesMap.find(name);
if (output == outputNodesMap.end())
IE_THROW() << "CPU execution graph doesn't contain output node with name: " << name;
return output->second;
}

bool hasInputWithName(const std::string& name) const {
return inputNodesMap.count(name);
}
@@ -197,8 +211,6 @@ class MKLDNNGraph {

MKLDNNMemoryPtr memWorkspace;

std::map<std::string, MKLDNNNodePtr> inputNodesMap;
std::map<std::string, MKLDNNNodePtr> outputNodesMap;
std::vector<MKLDNNNodePtr> graphNodes;
std::vector<MKLDNNEdgePtr> graphEdges;

@@ -227,6 +239,8 @@ class MKLDNNGraph {
friend std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph);

private:
std::map<std::string, MKLDNNNodePtr> inputNodesMap;
std::map<std::string, MKLDNNNodePtr> outputNodesMap;
// these node pointers (from graphNodes) are to avoid regular checking for
// constant node in ExecuteConstantNodesOnly and Infer methods
std::vector<MKLDNNNodePtr> constantGraphNodes;
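
With inputNodesMap/outputNodesMap moved to the private section, other components reach graph nodes through the new throwing accessors. A short illustrative sketch (the node names are assumptions):

// Both accessors throw if the execution graph has no node with that name.
MKLDNNNodePtr inputNode  = graph.GetInputNodeByName("data");
MKLDNNNodePtr outputNode = graph.GetOutputNodeByName("prob");

if (inputNode->isDynamicNode()) {
    InferenceEngine::SizeVector dims = {1, 3, 224, 224};
    inputNode->resetOutputShape({dims});   // same call the infer request makes
}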
136 changes: 97 additions & 39 deletions inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp
@@ -176,6 +176,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::InferImpl() {

ThrowIfCanceled();

// TODO [DS]: rewrite for dynamic shape
execDataPreprocessing(_inputs);

changeDefaultPtr();
@@ -207,6 +208,35 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> MKLDNNPlugin:
return perfMap;
}

void MKLDNNPlugin::MKLDNNInferRequest::createInputBlob(const std::string &name) {
MKLDNNNodeConstPtr inputNode = graph->GetInputNodeByName(name);

if (inputNode->isDynamicNode() && !m_realShapes.count(name)) {
IE_THROW() << "Cannot create blob " << name << " with dynamic shapes";
}

InferenceEngine::TensorDesc origDesc = MemoryDescUtils::convertToTensorDesc(inputNode->getChildEdgesAtPort(0)[0]->getMemory().GetDesc());
InferenceEngine::TensorDesc desc = origDesc;

if (_networkInputs.find(name) != _networkInputs.end()) {
InferenceEngine::Layout l = _networkInputs[name]->getLayout();
InferenceEngine::Precision p = _networkInputs[name]->getPrecision();
InferenceEngine::SizeVector dims = inputNode->isDynamicNode() ? m_realShapes.at(name) : _networkInputs[name]->getTensorDesc().getDims();

desc = InferenceEngine::TensorDesc(p, dims, l);
}

_inputs[name] = make_blob_with_precision(desc);
_inputs[name]->allocate();

// TODO [DS]: enable inplace for dynamic input/output
if (!inputNode->isDynamicNode() &&
origDesc == desc &&
graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) {
externalPtr[name] = _inputs[name]->buffer();
}
}

InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std::string& name) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "GetBlob");

@@ -224,26 +254,16 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std::
}

if (_inputs.find(name) == _inputs.end()) {
auto pBlob = graph->getInputBlob(name);
if (!pBlob) {
IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name;
}

InferenceEngine::TensorDesc desc = pBlob->getTensorDesc();

if (_networkInputs.find(name) != _networkInputs.end()) {
InferenceEngine::Layout l = _networkInputs[name]->getLayout();
InferenceEngine::Precision p = _networkInputs[name]->getPrecision();
InferenceEngine::SizeVector dims = _networkInputs[name]->getTensorDesc().getDims();

desc = InferenceEngine::TensorDesc(p, dims, l);
createInputBlob(name);
}
MKLDNNNodeConstPtr inputNode = graph->GetInputNodeByName(name);
if (inputNode->isDynamicNode()) {
if (!m_realShapes.count(name)) {
IE_THROW() << "Cannot get blob " << name << " which contains dynamic shapes";
}

_inputs[name] = make_blob_with_precision(desc);
_inputs[name]->allocate();
if (pBlob->getTensorDesc() == desc &&
graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) {
externalPtr[name] = _inputs[name]->buffer();
if (_inputs[name]->getTensorDesc().getDims() != m_realShapes.at(name)) {
// TODO [DS]: reshape without reallocate?
createInputBlob(name);
}
}
data = _inputs[name];
@@ -267,6 +287,10 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std::

if (graph->hasOutputWithName(name)) {
if (_outputs.find(name) == _outputs.end()) {
if (graph->GetOutputNodeByName(name)->isDynamicNode()) {
IE_THROW(NotImplemented) << "[DS] Can't get output blob for dynamic shapes before inference";
}

auto pBlob = graph->getOutputBlob(name);
if (!pBlob) {
IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name;
@@ -360,29 +384,37 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In
// pre-processing
_preProcData[name]->setRoiBlob(data);
} else {
size_t inputSize = foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR
? InferenceEngine::details::product(foundInput->getTensorDesc().getDims())
: 1;
if (dataSize != inputSize) {
IE_THROW() << "Input blob size is not equal network input size ("
<< dataSize << "!=" << inputSize << ").";
auto inputNode = graph->GetInputNodeByName(name);
if (foundInput->getInputData()->getPartialShape().rank().get_length() != data->getTensorDesc().getDims().size()) {
IE_THROW(ParameterMismatch) << "Failed to set input blob. Rank mismatch.";
}

if (foundInput->getTensorDesc().getDims() != data->getTensorDesc().getDims()) {
IE_THROW(ParameterMismatch) << "Failed to set input blob. Dimensions mismatch.";
}
if (foundInput->getInputData()->isDynamic()) {
const auto &newShape = data->getTensorDesc().getDims();
m_realShapes[name] = newShape;
inputNode->resetOutputShape({newShape});
} else {
size_t inputSize = foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR ?
InferenceEngine::details::product(foundInput->getTensorDesc().getDims()) : 1;
if (dataSize != inputSize) {
IE_THROW() << "Input blob size is not equal network input size ("
<< dataSize << "!=" << inputSize << ").";
}

if (data->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY &&
foundInput->getTensorDesc().getBlockingDesc() != data->getTensorDesc().getBlockingDesc()) {
IE_THROW(ParameterMismatch) << "Failed to set input blob. Blocking descriptor mismatch.";
}
if (foundInput->getTensorDesc().getDims() != data->getTensorDesc().getDims()) {
IE_THROW(ParameterMismatch) << "Failed to set input blob. Dimensions mismatch.";
}

auto pBlob = graph->getInputBlob(name);
if (!pBlob) {
IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name;
if (data->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY &&
foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY &&
foundInput->getTensorDesc().getBlockingDesc() != data->getTensorDesc().getBlockingDesc()) {
IE_THROW(ParameterMismatch) << "Failed to set input blob. Blocking descriptor mismatch.";
}
}

if (data->getTensorDesc() == pBlob->getTensorDesc() &&
// TODO [DS]: enable inplace for dynamic input/output
if (!inputNode->isDynamicNode() &&
data->getTensorDesc() == MemoryDescUtils::convertToTensorDesc(inputNode->getChildEdgesAtPort(0)[0]->getMemory().GetDesc()) &&
graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) {
externalPtr[name] = data->buffer();
} else if (externalPtr.find(name) != externalPtr.end()) {
@@ -392,6 +424,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In
}
}
if (foundOutput) {
if (foundOutput->isDynamic()) {
IE_THROW(NotImplemented) << "[DS] Can't set dynamic output blob";
}
if (compoundBlobPassed) {
IE_THROW(NotImplemented)
<< "cannot set compound blob: supported only for input pre-processing";
@@ -435,8 +470,8 @@ static inline void changeEdgePtr(const MKLDNNPlugin::MKLDNNEdgePtr &edge, void *

void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
for (auto& it : externalPtr) {
auto input = graph->inputNodesMap.find(it.first);
if (input != graph->inputNodesMap.end()) {
auto input = graph->GetInputNodesMap().find(it.first);
if (input != graph->GetInputNodesMap().end()) {
if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
continue;
// Input cannot be in-place with other primitives
@@ -470,7 +505,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
}

MKLDNNNodePtr output;
for (auto& out : graph->outputNodesMap) {
for (auto& out : graph->GetOutputNodesMap()) {
if (out.first == it.first) {
output = out.second;
break;
@@ -536,3 +571,26 @@ void MKLDNNPlugin::MKLDNNInferRequest::ThrowIfCanceled() const {
_asyncRequest->ThrowIfCanceled();
}
}

// TODO [DS]: analyze performance
// getPartialShape().compatible(newShape)
void MKLDNNPlugin::MKLDNNInferRequest::SetShape(const std::string& name, const InferenceEngine::SizeVector& dims) {
// Check partial shape compatibility
ngraph::PartialShape newShape(dims);
InferenceEngine::InputInfo::Ptr foundInput;
InferenceEngine::DataPtr foundOutput;
if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
if (!foundInput->getInputData()->getPartialShape().compatible(newShape))
IE_THROW() << "New shape " << newShape << " for " << name << " is incompatible with original shape "
<< foundInput->getInputData()->getPartialShape();
} else {
IE_THROW(NotImplemented) << "[DS] Can't SetShape for output node";
// if (!foundOutput->getPartialShape().compatible(newShape))
// IE_THROW() << "New shape " << newShape << " for " << name << " is incompatible with original shape " << foundOutput->getPartialShape();
}

m_realShapes[name] = dims;

auto inputNode = graph->GetInputNodeByName(name);
inputNode->resetOutputShape({dims});
}
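
The compatibility check above means SetShape() accepts any concrete shape that matches the declared PartialShape dimension by dimension, and it is currently only implemented for inputs. Illustrative calls (the input "data" is assumed to be declared as {1, 3, ?, ?}; "prob" is an assumed output name):

request.SetShape("data", {1, 3, 320, 320});   // ok: compatible with {1, 3, ?, ?}
request.SetShape("data", {2, 3, 320, 320});   // throws: the first dimension is fixed to 1
request.SetShape("prob", {1, 1000});          // throws NotImplemented: outputs are not supported yet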