From 7a2d1acd7abfa7ebe66224cebe554c474b410aa2 Mon Sep 17 00:00:00 2001
From: Maxim Andronov <maxim.andronov@intel.com>
Date: Tue, 13 Jul 2021 18:38:06 +0300
Subject: [PATCH] blob dumper rewritten (#11)

* blob dumper rewritten

* applied comments

* applied comments 2
---
 .../src/mkldnn_plugin/mkldnn_node.h           |   1 -
 .../src/mkldnn_plugin/utils/blob_dump.cpp     | 268 +++++-------------
 .../src/mkldnn_plugin/utils/blob_dump.h       |  25 +-
 .../src/mkldnn_plugin/utils/node_dumper.cpp   |  21 +-
 4 files changed, 89 insertions(+), 226 deletions(-)
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
index ce4d611dae6a23..d6e00ead82f0e0 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
@@ -701,7 +701,6 @@ class MKLDNNNode {
     MKLDNNPrimitive prim;
     std::vector<MKLDNNDescriptor> descs;
 
-    InferenceEngine::Blob::Ptr ext_scales;
     MKLDNNWeightsSharing::Ptr weightCache;
 
     Algorithm algorithm = Algorithm::Undefined;
diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp
index f9813214db4c33..43174f49430905 100644
--- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp
+++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp
@@ -5,6 +5,8 @@
 #include "blob_dump.h"
 #include "blob_factory.hpp"
 #include "mkldnn_memory.h"
+#include "mkldnn_extension_utils.h"
+#include <nodes/common/cpu_memcpy.h>
 
 #include "common/memory_desc_wrapper.hpp"
 
@@ -36,7 +38,7 @@ struct IEB_HEADER {
     unsigned long scaling_data_size;
 };
 
-static IEB_HEADER prepare_header(const TensorDesc& desc) {
+static IEB_HEADER prepare_header(const MemoryDesc& desc) {
     IEB_HEADER header = {};
 
     header.magic[0] = IEB_MAGIC[0];
@@ -50,19 +52,20 @@ static IEB_HEADER prepare_header(const TensorDesc& desc) {
 
     header.precision = desc.getPrecision();
 
-    if (desc.getDims().size() > 7)
+    if (desc.getShape().getRank() > 7)
         IE_THROW() << "Dumper support max 7D blobs";
 
-    header.ndims = desc.getDims().size();
+    header.ndims = desc.getShape().getRank();
+    const auto &dims = desc.getShape().getStaticDims();
     for (int i = 0; i < header.ndims; i++)
-        header.dims[i] = desc.getDims()[i];
+        header.dims[i] = dims[i];
 
     header.scaling_axis = NO_SCALES;
 
     return header;
 }
 
-static TensorDesc parse_header(IEB_HEADER &header) {
+static MKLDNNMemoryDesc parse_header(IEB_HEADER &header) {
     if (header.magic[0] != IEB_MAGIC[0] ||
         header.magic[1] != IEB_MAGIC[1] ||
         header.magic[2] != IEB_MAGIC[2] ||
@@ -73,177 +76,126 @@ static TensorDesc parse_header(IEB_HEADER &header) {
         header.ver[1] != 1)
         IE_THROW() << "Dumper cannot parse file. Unsupported IEB format version.";
 
-    Precision prc = Precision(static_cast<Precision::ePrecision>(header.precision));
+    const auto prc = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision(static_cast<Precision::ePrecision>(header.precision)));
     SizeVector dims(header.ndims);
     for (int i = 0; i < header.ndims; i++)
         dims[i] = header.dims[i];
 
-    return TensorDesc {prc, dims, TensorDesc::getLayoutByDims(dims) };
+    return MKLDNNMemoryDesc{MKLDNNDims(dims), prc, MKLDNNMemory::GetPlainFormatByRank(dims.size()) };
 }
 
+static void prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector<uint8_t> &data) {  // out: 'data' receives the elements of 'memory' in plain order
+    const auto &desc = memory->GetDesc();
+    size_t data_size = desc.getShape().getElementsCount();  // number of elements according to the shape
+    const auto size = data_size * desc.getPrecision().size();  // size the output byte vector is resized to
+    data.resize(size);
 
-bool is_plain(const Blob::Ptr &blob) {
-    bool res = true;
-
-    auto orig_strides = blob->getTensorDesc().getBlockingDesc().getStrides();
-    auto orig_order = blob->getTensorDesc().getBlockingDesc().getOrder();
-    auto dims = blob->getTensorDesc().getDims();
-
-    for (int stride = 1, i = dims.size() - 1; i >= 0; --i) {
-        if (stride != orig_strides[i] || i != orig_order[i]) res = false;
-        stride *= dims[i];
-    }
-
-    return res;
-}
-
-static Blob::Ptr prepare_plain_data(Blob::Ptr blob) {
     // check if it already plain
-    if (is_plain(blob)) return blob;
-
-    Blob::Ptr pln_blob = make_plain_blob(blob->getTensorDesc().getPrecision(), blob->getTensorDesc().getDims());
-    pln_blob->allocate();
+    if (desc.checkGeneralLayout(GeneralLayout::ncsp)) {
+        cpu_memcpy(data.data(), reinterpret_cast<const uint8_t*>(memory->GetPtr()), size);  // NOTE(review): GetPtr() here, GetData() below - confirm both start at the same element
+        return;
+    }
 
     // Copy to plain
-    // TODO [DS]: blob dumper should be rewritten using Memory object
-    MKLDNNMemoryDesc mdesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(blob->getTensorDesc());
-    mkldnn::memory::desc desc = mdesc;
-    mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data);
+    const void *ptr = memory->GetData();  // source buffer, indexed below through desc.getOffset(i)
 
-    size_t data_size = blob->size();
-
-    // TODO: make it with blob_copy utility
-    switch (blob->getTensorDesc().getPrecision()) {
+    switch (desc.getPrecision()) {  // elements are moved as raw words of the matching width, no value conversion
         case Precision::FP32:
         case Precision::I32: {
-            auto *pln_blob_ptr = pln_blob->buffer().as<int32_t*>();
-            auto *blob_ptr = blob->buffer().as<int32_t*>();
+            auto *pln_blob_ptr = reinterpret_cast<int32_t *>(data.data());  // FP32 and I32 share the int32_t copy path
+            auto *blob_ptr = reinterpret_cast<const int32_t *>(ptr);
             for (size_t i = 0; i < data_size; i++)
-                pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
+                pln_blob_ptr[i] = blob_ptr[desc.getOffset(i)];  // i-th output element is read from source index getOffset(i)
             break;
         }
-        case Precision::I16:
-        case Precision::U16:
         case Precision::BF16: {
-            auto *pln_blob_ptr = pln_blob->buffer().as<int16_t *>();
-            auto *blob_ptr = blob->buffer().as<int16_t *>();
-            for (size_t i = 0; i < data_size; i++) pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
+            auto *pln_blob_ptr = reinterpret_cast<int16_t *>(data.data());  // BF16 is copied as int16_t words
+            auto *blob_ptr = reinterpret_cast<const int16_t *>(ptr);
+            for (size_t i = 0; i < data_size; i++)
+                pln_blob_ptr[i] = blob_ptr[desc.getOffset(i)];
             break;
         }
         case Precision::I8:
         case Precision::U8: {
-            auto *pln_blob_ptr = pln_blob->buffer().as<int8_t*>();
-            auto *blob_ptr = blob->buffer().as<int8_t *>();
+            auto *pln_blob_ptr = reinterpret_cast<int8_t*>(data.data());  // I8 and U8 share the int8_t copy path
+            auto *blob_ptr = reinterpret_cast<const int8_t *>(ptr);
             for (size_t i = 0; i < data_size; i++)
-                pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
+                pln_blob_ptr[i] = blob_ptr[desc.getOffset(i)];
             break;
         }
         default:
             IE_THROW() << "Dumper. Unsupported precision";
     }
-
-    return pln_blob;
 }
 
 void BlobDumper::dump(std::ostream &stream) const {
-    if (!_blob)
-        IE_THROW() << "Dumper cannot dump empty Blob";
-
-    if (_blob->buffer().as<float*>() == nullptr)
-        IE_THROW() << "Dumper cannot dump. Blob is not allocated.";
+    if (memory == nullptr)  // e.g. a default-constructed dumper: there is nothing to write
+        IE_THROW() << "Dumper cannot dump. Memory is not allocated.";
 
-    IEB_HEADER header = prepare_header(_blob->getTensorDesc());
-    Blob::Ptr pln_blob = prepare_plain_data(_blob);
+    IEB_HEADER header = prepare_header(memory->GetDesc());
+    std::vector<uint8_t> data;  // temporary buffer filled by prepare_plain_data()
+    prepare_plain_data(this->memory, data);
 
     header.data_offset = sizeof(header);
-    header.data_size = pln_blob->byteSize();
+    header.data_size = data.size();  // payload size in bytes
     header.scaling_data_offset = 0;
     header.scaling_data_size = 0;
 
-    if (_scales) {
-        header.scaling_axis = 1;
-        header.scaling_data_offset = header.data_offset + header.data_size;
-        header.scaling_data_size = _scales->byteSize();
-    }
-
-    stream.write(reinterpret_cast<char*>(&header), sizeof(header));
-    stream.write(pln_blob->buffer().as<char*>(), pln_blob->byteSize());
-
-    if (_scales) {
-        stream.write(_scales->buffer().as<char*>(), _scales->byteSize());
-    }
+    stream.write(reinterpret_cast<const char*>(&header), sizeof(header));  // header first, so the payload starts at data_offset
+    stream.write(reinterpret_cast<char*>(data.data()), data.size());  // then the payload; no scales section is written
 }
 
 void BlobDumper::dumpAsTxt(std::ostream &stream) const {
-    if (!_blob)
-        IE_THROW() << "Dumper cannot dump empty Blob";
-
-    if (_blob->buffer().as<float*>() == nullptr)
-        IE_THROW() << "Dumper cannot dump. Blob is not allocated.";
+    if (memory == nullptr)
+        IE_THROW() << "Dumper cannot dump. Memory is not allocated.";
 
-    SizeVector dims = _blob->getTensorDesc().getDims();
+    const auto dims = memory->GetDims();
+    const auto &desc = memory->GetDesc();
+    size_t data_size = desc.getShape().getElementsCount();
 
     // Header like "U8 4D shape: 2 3 224 224 ()
-    stream << _blob->getTensorDesc().getPrecision().name() << " "
+    stream << memory->GetDesc().getPrecision().name() << " "
            << dims.size() << "D "
            << "shape: ";
     for (size_t d : dims) stream << d << " ";
-    stream << "(" << _blob->size() << ")" <<
-    " by address 0x" << std::hex << _blob->buffer().as<long long>() << std::dec <<std::endl;
-
-    // Dump data
-    // TODO [DS]: blob dumper should be rewritten using Memory object
-    MKLDNNMemoryDesc mdesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(_blob->getTensorDesc());
-    mkldnn::memory::desc desc = mdesc;
-    mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data);
-
-    size_t data_size = _blob->size();
-    switch (_blob->getTensorDesc().getPrecision()) {
-        case Precision::FP32: {
-            auto *blob_ptr = _blob->buffer().as<float*>();
+    stream << "(" << data_size << ")" <<
+    " by address 0x" << std::hex << reinterpret_cast<const long long *>(memory->GetData()) << std::dec <<std::endl;
+
+    const void *ptr = memory->GetData();
+
+    switch (desc.getPrecision()) {
+        case Precision::FP32 : {
+            auto *blob_ptr = reinterpret_cast<const float*>(ptr);
             for (size_t i = 0; i < data_size; i++)
-                stream << blob_ptr[blob_wrp.off_l(i)] << std::endl;
+                stream << blob_ptr[desc.getOffset(i)] << std::endl;
             break;
         }
-        case Precision::BF16:
-        {
-            auto *blob_ptr = _blob->buffer().as<int16_t *>();
+        case Precision::BF16: {
+            auto *blob_ptr = reinterpret_cast<const int16_t*>(ptr);
             for (size_t i = 0; i < data_size; i++) {
-                int i16n = blob_ptr[blob_wrp.off_l(i)];
+                int i16n = blob_ptr[desc.getOffset(i)];
                 i16n = i16n << 16;
-                float fn = *(reinterpret_cast<float *>(&i16n));
+                float fn = *(reinterpret_cast<const float *>(&i16n));
                 stream << fn << std::endl;
             }
             break;
         }
         case Precision::I32: {
-            auto *blob_ptr = _blob->buffer().as<int32_t*>();
-            for (size_t i = 0; i < data_size; i++)
-                stream << blob_ptr[blob_wrp.off_l(i)] << std::endl;
-            break;
-        }
-        case Precision::I16: {
-            auto *blob_ptr = _blob->buffer().as<int16_t*>();
-            for (size_t i = 0; i < data_size; i++)
-                stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
-            break;
-        }
-        case Precision::U16: {
-            auto *blob_ptr = _blob->buffer().as<uint16_t*>();
+            auto *blob_ptr = reinterpret_cast<const int32_t*>(ptr);
             for (size_t i = 0; i < data_size; i++)
-                stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
+                stream << blob_ptr[desc.getOffset(i)] << std::endl;
             break;
         }
         case Precision::I8: {
-            auto *blob_ptr = _blob->buffer().as<int8_t*>();
+            auto *blob_ptr = reinterpret_cast<const int8_t*>(ptr);
             for (size_t i = 0; i < data_size; i++)
-                stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
+                stream << static_cast<int>(blob_ptr[desc.getOffset(i)]) << std::endl;
             break;
         }
         case Precision::U8: {
-            auto *blob_ptr = _blob->buffer().as<uint8_t*>();
+            auto *blob_ptr = reinterpret_cast<const uint8_t*>(ptr);
             for (size_t i = 0; i < data_size; i++)
-                stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
+                stream << static_cast<int>(blob_ptr[desc.getOffset(i)]) << std::endl;
             break;
         }
         default:
@@ -255,29 +207,12 @@ BlobDumper BlobDumper::read(std::istream &stream) {
     IEB_HEADER header;
     stream.read(reinterpret_cast<char*>(&header), sizeof(header));
 
-    TensorDesc desc = parse_header(header);
-    Blob::Ptr blob = make_blob_with_precision(desc);
-    blob->allocate();
+    const auto desc = parse_header(header);
 
+    BlobDumper res(desc);
     stream.seekg(header.data_offset, stream.beg);
-    stream.read(blob->buffer().as<char*>(), header.data_size);
+    stream.read(reinterpret_cast<char *>(res.getDataPtr()), header.data_size);
 
-    BlobDumper res(blob);
-
-    // Parse scales fields.
-    if (header.scaling_axis != NO_SCALES) {
-        if (header.scaling_axis != 1)
-            IE_THROW() << "Dumper support scaling only for channel dims.";
-
-        size_t scl_size = header.scaling_data_size / sizeof(float);
-        auto scl = make_blob_with_precision({Precision::FP32, {scl_size}, C});
-        scl->allocate();
-
-        stream.seekg(header.scaling_data_offset, stream.beg);
-        stream.read(scl->buffer().as<char*>(), header.scaling_data_size);
-
-        res._scales = scl;
-    }
     return res;
 }
 
@@ -312,73 +247,4 @@ void BlobDumper::dumpAsTxt(const std::string& dump_path) const {
     dump_file.close();
 }
 
-Blob::Ptr BlobDumper::get() {
-    return _blob;
-}
-
-template <typename data_t>
-static void plain_copy(const Blob::Ptr &from, const Blob::Ptr &scls, Blob::Ptr &to) {
-    auto dims = from->getTensorDesc().getDims();
-
-    size_t data_size = from->size();
-    size_t outer_size = dims[0];
-    size_t c_size = dims.size() > 1 ? dims[1] : 1;
-    size_t inner_size = dims.size() == 4 ? dims[2]*dims[3] :
-                        dims.size() == 3 ? dims[2] : 1;
-
-    auto to_data  = to->buffer().as<float*>();
-    auto from_data = from->buffer().as<data_t*>();
-
-    if (scls) {
-        auto scls_data = scls->buffer().as<float*>();
-
-        for (size_t o=0; o < outer_size; o++)
-        for (size_t c=0; c < c_size; c++)
-        for (size_t i=0; i < inner_size; i++)
-            *to_data++ = static_cast<float>(*from_data++) * scls_data[c];
-    } else {
-        for (size_t i=0; i < data_size; i++)
-            *to_data++ = static_cast<float>(*from_data++);
-    }
-}
-
-Blob::Ptr BlobDumper::getRealValue() {
-    if (_blob->getTensorDesc().getPrecision() == Precision::FP32 && !_scales)
-        return _blob;
-
-    auto res = make_plain_blob(Precision::FP32, _blob->getTensorDesc().getDims());
-    res->allocate();
-
-    switch (_blob->getTensorDesc().getPrecision()) {
-        case Precision::U8: plain_copy<uint8_t>(_blob, _scales, res); break;
-        case Precision::FP32: plain_copy<float>(_blob, _scales, res); break;
-        case Precision::I8: plain_copy<int8_t >(_blob, _scales, res); break;
-        default: IE_THROW() << "Unsupported precesion for getRealValue method.";
-    }
-
-    return res;
-}
-
-
-BlobDumper& BlobDumper::withScales(InferenceEngine::Blob::Ptr scales) {
-    if ( _blob->getTensorDesc().getDims().size() < 2  ||
-        scales->getTensorDesc().getDims().size() != 1 ||
-        scales->getTensorDesc().getDims()[0] != _blob->getTensorDesc().getDims()[1] ||
-        scales->getTensorDesc().getPrecision() != Precision::FP32)
-        IE_THROW() << "Dumper cannot use passed scales. Blob has incompatible shape.";
-
-    _scales = scales;
-    return *this;
-}
-
-BlobDumper& BlobDumper::withoutScales() {
-    _scales.reset();
-    return *this;
-}
-
-
-const InferenceEngine::Blob::Ptr& BlobDumper::getScales() const {
-    return _scales;
-}
-
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h
index c2cc793e42107b..babf450b587eca 100644
--- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h
+++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h
@@ -4,7 +4,7 @@
 
 #pragma once
 
-#include "ie_blob.h"
+#include "mkldnn_memory.h"
 
 #include <string>
 
@@ -19,15 +19,22 @@ namespace MKLDNNPlugin {
  * NB! Channel is a second dimension for all blob types.
  */
 class BlobDumper {
-    InferenceEngine::Blob::Ptr _blob;
-    InferenceEngine::Blob::Ptr _scales;
+    // Tensor to dump. When built from a descriptor (see read()), its buffer is
+    // allocated and owned by the MKLDNNMemory object itself.
+    MKLDNNMemoryPtr memory;
 
 public:
     BlobDumper() = default;
+    BlobDumper(const MKLDNNMemoryDesc &desc) {
+        // A default-constructed mkldnn::engine is an empty handle; use a real CPU engine.
+        mkldnn::engine eng(mkldnn::engine::kind::cpu, 0);
+        memory = std::make_shared<MKLDNNMemory>(eng);
+        memory->Create(desc);  // no external buffer: keeps copies of the dumper valid
+    }
     BlobDumper(const BlobDumper&) = default;
     BlobDumper& operator = (BlobDumper&&) = default;
 
-    explicit BlobDumper(const InferenceEngine::Blob::Ptr blob):_blob(blob) {}
+    explicit BlobDumper(const MKLDNNMemoryPtr &_memory) : memory(_memory) {}  // stores the given pointer; the tensor data is not copied
 
     static BlobDumper read(const std::string &file_path);
     static BlobDumper read(std::istream &stream);
@@ -38,13 +45,9 @@ class BlobDumper {
     void dumpAsTxt(const std::string &file_path) const;
     void dumpAsTxt(std::ostream &stream) const;
 
-    BlobDumper& withScales(InferenceEngine::Blob::Ptr scales);
-    BlobDumper& withoutScales();
-
-    const InferenceEngine::Blob::Ptr& getScales() const;
-
-    InferenceEngine::Blob::Ptr get();
-    InferenceEngine::Blob::Ptr getRealValue();
+    void *getDataPtr() const {  // returns whatever MKLDNNMemory::GetPtr() reports for the wrapped memory
+        return memory->GetPtr();  // NOTE(review): 'memory' is null for a default-constructed dumper - confirm callers never hit that
+    }
 };
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp
index f8e5fd4da8e023..2e0b06c0e4d362 100644
--- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp
+++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp
@@ -9,6 +9,7 @@
 #include "ie_common.h"
 #include "utils/blob_dump.h"
 #include "utils/debug_capabilities.h"
+#include "cpu_memory_desc_utils.h"
 
 #include <array>
 #include <regex>
@@ -69,12 +70,8 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const {
         if (desc.getPrecision() == Precision::BIN)
             continue;
 
-        // TODO [DS]: rewrite BlobDumper to use MKLDNNMemory
-//        BlobDumper dumper(prEdge->getBlob());
-//        if (pr->ext_scales)
-//            dumper.withScales(pr->ext_scales);
-//
-//        dump(dumper, dump_file);
+        BlobDumper dumper(prEdge->getMemoryPtr());
+        dump(dumper, dump_file);
     }
 
     dumpInternalBlobs(node);
@@ -106,12 +103,8 @@ void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const {
         if (desc.getPrecision() == Precision::BIN)
             continue;
 
-        // TODO [DS]: rewrite BlobDumper to use MKLDNNMemory
-//        BlobDumper dumper(childEdge->getBlob());
-//        if (node->ext_scales)
-//            dumper.withScales(node->ext_scales);
-//
-//        dump(dumper, dump_file);
+        BlobDumper dumper(childEdge->getMemoryPtr());
+        dump(dumper, dump_file);
     }
 }
 
@@ -128,7 +121,9 @@ void NodeDumper::dumpInternalBlobs(const MKLDNNNodePtr& node) const {
         if (desc.getPrecision() == Precision::BIN)
             continue;
 
-        BlobDumper dumper(blb);
+        MKLDNNMemoryPtr memory = std::make_shared<MKLDNNMemory>(node->getEngine());
+        memory->Create(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc), blb->buffer());
+        BlobDumper dumper(memory);
         dump(dumper, dump_file);
     }
 }