From 7a2d1acd7abfa7ebe66224cebe554c474b410aa2 Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Tue, 13 Jul 2021 18:38:06 +0300 Subject: [PATCH] blob dumper rewrote (#11) * blob dumber rewrited * applied comments * applied comments 2 --- .../src/mkldnn_plugin/mkldnn_node.h | 1 - .../src/mkldnn_plugin/utils/blob_dump.cpp | 268 +++++------------- .../src/mkldnn_plugin/utils/blob_dump.h | 25 +- .../src/mkldnn_plugin/utils/node_dumper.cpp | 21 +- 4 files changed, 89 insertions(+), 226 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index ce4d611dae6a23..d6e00ead82f0e0 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -701,7 +701,6 @@ class MKLDNNNode { MKLDNNPrimitive prim; std::vector descs; - InferenceEngine::Blob::Ptr ext_scales; MKLDNNWeightsSharing::Ptr weightCache; Algorithm algorithm = Algorithm::Undefined; diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp index f9813214db4c33..43174f49430905 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp @@ -5,6 +5,8 @@ #include "blob_dump.h" #include "blob_factory.hpp" #include "mkldnn_memory.h" +#include "mkldnn_extension_utils.h" +#include #include "common/memory_desc_wrapper.hpp" @@ -36,7 +38,7 @@ struct IEB_HEADER { unsigned long scaling_data_size; }; -static IEB_HEADER prepare_header(const TensorDesc& desc) { +static IEB_HEADER prepare_header(const MemoryDesc& desc) { IEB_HEADER header = {}; header.magic[0] = IEB_MAGIC[0]; @@ -50,19 +52,20 @@ static IEB_HEADER prepare_header(const TensorDesc& desc) { header.precision = desc.getPrecision(); - if (desc.getDims().size() > 7) + if (desc.getShape().getRank() > 7) IE_THROW() << "Dumper support max 7D blobs"; - header.ndims = desc.getDims().size(); + header.ndims = desc.getShape().getRank(); + const auto &dims = desc.getShape().getStaticDims(); for (int i = 0; i < header.ndims; i++) - header.dims[i] = desc.getDims()[i]; + header.dims[i] = dims[i]; header.scaling_axis = NO_SCALES; return header; } -static TensorDesc parse_header(IEB_HEADER &header) { +static MKLDNNMemoryDesc parse_header(IEB_HEADER &header) { if (header.magic[0] != IEB_MAGIC[0] || header.magic[1] != IEB_MAGIC[1] || header.magic[2] != IEB_MAGIC[2] || @@ -73,177 +76,126 @@ static TensorDesc parse_header(IEB_HEADER &header) { header.ver[1] != 1) IE_THROW() << "Dumper cannot parse file. Unsupported IEB format version."; - Precision prc = Precision(static_cast(header.precision)); + const auto prc = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision(static_cast(header.precision))); SizeVector dims(header.ndims); for (int i = 0; i < header.ndims; i++) dims[i] = header.dims[i]; - return TensorDesc {prc, dims, TensorDesc::getLayoutByDims(dims) }; + return MKLDNNMemoryDesc{MKLDNNDims(dims), prc, MKLDNNMemory::GetPlainFormatByRank(dims.size()) }; } +static void prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector &data) { + const auto &desc = memory->GetDesc(); + size_t data_size = desc.getShape().getElementsCount(); + const auto size = data_size * desc.getPrecision().size(); + data.resize(size); -bool is_plain(const Blob::Ptr &blob) { - bool res = true; - - auto orig_strides = blob->getTensorDesc().getBlockingDesc().getStrides(); - auto orig_order = blob->getTensorDesc().getBlockingDesc().getOrder(); - auto dims = blob->getTensorDesc().getDims(); - - for (int stride = 1, i = dims.size() - 1; i >= 0; --i) { - if (stride != orig_strides[i] || i != orig_order[i]) res = false; - stride *= dims[i]; - } - - return res; -} - -static Blob::Ptr prepare_plain_data(Blob::Ptr blob) { // check if it already plain - if (is_plain(blob)) return blob; - - Blob::Ptr pln_blob = make_plain_blob(blob->getTensorDesc().getPrecision(), blob->getTensorDesc().getDims()); - pln_blob->allocate(); + if (desc.checkGeneralLayout(GeneralLayout::ncsp)) { + cpu_memcpy(data.data(), reinterpret_cast(memory->GetPtr()), size); + return; + } // Copy to plain - // TODO [DS]: blob dumper should be rewritten using Memory object - MKLDNNMemoryDesc mdesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(blob->getTensorDesc()); - mkldnn::memory::desc desc = mdesc; - mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data); + const void *ptr = memory->GetData(); - size_t data_size = blob->size(); - - // TODO: make it with blob_copy utility - switch (blob->getTensorDesc().getPrecision()) { + switch (desc.getPrecision()) { case Precision::FP32: case Precision::I32: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + pln_blob_ptr[i] = blob_ptr[desc.getOffset(i)]; break; } - case Precision::I16: - case Precision::U16: case Precision::BF16: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); + for (size_t i = 0; i < data_size; i++) + pln_blob_ptr[i] = blob_ptr[desc.getOffset(i)]; break; } case Precision::I8: case Precision::U8: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + pln_blob_ptr[i] = blob_ptr[desc.getOffset(i)]; break; } default: IE_THROW() << "Dumper. Unsupported precision"; } - - return pln_blob; } void BlobDumper::dump(std::ostream &stream) const { - if (!_blob) - IE_THROW() << "Dumper cannot dump empty Blob"; - - if (_blob->buffer().as() == nullptr) - IE_THROW() << "Dumper cannot dump. Blob is not allocated."; + if (memory == nullptr) + IE_THROW() << "Dumper cannot dump. Memory is not allocated."; - IEB_HEADER header = prepare_header(_blob->getTensorDesc()); - Blob::Ptr pln_blob = prepare_plain_data(_blob); + IEB_HEADER header = prepare_header(memory->GetDesc()); + std::vector data; + prepare_plain_data(this->memory, data); header.data_offset = sizeof(header); - header.data_size = pln_blob->byteSize(); + header.data_size = data.size(); header.scaling_data_offset = 0; header.scaling_data_size = 0; - if (_scales) { - header.scaling_axis = 1; - header.scaling_data_offset = header.data_offset + header.data_size; - header.scaling_data_size = _scales->byteSize(); - } - - stream.write(reinterpret_cast(&header), sizeof(header)); - stream.write(pln_blob->buffer().as(), pln_blob->byteSize()); - - if (_scales) { - stream.write(_scales->buffer().as(), _scales->byteSize()); - } + stream.write(reinterpret_cast(&header), sizeof(header)); + stream.write(reinterpret_cast(data.data()), data.size()); } void BlobDumper::dumpAsTxt(std::ostream &stream) const { - if (!_blob) - IE_THROW() << "Dumper cannot dump empty Blob"; - - if (_blob->buffer().as() == nullptr) - IE_THROW() << "Dumper cannot dump. Blob is not allocated."; + if (memory == nullptr) + IE_THROW() << "Dumper cannot dump. Memory is not allocated."; - SizeVector dims = _blob->getTensorDesc().getDims(); + const auto dims = memory->GetDims(); + const auto &desc = memory->GetDesc(); + size_t data_size = desc.getShape().getElementsCount(); // Header like "U8 4D shape: 2 3 224 224 () - stream << _blob->getTensorDesc().getPrecision().name() << " " + stream << memory->GetDesc().getPrecision().name() << " " << dims.size() << "D " << "shape: "; for (size_t d : dims) stream << d << " "; - stream << "(" << _blob->size() << ")" << - " by address 0x" << std::hex << _blob->buffer().as() << std::dec <getTensorDesc()); - mkldnn::memory::desc desc = mdesc; - mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data); - - size_t data_size = _blob->size(); - switch (_blob->getTensorDesc().getPrecision()) { - case Precision::FP32: { - auto *blob_ptr = _blob->buffer().as(); + stream << "(" << data_size << ")" << + " by address 0x" << std::hex << reinterpret_cast(memory->GetData()) << std::dec <GetData(); + + switch (desc.getPrecision()) { + case Precision::FP32 : { + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << blob_ptr[blob_wrp.off_l(i)] << std::endl; + stream << blob_ptr[desc.getOffset(i)] << std::endl; break; } - case Precision::BF16: - { - auto *blob_ptr = _blob->buffer().as(); + case Precision::BF16: { + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) { - int i16n = blob_ptr[blob_wrp.off_l(i)]; + int i16n = blob_ptr[desc.getOffset(i)]; i16n = i16n << 16; - float fn = *(reinterpret_cast(&i16n)); + float fn = *(reinterpret_cast(&i16n)); stream << fn << std::endl; } break; } case Precision::I32: { - auto *blob_ptr = _blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) - stream << blob_ptr[blob_wrp.off_l(i)] << std::endl; - break; - } - case Precision::I16: { - auto *blob_ptr = _blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; - break; - } - case Precision::U16: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << blob_ptr[desc.getOffset(i)] << std::endl; break; } case Precision::I8: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << static_cast(blob_ptr[desc.getOffset(i)]) << std::endl; break; } case Precision::U8: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << static_cast(blob_ptr[desc.getOffset(i)]) << std::endl; break; } default: @@ -255,29 +207,12 @@ BlobDumper BlobDumper::read(std::istream &stream) { IEB_HEADER header; stream.read(reinterpret_cast(&header), sizeof(header)); - TensorDesc desc = parse_header(header); - Blob::Ptr blob = make_blob_with_precision(desc); - blob->allocate(); + const auto desc = parse_header(header); + BlobDumper res(desc); stream.seekg(header.data_offset, stream.beg); - stream.read(blob->buffer().as(), header.data_size); + stream.read(reinterpret_cast(res.getDataPtr()), header.data_size); - BlobDumper res(blob); - - // Parse scales fields. - if (header.scaling_axis != NO_SCALES) { - if (header.scaling_axis != 1) - IE_THROW() << "Dumper support scaling only for channel dims."; - - size_t scl_size = header.scaling_data_size / sizeof(float); - auto scl = make_blob_with_precision({Precision::FP32, {scl_size}, C}); - scl->allocate(); - - stream.seekg(header.scaling_data_offset, stream.beg); - stream.read(scl->buffer().as(), header.scaling_data_size); - - res._scales = scl; - } return res; } @@ -312,73 +247,4 @@ void BlobDumper::dumpAsTxt(const std::string& dump_path) const { dump_file.close(); } -Blob::Ptr BlobDumper::get() { - return _blob; -} - -template -static void plain_copy(const Blob::Ptr &from, const Blob::Ptr &scls, Blob::Ptr &to) { - auto dims = from->getTensorDesc().getDims(); - - size_t data_size = from->size(); - size_t outer_size = dims[0]; - size_t c_size = dims.size() > 1 ? dims[1] : 1; - size_t inner_size = dims.size() == 4 ? dims[2]*dims[3] : - dims.size() == 3 ? dims[2] : 1; - - auto to_data = to->buffer().as(); - auto from_data = from->buffer().as(); - - if (scls) { - auto scls_data = scls->buffer().as(); - - for (size_t o=0; o < outer_size; o++) - for (size_t c=0; c < c_size; c++) - for (size_t i=0; i < inner_size; i++) - *to_data++ = static_cast(*from_data++) * scls_data[c]; - } else { - for (size_t i=0; i < data_size; i++) - *to_data++ = static_cast(*from_data++); - } -} - -Blob::Ptr BlobDumper::getRealValue() { - if (_blob->getTensorDesc().getPrecision() == Precision::FP32 && !_scales) - return _blob; - - auto res = make_plain_blob(Precision::FP32, _blob->getTensorDesc().getDims()); - res->allocate(); - - switch (_blob->getTensorDesc().getPrecision()) { - case Precision::U8: plain_copy(_blob, _scales, res); break; - case Precision::FP32: plain_copy(_blob, _scales, res); break; - case Precision::I8: plain_copy(_blob, _scales, res); break; - default: IE_THROW() << "Unsupported precesion for getRealValue method."; - } - - return res; -} - - -BlobDumper& BlobDumper::withScales(InferenceEngine::Blob::Ptr scales) { - if ( _blob->getTensorDesc().getDims().size() < 2 || - scales->getTensorDesc().getDims().size() != 1 || - scales->getTensorDesc().getDims()[0] != _blob->getTensorDesc().getDims()[1] || - scales->getTensorDesc().getPrecision() != Precision::FP32) - IE_THROW() << "Dumper cannot use passed scales. Blob has incompatible shape."; - - _scales = scales; - return *this; -} - -BlobDumper& BlobDumper::withoutScales() { - _scales.reset(); - return *this; -} - - -const InferenceEngine::Blob::Ptr& BlobDumper::getScales() const { - return _scales; -} - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h index c2cc793e42107b..babf450b587eca 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h @@ -4,7 +4,7 @@ #pragma once -#include "ie_blob.h" +#include "mkldnn_memory.h" #include @@ -19,15 +19,22 @@ namespace MKLDNNPlugin { * NB! Channel is a second dimension for all blob types. */ class BlobDumper { - InferenceEngine::Blob::Ptr _blob; - InferenceEngine::Blob::Ptr _scales; + MKLDNNMemoryPtr memory; + // need to store data after read + std::vector data; public: BlobDumper() = default; + BlobDumper(const MKLDNNMemoryDesc &desc) { + data.resize(desc.getMemSize()); + mkldnn::engine eng; + memory = std::make_shared(eng); + memory->Create(desc, data.data()); + } BlobDumper(const BlobDumper&) = default; BlobDumper& operator = (BlobDumper&&) = default; - explicit BlobDumper(const InferenceEngine::Blob::Ptr blob):_blob(blob) {} + explicit BlobDumper(const MKLDNNMemoryPtr &_memory) : memory(_memory) {} static BlobDumper read(const std::string &file_path); static BlobDumper read(std::istream &stream); @@ -38,13 +45,9 @@ class BlobDumper { void dumpAsTxt(const std::string &file_path) const; void dumpAsTxt(std::ostream &stream) const; - BlobDumper& withScales(InferenceEngine::Blob::Ptr scales); - BlobDumper& withoutScales(); - - const InferenceEngine::Blob::Ptr& getScales() const; - - InferenceEngine::Blob::Ptr get(); - InferenceEngine::Blob::Ptr getRealValue(); + void *getDataPtr() const { + return memory->GetPtr(); + } }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp index f8e5fd4da8e023..2e0b06c0e4d362 100644 --- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp @@ -9,6 +9,7 @@ #include "ie_common.h" #include "utils/blob_dump.h" #include "utils/debug_capabilities.h" +#include "cpu_memory_desc_utils.h" #include #include @@ -69,12 +70,8 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const { if (desc.getPrecision() == Precision::BIN) continue; - // TODO [DS]: rewrite BlobDumper to use MKLDNNMemory -// BlobDumper dumper(prEdge->getBlob()); -// if (pr->ext_scales) -// dumper.withScales(pr->ext_scales); -// -// dump(dumper, dump_file); + BlobDumper dumper(prEdge->getMemoryPtr()); + dump(dumper, dump_file); } dumpInternalBlobs(node); @@ -106,12 +103,8 @@ void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const { if (desc.getPrecision() == Precision::BIN) continue; - // TODO [DS]: rewrite BlobDumper to use MKLDNNMemory -// BlobDumper dumper(childEdge->getBlob()); -// if (node->ext_scales) -// dumper.withScales(node->ext_scales); -// -// dump(dumper, dump_file); + BlobDumper dumper(childEdge->getMemoryPtr()); + dump(dumper, dump_file); } } @@ -128,7 +121,9 @@ void NodeDumper::dumpInternalBlobs(const MKLDNNNodePtr& node) const { if (desc.getPrecision() == Precision::BIN) continue; - BlobDumper dumper(blb); + MKLDNNMemoryPtr memory = std::make_shared(node->getEngine()); + memory->Create(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc), blb->buffer()); + BlobDumper dumper(memory); dump(dumper, dump_file); } }