Skip to content

Commit

Permalink
blob dumper rewrote (openvinotoolkit#11)
Browse files Browse the repository at this point in the history
* blob dumber rewrited

* applied comments

* applied comments 2
  • Loading branch information
Maxim Andronov committed Jul 14, 2021
1 parent d85a983 commit 7a2d1ac
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 226 deletions.
1 change: 0 additions & 1 deletion inference-engine/src/mkldnn_plugin/mkldnn_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,6 @@ class MKLDNNNode {
MKLDNNPrimitive prim;
std::vector<MKLDNNDescriptor> descs;

InferenceEngine::Blob::Ptr ext_scales;
MKLDNNWeightsSharing::Ptr weightCache;

Algorithm algorithm = Algorithm::Undefined;
Expand Down
268 changes: 67 additions & 201 deletions inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include "blob_dump.h"
#include "blob_factory.hpp"
#include "mkldnn_memory.h"
#include "mkldnn_extension_utils.h"
#include <nodes/common/cpu_memcpy.h>

#include "common/memory_desc_wrapper.hpp"

Expand Down Expand Up @@ -36,7 +38,7 @@ struct IEB_HEADER {
unsigned long scaling_data_size;
};

static IEB_HEADER prepare_header(const TensorDesc& desc) {
static IEB_HEADER prepare_header(const MemoryDesc& desc) {
IEB_HEADER header = {};

header.magic[0] = IEB_MAGIC[0];
Expand All @@ -50,19 +52,20 @@ static IEB_HEADER prepare_header(const TensorDesc& desc) {

header.precision = desc.getPrecision();

if (desc.getDims().size() > 7)
if (desc.getShape().getRank() > 7)
IE_THROW() << "Dumper support max 7D blobs";

header.ndims = desc.getDims().size();
header.ndims = desc.getShape().getRank();
const auto &dims = desc.getShape().getStaticDims();
for (int i = 0; i < header.ndims; i++)
header.dims[i] = desc.getDims()[i];
header.dims[i] = dims[i];

header.scaling_axis = NO_SCALES;

return header;
}

static TensorDesc parse_header(IEB_HEADER &header) {
static MKLDNNMemoryDesc parse_header(IEB_HEADER &header) {
if (header.magic[0] != IEB_MAGIC[0] ||
header.magic[1] != IEB_MAGIC[1] ||
header.magic[2] != IEB_MAGIC[2] ||
Expand All @@ -73,177 +76,126 @@ static TensorDesc parse_header(IEB_HEADER &header) {
header.ver[1] != 1)
IE_THROW() << "Dumper cannot parse file. Unsupported IEB format version.";

Precision prc = Precision(static_cast<Precision::ePrecision>(header.precision));
const auto prc = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision(static_cast<Precision::ePrecision>(header.precision)));
SizeVector dims(header.ndims);
for (int i = 0; i < header.ndims; i++)
dims[i] = header.dims[i];

return TensorDesc {prc, dims, TensorDesc::getLayoutByDims(dims) };
return MKLDNNMemoryDesc{MKLDNNDims(dims), prc, MKLDNNMemory::GetPlainFormatByRank(dims.size()) };
}

static void prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector<uint8_t> &data) {
const auto &desc = memory->GetDesc();
size_t data_size = desc.getShape().getElementsCount();
const auto size = data_size * desc.getPrecision().size();
data.resize(size);

bool is_plain(const Blob::Ptr &blob) {
bool res = true;

auto orig_strides = blob->getTensorDesc().getBlockingDesc().getStrides();
auto orig_order = blob->getTensorDesc().getBlockingDesc().getOrder();
auto dims = blob->getTensorDesc().getDims();

for (int stride = 1, i = dims.size() - 1; i >= 0; --i) {
if (stride != orig_strides[i] || i != orig_order[i]) res = false;
stride *= dims[i];
}

return res;
}

static Blob::Ptr prepare_plain_data(Blob::Ptr blob) {
// check if it already plain
if (is_plain(blob)) return blob;

Blob::Ptr pln_blob = make_plain_blob(blob->getTensorDesc().getPrecision(), blob->getTensorDesc().getDims());
pln_blob->allocate();
if (desc.checkGeneralLayout(GeneralLayout::ncsp)) {
cpu_memcpy(data.data(), reinterpret_cast<const uint8_t*>(memory->GetPtr()), size);
return;
}

// Copy to plain
// TODO [DS]: blob dumper should be rewritten using Memory object
MKLDNNMemoryDesc mdesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(blob->getTensorDesc());
mkldnn::memory::desc desc = mdesc;
mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data);
const void *ptr = memory->GetData();

size_t data_size = blob->size();

// TODO: make it with blob_copy utility
switch (blob->getTensorDesc().getPrecision()) {
switch (desc.getPrecision()) {
case Precision::FP32:
case Precision::I32: {
auto *pln_blob_ptr = pln_blob->buffer().as<int32_t*>();
auto *blob_ptr = blob->buffer().as<int32_t*>();
auto *pln_blob_ptr = reinterpret_cast<int32_t *>(data.data());
auto *blob_ptr = reinterpret_cast<const int32_t *>(ptr);
for (size_t i = 0; i < data_size; i++)
pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
pln_blob_ptr[i] = blob_ptr[desc.getOffset(i)];
break;
}
case Precision::I16:
case Precision::U16:
case Precision::BF16: {
auto *pln_blob_ptr = pln_blob->buffer().as<int16_t *>();
auto *blob_ptr = blob->buffer().as<int16_t *>();
for (size_t i = 0; i < data_size; i++) pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
auto *pln_blob_ptr = reinterpret_cast<int16_t *>(data.data());
auto *blob_ptr = reinterpret_cast<const int16_t *>(ptr);
for (size_t i = 0; i < data_size; i++)
pln_blob_ptr[i] = blob_ptr[desc.getOffset(i)];
break;
}
case Precision::I8:
case Precision::U8: {
auto *pln_blob_ptr = pln_blob->buffer().as<int8_t*>();
auto *blob_ptr = blob->buffer().as<int8_t *>();
auto *pln_blob_ptr = reinterpret_cast<int8_t*>(data.data());
auto *blob_ptr = reinterpret_cast<const int8_t *>(ptr);
for (size_t i = 0; i < data_size; i++)
pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
pln_blob_ptr[i] = blob_ptr[desc.getOffset(i)];
break;
}
default:
IE_THROW() << "Dumper. Unsupported precision";
}

return pln_blob;
}

void BlobDumper::dump(std::ostream &stream) const {
if (!_blob)
IE_THROW() << "Dumper cannot dump empty Blob";

if (_blob->buffer().as<float*>() == nullptr)
IE_THROW() << "Dumper cannot dump. Blob is not allocated.";
if (memory == nullptr)
IE_THROW() << "Dumper cannot dump. Memory is not allocated.";

IEB_HEADER header = prepare_header(_blob->getTensorDesc());
Blob::Ptr pln_blob = prepare_plain_data(_blob);
IEB_HEADER header = prepare_header(memory->GetDesc());
std::vector<uint8_t> data;
prepare_plain_data(this->memory, data);

header.data_offset = sizeof(header);
header.data_size = pln_blob->byteSize();
header.data_size = data.size();
header.scaling_data_offset = 0;
header.scaling_data_size = 0;

if (_scales) {
header.scaling_axis = 1;
header.scaling_data_offset = header.data_offset + header.data_size;
header.scaling_data_size = _scales->byteSize();
}

stream.write(reinterpret_cast<char*>(&header), sizeof(header));
stream.write(pln_blob->buffer().as<char*>(), pln_blob->byteSize());

if (_scales) {
stream.write(_scales->buffer().as<char*>(), _scales->byteSize());
}
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
stream.write(reinterpret_cast<char*>(data.data()), data.size());
}

void BlobDumper::dumpAsTxt(std::ostream &stream) const {
if (!_blob)
IE_THROW() << "Dumper cannot dump empty Blob";

if (_blob->buffer().as<float*>() == nullptr)
IE_THROW() << "Dumper cannot dump. Blob is not allocated.";
if (memory == nullptr)
IE_THROW() << "Dumper cannot dump. Memory is not allocated.";

SizeVector dims = _blob->getTensorDesc().getDims();
const auto dims = memory->GetDims();
const auto &desc = memory->GetDesc();
size_t data_size = desc.getShape().getElementsCount();

// Header like "U8 4D shape: 2 3 224 224 ()
stream << _blob->getTensorDesc().getPrecision().name() << " "
stream << memory->GetDesc().getPrecision().name() << " "
<< dims.size() << "D "
<< "shape: ";
for (size_t d : dims) stream << d << " ";
stream << "(" << _blob->size() << ")" <<
" by address 0x" << std::hex << _blob->buffer().as<long long>() << std::dec <<std::endl;

// Dump data
// TODO [DS]: blob dumper should be rewritten using Memory object
MKLDNNMemoryDesc mdesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(_blob->getTensorDesc());
mkldnn::memory::desc desc = mdesc;
mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data);

size_t data_size = _blob->size();
switch (_blob->getTensorDesc().getPrecision()) {
case Precision::FP32: {
auto *blob_ptr = _blob->buffer().as<float*>();
stream << "(" << data_size << ")" <<
" by address 0x" << std::hex << reinterpret_cast<const long long *>(memory->GetData()) << std::dec <<std::endl;

const void *ptr = memory->GetData();

switch (desc.getPrecision()) {
case Precision::FP32 : {
auto *blob_ptr = reinterpret_cast<const float*>(ptr);
for (size_t i = 0; i < data_size; i++)
stream << blob_ptr[blob_wrp.off_l(i)] << std::endl;
stream << blob_ptr[desc.getOffset(i)] << std::endl;
break;
}
case Precision::BF16:
{
auto *blob_ptr = _blob->buffer().as<int16_t *>();
case Precision::BF16: {
auto *blob_ptr = reinterpret_cast<const int16_t*>(ptr);
for (size_t i = 0; i < data_size; i++) {
int i16n = blob_ptr[blob_wrp.off_l(i)];
int i16n = blob_ptr[desc.getOffset(i)];
i16n = i16n << 16;
float fn = *(reinterpret_cast<float *>(&i16n));
float fn = *(reinterpret_cast<const float *>(&i16n));
stream << fn << std::endl;
}
break;
}
case Precision::I32: {
auto *blob_ptr = _blob->buffer().as<int32_t*>();
for (size_t i = 0; i < data_size; i++)
stream << blob_ptr[blob_wrp.off_l(i)] << std::endl;
break;
}
case Precision::I16: {
auto *blob_ptr = _blob->buffer().as<int16_t*>();
for (size_t i = 0; i < data_size; i++)
stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
break;
}
case Precision::U16: {
auto *blob_ptr = _blob->buffer().as<uint16_t*>();
auto *blob_ptr = reinterpret_cast<const int32_t*>(ptr);
for (size_t i = 0; i < data_size; i++)
stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
stream << blob_ptr[desc.getOffset(i)] << std::endl;
break;
}
case Precision::I8: {
auto *blob_ptr = _blob->buffer().as<int8_t*>();
auto *blob_ptr = reinterpret_cast<const int8_t*>(ptr);
for (size_t i = 0; i < data_size; i++)
stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
stream << static_cast<int>(blob_ptr[desc.getOffset(i)]) << std::endl;
break;
}
case Precision::U8: {
auto *blob_ptr = _blob->buffer().as<uint8_t*>();
auto *blob_ptr = reinterpret_cast<const uint8_t*>(ptr);
for (size_t i = 0; i < data_size; i++)
stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
stream << static_cast<int>(blob_ptr[desc.getOffset(i)]) << std::endl;
break;
}
default:
Expand All @@ -255,29 +207,12 @@ BlobDumper BlobDumper::read(std::istream &stream) {
IEB_HEADER header;
stream.read(reinterpret_cast<char*>(&header), sizeof(header));

TensorDesc desc = parse_header(header);
Blob::Ptr blob = make_blob_with_precision(desc);
blob->allocate();
const auto desc = parse_header(header);

BlobDumper res(desc);
stream.seekg(header.data_offset, stream.beg);
stream.read(blob->buffer().as<char*>(), header.data_size);
stream.read(reinterpret_cast<char *>(res.getDataPtr()), header.data_size);

BlobDumper res(blob);

// Parse scales fields.
if (header.scaling_axis != NO_SCALES) {
if (header.scaling_axis != 1)
IE_THROW() << "Dumper support scaling only for channel dims.";

size_t scl_size = header.scaling_data_size / sizeof(float);
auto scl = make_blob_with_precision({Precision::FP32, {scl_size}, C});
scl->allocate();

stream.seekg(header.scaling_data_offset, stream.beg);
stream.read(scl->buffer().as<char*>(), header.scaling_data_size);

res._scales = scl;
}
return res;
}

Expand Down Expand Up @@ -312,73 +247,4 @@ void BlobDumper::dumpAsTxt(const std::string& dump_path) const {
dump_file.close();
}

Blob::Ptr BlobDumper::get() {
return _blob;
}

template <typename data_t>
static void plain_copy(const Blob::Ptr &from, const Blob::Ptr &scls, Blob::Ptr &to) {
auto dims = from->getTensorDesc().getDims();

size_t data_size = from->size();
size_t outer_size = dims[0];
size_t c_size = dims.size() > 1 ? dims[1] : 1;
size_t inner_size = dims.size() == 4 ? dims[2]*dims[3] :
dims.size() == 3 ? dims[2] : 1;

auto to_data = to->buffer().as<float*>();
auto from_data = from->buffer().as<data_t*>();

if (scls) {
auto scls_data = scls->buffer().as<float*>();

for (size_t o=0; o < outer_size; o++)
for (size_t c=0; c < c_size; c++)
for (size_t i=0; i < inner_size; i++)
*to_data++ = static_cast<float>(*from_data++) * scls_data[c];
} else {
for (size_t i=0; i < data_size; i++)
*to_data++ = static_cast<float>(*from_data++);
}
}

Blob::Ptr BlobDumper::getRealValue() {
if (_blob->getTensorDesc().getPrecision() == Precision::FP32 && !_scales)
return _blob;

auto res = make_plain_blob(Precision::FP32, _blob->getTensorDesc().getDims());
res->allocate();

switch (_blob->getTensorDesc().getPrecision()) {
case Precision::U8: plain_copy<uint8_t>(_blob, _scales, res); break;
case Precision::FP32: plain_copy<float>(_blob, _scales, res); break;
case Precision::I8: plain_copy<int8_t >(_blob, _scales, res); break;
default: IE_THROW() << "Unsupported precesion for getRealValue method.";
}

return res;
}


BlobDumper& BlobDumper::withScales(InferenceEngine::Blob::Ptr scales) {
if ( _blob->getTensorDesc().getDims().size() < 2 ||
scales->getTensorDesc().getDims().size() != 1 ||
scales->getTensorDesc().getDims()[0] != _blob->getTensorDesc().getDims()[1] ||
scales->getTensorDesc().getPrecision() != Precision::FP32)
IE_THROW() << "Dumper cannot use passed scales. Blob has incompatible shape.";

_scales = scales;
return *this;
}

BlobDumper& BlobDumper::withoutScales() {
_scales.reset();
return *this;
}


const InferenceEngine::Blob::Ptr& BlobDumper::getScales() const {
return _scales;
}

} // namespace MKLDNNPlugin
Loading

0 comments on commit 7a2d1ac

Please sign in to comment.