Skip to content

Commit

Permalink
[NODE][REFLECTION] Support NDArray as field (apache#1452)
Browse files Browse the repository at this point in the history
  • Loading branch information
tqchen authored and sergei-mironov committed Aug 8, 2018
1 parent 6432d8b commit 07e3888
Show file tree
Hide file tree
Showing 15 changed files with 582 additions and 170 deletions.
2 changes: 1 addition & 1 deletion HalideIR
Submodule HalideIR updated 1 files
+5 −0 src/tvm/node.h
143 changes: 138 additions & 5 deletions include/tvm/runtime/ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <vector>
#include <utility>
#include "./c_runtime_api.h"
#include "./serializer.h"

namespace tvm {
namespace runtime {
Expand Down Expand Up @@ -103,8 +104,25 @@ class NDArray {
* \note The copy may happen asynchronously if it involves a GPU context.
* TVMSynchronize is necessary.
*/
inline void CopyTo(DLTensor* other);
inline void CopyTo(const NDArray& other);
inline void CopyTo(DLTensor* other) const;
inline void CopyTo(const NDArray& other) const;
/*!
* \brief Copy the data to another context.
* \param ctx The target context.
* \return The array under another context.
*/
inline NDArray CopyTo(const DLContext& ctx) const;
/*!
* \brief Load NDArray from stream
* \param stream The input data stream
* \return Whether load is successful
*/
inline bool Load(dmlc::Stream* stream);
/*!
* \brief Save NDArray to stream
* \param stream The output data stream
*/
inline void Save(dmlc::Stream* stream) const;
/*!
* \brief Create a NDArray that shares the data memory with the current one.
* \param shape The shape of the new array.
Expand Down Expand Up @@ -161,6 +179,13 @@ class NDArray {
friend class TVMArgsSetter;
};

/*!
* \brief Save a DLTensor to stream
* \param strm The output stream
* \param tensor The tensor to be saved.
*/
inline bool SaveDLTensor(dmlc::Stream* strm, const DLTensor* tensor);

/*!
* \brief Reference counted Container object used to back NDArray.
*
Expand Down Expand Up @@ -260,17 +285,26 @@ inline void NDArray::CopyFrom(const NDArray& other) {
CopyFromTo(&(other.data_->dl_tensor), &(data_->dl_tensor));
}

inline void NDArray::CopyTo(DLTensor* other) {
/*!
 * \brief Copy the content of this array into an existing DLTensor.
 * \param other The destination tensor.
 */
inline void NDArray::CopyTo(DLTensor* other) const {
  CHECK(data_ != nullptr);
  // The backing container's tensor is the copy source.
  auto* self_tensor = &(data_->dl_tensor);
  CopyFromTo(self_tensor, other);
}

inline void NDArray::CopyTo(const NDArray& other) {
/*!
 * \brief Copy the content of this array into another NDArray.
 * \param other The destination array; it must already hold data.
 */
inline void NDArray::CopyTo(const NDArray& other) const {
  // Both handles must be backed by a container before copying.
  CHECK(data_ != nullptr);
  CHECK(other.data_ != nullptr);
  auto* src = &(data_->dl_tensor);
  auto* dst = &(other.data_->dl_tensor);
  CopyFromTo(src, dst);
}

/*!
 * \brief Allocate an array with the same shape and dtype under another
 *        context and copy this array's data into it.
 * \param ctx The target context.
 * \return The newly allocated array under ctx.
 */
inline NDArray NDArray::CopyTo(const DLContext& ctx) const {
  CHECK(data_ != nullptr);
  const DLTensor* self = operator->();
  // Materialize the shape as a vector because Empty expects one.
  std::vector<int64_t> dims(self->shape, self->shape + self->ndim);
  NDArray result = Empty(std::move(dims), self->dtype, ctx);
  this->CopyTo(result);
  return result;
}

inline int NDArray::use_count() const {
if (data_ == nullptr) return 0;
return data_->ref_counter_.load(std::memory_order_relaxed);
Expand All @@ -280,7 +314,106 @@ inline const DLTensor* NDArray::operator->() const {
return &(data_->dl_tensor);
}

/*!
 * \brief Magic number for NDArray file.
 *
 * SaveDLTensor writes this value as the first 64-bit word of a serialized
 * tensor, and NDArray::Load rejects any stream whose header differs from it.
 */
constexpr uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F;

inline bool SaveDLTensor(dmlc::Stream* strm,
DLTensor* tensor) {
uint64_t header = kTVMNDArrayMagic, reserved = 0;
strm->Write(header);
strm->Write(reserved);
// Always save data as CPU context
//
// Parameters that get serialized should be in CPU by default.
// So even the array's context is GPU, it will be stored as CPU array.
// This is used to prevent case when another user loads the parameters
// back on machine that do not have GPU or related context.
//
// We can always do array.CopyTo(target_ctx) to get a corresponding
// array in the target context.
DLContext cpu_ctx;
cpu_ctx.device_type = kDLCPU;
cpu_ctx.device_id = 0;
strm->Write(cpu_ctx);
strm->Write(tensor->ndim);
strm->Write(tensor->dtype);
int ndim = tensor->ndim;
strm->WriteArray(tensor->shape, ndim);
int type_bytes = tensor->dtype.bits / 8;
int64_t num_elems = 1;
for (int i = 0; i < ndim; ++i) {
num_elems *= tensor->shape[i];
}
int64_t data_byte_size = type_bytes * num_elems;
strm->Write(data_byte_size);

if (DMLC_IO_NO_ENDIAN_SWAP &&
tensor->ctx.device_type == kDLCPU &&
tensor->strides == nullptr &&
tensor->byte_offset == 0) {
// quick path
strm->Write(tensor->data, data_byte_size);
} else {
std::vector<uint8_t> bytes(data_byte_size);
CHECK_EQ(TVMArrayCopyToBytes(
tensor, dmlc::BeginPtr(bytes), data_byte_size), 0)
<< TVMGetLastError();
if (!DMLC_IO_NO_ENDIAN_SWAP) {
dmlc::ByteSwap(dmlc::BeginPtr(bytes), type_bytes, num_elems);
}
strm->Write(dmlc::BeginPtr(bytes), data_byte_size);
}
return true;
}

/*!
 * \brief Save this NDArray to a stream by delegating to SaveDLTensor.
 * \param strm The output data stream.
 */
inline void NDArray::Save(dmlc::Stream* strm) const {
  // operator-> yields a const view; the cast selects the SaveDLTensor overload
  // that accepts a mutable DLTensor pointer.
  DLTensor* tensor = const_cast<DLTensor*>(operator->());
  SaveDLTensor(strm, tensor);
}

/*!
 * \brief Deserialize an NDArray from a stream and assign it to this handle.
 *
 * Fields are read in this order: magic header, reserved word, context, ndim,
 * dtype, shape[ndim], payload size in bytes, raw payload. Any inconsistency
 * fails a CHECK.
 *
 * \param strm The input data stream.
 * \return Always true; malformed input is reported through CHECK failures.
 */
inline bool NDArray::Load(dmlc::Stream* strm) {
  uint64_t header, reserved;
  CHECK(strm->Read(&header))
      << "Invalid DLTensor file format";
  CHECK(strm->Read(&reserved))
      << "Invalid DLTensor file format";
  CHECK(header == kTVMNDArrayMagic)
      << "Invalid DLTensor file format";
  DLContext ctx;
  int ndim;
  DLDataType dtype;
  CHECK(strm->Read(&ctx))
      << "Invalid DLTensor file format";
  CHECK(strm->Read(&ndim))
      << "Invalid DLTensor file format";
  CHECK(strm->Read(&dtype))
      << "Invalid DLTensor file format";
  CHECK_EQ(ctx.device_type, kDLCPU)
      << "Invalid DLTensor context: can only save as CPU tensor";
  // ndim comes from the stream; reject negative values before it is used as
  // a container size, where it would convert to a huge unsigned count.
  CHECK_GE(ndim, 0)
      << "Invalid DLTensor file format";
  std::vector<int64_t> shape(ndim);
  if (ndim != 0) {
    CHECK(strm->ReadArray(&shape[0], ndim))
        << "Invalid DLTensor file format";
  }
  NDArray ret = NDArray::Empty(shape, dtype, ctx);
  int64_t num_elems = 1;
  // Element width rounded up to whole bytes.
  int elem_bytes = (ret->dtype.bits + 7) / 8;
  for (int i = 0; i < ret->ndim; ++i) {
    num_elems *= ret->shape[i];
  }
  int64_t data_byte_size;
  CHECK(strm->Read(&data_byte_size))
      << "Invalid DLTensor file format";
  // The recorded payload size must agree with the shape and dtype just read.
  CHECK(data_byte_size == num_elems * elem_bytes)
      << "Invalid DLTensor file format";
  CHECK(strm->Read(ret->data, data_byte_size))
      << "Invalid DLTensor file format";
  if (!DMLC_IO_NO_ENDIAN_SWAP) {
    dmlc::ByteSwap(ret->data, elem_bytes, num_elems);
  }
  // Only replace the current handle once the whole tensor was read.
  *this = ret;
  return true;
}

} // namespace runtime
} // namespace tvm

#endif // TVM_RUNTIME_NDARRAY_H_
1 change: 1 addition & 0 deletions include/tvm/runtime/serializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <dmlc/io.h>
#include <dmlc/serializer.h>
#include "./c_runtime_api.h"
#include "./ndarray.h"

namespace dmlc {
namespace serializer {
Expand Down
12 changes: 2 additions & 10 deletions nnvm/python/nnvm/compiler/param_dict.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# pylint: disable=invalid-name
"""Helper utility to save parameter dict"""
import ctypes
import tvm
from tvm._ffi.runtime_ctypes import TVMArrayHandle

_save_param_dict = tvm.get_global_func("nnvm.compiler._save_param_dict")
_load_param_dict = tvm.get_global_func("nnvm.compiler._load_param_dict")
Expand Down Expand Up @@ -59,11 +57,5 @@ def load_param_dict(param_bytes):
"""
if isinstance(param_bytes, (bytes, str)):
param_bytes = bytearray(param_bytes)
load_mod = _load_param_dict(param_bytes)
size = load_mod(0)
param_dict = {}
for i in range(size):
key = load_mod(1, i)
dltensor_handle = ctypes.cast(load_mod(2, i), TVMArrayHandle)
param_dict[key] = tvm.nd.NDArray(dltensor_handle, False)
return param_dict
load_arr = _load_param_dict(param_bytes)
return {v.name : v.array for v in load_arr}
109 changes: 12 additions & 97 deletions nnvm/src/compiler/graph_runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
* \brief Interface code with TVM graph runtime.
*/
#include <dmlc/memory_io.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/serializer.h>
#include "./graph_runtime.h"

namespace nnvm {
Expand Down Expand Up @@ -37,81 +33,6 @@ NNVM_REGISTER_OP(tvm_op)
return param.num_outputs;
});

/*!
 * \brief Serialize a DLTensor to a stream.
 *
 * Layout written: magic header, reserved word, context, ndim, dtype,
 * shape[ndim], payload size in bytes, raw payload.
 *
 * \param strm The output stream.
 * \param tensor The tensor to be saved.
 * \return Always true.
 */
bool SaveDLTensor(dmlc::Stream* strm, DLTensor* tensor) {
  uint64_t header = kTVMNDArrayMagic, reserved = 0;
  strm->Write(header);
  strm->Write(reserved);
  strm->Write(tensor->ctx);
  strm->Write(tensor->ndim);
  strm->Write(tensor->dtype);
  int ndim = tensor->ndim;
  strm->WriteArray(tensor->shape, ndim);

  // Round the element width up to whole bytes, matching LoadDLTensor, so the
  // size written here is the size the loader expects for sub-byte dtypes.
  int type_bytes = (tensor->dtype.bits + 7) / 8;
  int64_t num_elems = 1;
  for (int i = 0; i < ndim; ++i) {
    num_elems *= tensor->shape[i];
  }
  int64_t data_byte_size = type_bytes * num_elems;
  strm->Write(data_byte_size);
  // handle endianness of data correctly.
  if (DMLC_IO_NO_ENDIAN_SWAP) {
    strm->Write(tensor->data, data_byte_size);
  } else {
    // Swap on a private copy so the caller's tensor is left untouched.
    uint8_t* dptr = reinterpret_cast<uint8_t*>(tensor->data);
    std::vector<uint8_t> bytes(dptr, dptr + data_byte_size);
    dmlc::ByteSwap(dmlc::BeginPtr(bytes), type_bytes, num_elems);
    strm->Write(dmlc::BeginPtr(bytes), data_byte_size);
  }
  return true;
}

/*!
 * \brief Deserialize a DLTensor from a stream.
 *
 * Fields are read in this order: magic header, reserved word, context, ndim,
 * dtype, shape[ndim], payload size in bytes, raw payload. Any inconsistency
 * fails a CHECK.
 *
 * \param strm The input data stream.
 * \return A tensor allocated with TVMArrayAlloc and filled from the stream.
 */
DLTensor* LoadDLTensor(dmlc::Stream* strm) {
  uint64_t header, reserved;
  CHECK(strm->Read(&header))
      << "Invalid DLTensor file format";
  CHECK(strm->Read(&reserved))
      << "Invalid DLTensor file format";
  CHECK(header == kTVMNDArrayMagic)
      << "Invalid DLTensor file format";
  // Scratch tensor used only to hold the metadata fields read from the stream.
  DLTensor tensor;
  CHECK(strm->Read(&(tensor.ctx)))
      << "Invalid DLTensor file format";
  CHECK(strm->Read(&(tensor.ndim)))
      << "Invalid DLTensor file format";
  CHECK(strm->Read(&(tensor.dtype)))
      << "Invalid DLTensor file format";
  // ndim comes from the stream; reject negative values before it is used as
  // a container size, where it would convert to a huge unsigned count.
  CHECK_GE(tensor.ndim, 0)
      << "Invalid DLTensor file format";
  std::vector<int64_t> shape(tensor.ndim);
  if (tensor.ndim != 0) {
    CHECK(strm->ReadArray(&shape[0], tensor.ndim))
        << "Invalid DLTensor file format";
  }
  DLTensor* ret;
  CHECK_EQ(TVMArrayAlloc(shape.data(),
                         tensor.ndim,
                         tensor.dtype.code,
                         tensor.dtype.bits,
                         tensor.dtype.lanes,
                         static_cast<int>(tensor.ctx.device_type),
                         tensor.ctx.device_id,
                         &ret), 0) << TVMGetLastError();
  int64_t num_elems = 1;
  // Element width rounded up to whole bytes.
  int elem_bytes = (ret->dtype.bits + 7) / 8;
  for (int i = 0; i < ret->ndim; ++i) {
    num_elems *= ret->shape[i];
  }
  int64_t data_byte_size;
  CHECK(strm->Read(&data_byte_size))
      << "Invalid DLTensor file format";
  // The recorded payload size must agree with the shape and dtype just read.
  CHECK(data_byte_size == num_elems * elem_bytes)
      << "Invalid DLTensor file format";
  CHECK(strm->Read(ret->data, data_byte_size))
      << "Invalid DLTensor file format";
  if (!DMLC_IO_NO_ENDIAN_SWAP) {
    dmlc::ByteSwap(ret->data, elem_bytes, num_elems);
  }
  return ret;
}

TVM_REGISTER_GLOBAL("nnvm.compiler._save_param_dict")
.set_body([](TVMArgs args, TVMRetValue *rv) {
Expand All @@ -136,7 +57,7 @@ TVM_REGISTER_GLOBAL("nnvm.compiler._save_param_dict")
uint64_t sz = static_cast<uint64_t>(arrays.size());
fo->Write(sz);
for (size_t i = 0; i < sz; ++i) {
SaveDLTensor(fo, arrays[i]);
tvm::runtime::SaveDLTensor(fo, arrays[i]);
}
}
TVMByteArray arr;
Expand All @@ -149,11 +70,9 @@ TVM_REGISTER_GLOBAL("nnvm.compiler._save_param_dict")
TVM_REGISTER_GLOBAL("nnvm.compiler._load_param_dict")
.set_body([](TVMArgs args, TVMRetValue *rv) {
std::string bytes = args[0];
std::vector<DLTensor*> data;
std::vector<std::string> names;
dmlc::MemoryStringStream memstrm(&bytes);
dmlc::Stream* strm = &memstrm;

uint64_t header, reserved;
CHECK(strm->Read(&header))
<< "Invalid parameters file format";
Expand All @@ -168,23 +87,19 @@ TVM_REGISTER_GLOBAL("nnvm.compiler._load_param_dict")
size_t size = static_cast<size_t>(sz);
CHECK(size == names.size())
<< "Invalid parameters file format";
tvm::Array<NDArrayWrapper> ret;
for (size_t i = 0; i < size; ++i) {
data.push_back(LoadDLTensor(strm));
tvm::runtime::NDArray temp;
temp.Load(strm);
std::shared_ptr<NDArrayWrapperNode> n
= std::make_shared<NDArrayWrapperNode>();
n->name = std::move(names[i]);
n->array = temp;
ret.push_back(NDArrayWrapper(n));
}
auto packed = [data, names](TVMArgs args, TVMRetValue* rv) {
int code = args[0];
if (code == 0) {
*rv = static_cast<int64_t>(data.size());
} else if (code == 1) {
int index = args[1];
*rv = names[index];
} else {
CHECK_EQ(code, 2);
int index = args[1];
*rv = static_cast<void*>(data[index]);
}
};
*rv = PackedFunc(packed);
*rv = ret;
});

TVM_REGISTER_NODE_TYPE(NDArrayWrapperNode);
} // namespace compiler
} // namespace nnvm
Loading

0 comments on commit 07e3888

Please sign in to comment.