This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

MKL-DNN RNN checks NDArray version #16071

Merged · 3 commits · Sep 4, 2019
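
This change replaces the RNN operator's manually managed Storage::Handle workspaces with engine-managed NDArray buffers, removing the explicit Storage::Get()->Free() calls from the destructor, and records the version of the parameters NDArray so the cached MKL-DNN weight memory is invalidated and rebuilt whenever the weights change (for example after copy_params_from). A new unit test, test_rnn_with_new_param, binds an RNN symbol, runs training and inference forward passes, swaps in fresh parameters, and checks that the two passes still agree.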
Changes from all commits
79 changes: 21 additions & 58 deletions src/operator/rnn-inl.h
@@ -409,10 +409,11 @@ class RNNOp {
std::vector<mkldnn::memory> bias_memory;
std::vector<mkldnn::memory> y_memory;
std::vector<mkldnn::memory> hcy_memory;
size_t weights_version;
bool has_cache;
bool init_mem_;
size_t reserve_mem_size_;
Storage::Handle mem_space_;
NDArray mem_space_;
#endif
explicit RNNOp(RNNParam param, Context ctx) {
this->param_ = param;
@@ -522,12 +523,6 @@ class RNNOp {
}

~RNNOp() {
#if MXNET_USE_MKLDNN == 1
if (init_mem_) {
Storage::Get()->Free(mem_space_);
init_mem_ = false;
}
#endif // MXNET_USE_MKLDNN
#if MXNET_USE_CUDNN == 1
CUDNN_CALL(cudnnDestroyTensorDescriptor(hx_desc_));
CUDNN_CALL(cudnnDestroyTensorDescriptor(cx_desc_));
@@ -560,17 +555,6 @@ class RNNOp {
CUDNN_CALL(cudnnDestroyRNNDataDescriptor(dy_data_desc_));
#endif // MXNET_USE_CUDNN_GE_7200
#endif // MXNET_USE_CUDNN

if (ctx_.dev_type == kCPU) {
if (init_space_) {
Storage::Get()->Free(reserve_cpu_space_);
init_space_ = false;
}
if (temp_init_space_) {
Storage::Get()->Free(temp_cpu_space_);
temp_init_space_ = false;
}
}
}

void Forward(const OpContext &ctx, const std::vector<TBlob> &in_data,
@@ -855,37 +839,30 @@ class RNNOp {
#endif // MXNET_USE_CUDNN == 1 && defined(__CUDACC__)

if (ctx_.dev_type == kCPU) {
// allocate temp space
const size_t work_cpu_space_size = GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
param_.state_size, direction, param_.mode);
if (!temp_init_space_ || temp_cpu_space_size_ < work_cpu_space_size) {
temp_cpu_space_size_ = work_cpu_space_size;
temp_cpu_space_ = NDArray(TShape({static_cast<dim_t>(temp_cpu_space_size_)}), ctx_,
false, in_data[rnn_enum::kData].type_flag_);
temp_init_space_ = true;
}
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.data().dptr_);

if (ctx.is_train) {
// allocate temp space
const size_t work_cpu_space_size =
GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
param_.state_size, direction, param_.mode);
if (temp_init_space_ && temp_cpu_space_size_ < work_cpu_space_size) {
Storage::Get()->Free(temp_cpu_space_);
temp_init_space_ = false;
}
if (!temp_init_space_) {
temp_cpu_space_ = Storage::Get()->Alloc
(work_cpu_space_size * sizeof(DType), Context::CPU());
temp_cpu_space_size_ = work_cpu_space_size;
temp_init_space_ = true;
}
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
// allocate reserve space

const size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction,
param_.seq_length_, param_.batch_size_,
param_.state_size, param_.mode);
if (init_space_ && reserve_cpu_space_size_ < r_size) {
Storage::Get()->Free(reserve_cpu_space_);
init_space_ = false;
}
if (!init_space_) {
reserve_cpu_space_ = Storage::Get()->Alloc(r_size * sizeof(DType), Context::CPU());
if (!init_space_ || reserve_cpu_space_size_ < r_size) {
reserve_cpu_space_size_ = r_size;
reserve_cpu_space_ = NDArray(TShape({static_cast<dim_t>(reserve_cpu_space_size_)}), ctx_,
false, in_data[rnn_enum::kData].type_flag_);
init_space_ = true;
}

DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.dptr);
DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.data().dptr_);

RNNForwardTraining<DType>(work_cpu_space,
reserve_space_ptr,
@@ -945,20 +922,6 @@ class RNNOp {
#endif // MXNET_USE_MKLDNN == 1
// Until MKL-DNN GRU fp32 inference is integrated,
// keep the code below so the function still works.
const size_t work_cpu_space_size =
GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
param_.state_size, direction, param_.mode);
if (temp_init_space_ && temp_cpu_space_size_ < work_cpu_space_size) {
Storage::Get()->Free(temp_cpu_space_);
temp_init_space_ = false;
}
if (!temp_init_space_) {
temp_cpu_space_ = Storage::Get()->Alloc
(work_cpu_space_size * sizeof(DType), Context::CPU());
temp_cpu_space_size_ = work_cpu_space_size;
temp_init_space_ = true;
}
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
RNNForwardInference<DType>(work_cpu_space,
param_.state_outputs,
param_.num_layers,
@@ -1171,7 +1134,7 @@ class RNNOp {
if (!temp_init_space_ || temp_cpu_space_size_ != work_cpu_space_size) {
LOG(FATAL) << "Check temp init error";
}
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.data().dptr_);
size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction,
param_.seq_length_, param_.batch_size_,
param_.state_size, param_.mode);
@@ -1180,7 +1143,7 @@
LOG(FATAL) << "Check forward init error";
}

DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.dptr);
DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.data().dptr_);
RNNBackward<DType>(work_cpu_space,
reserve_space_ptr,
param_.num_layers,
@@ -1551,7 +1514,7 @@ class RNNOp {
#endif // MXNET_USE_CUDNN
bool init_space_, temp_init_space_;
size_t reserve_cpu_space_size_, temp_cpu_space_size_;
Storage::Handle reserve_cpu_space_, temp_cpu_space_;
NDArray reserve_cpu_space_, temp_cpu_space_;
}; // class RNNOp

static OpStatePtr CreateRNNState(const nnvm::NodeAttrs &attrs,
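
The core of the rnn-inl.h change is an ownership swap: the temp and reserve workspaces become NDArray members instead of Storage::Handles, so the destructor no longer needs the explicit Storage::Get()->Free() blocks deleted above. A standalone sketch of the pattern, using plain C++ stand-ins rather than the MXNet API:

#include <cstddef>
#include <vector>

// Before: a raw buffer plus an init flag, with an explicit Free in the
// destructor (the pattern removed from ~RNNOp above).
struct OpWithHandle {
  float* space_ = nullptr;    // stands in for Storage::Handle
  bool init_space_ = false;
  void Alloc(std::size_t n) { space_ = new float[n]; init_space_ = true; }
  ~OpWithHandle() { if (init_space_) delete[] space_; }  // forget this and it leaks
};

// After: an owning member frees its buffer automatically, as NDArray does,
// so the operator needs no workspace cleanup code at all.
struct OpWithOwner {
  std::vector<float> space_;  // stands in for NDArray
  void Alloc(std::size_t n) { space_.resize(n); }
  // the implicit destructor is enough
};

int main() {
  OpWithHandle a; a.Alloc(16);
  OpWithOwner b;  b.Alloc(16);
  return 0;  // both buffers are released here: one manually, one automatically
}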
14 changes: 8 additions & 6 deletions src/operator/rnn.cc
@@ -270,22 +270,24 @@ static void RNNStatefulComputeCPU(const OpStatePtr& state_ptr,

const size_t r_size = GetMKLDNNRNNCacheMemorySize(L, D, T, N, I, H, param.mode);
if (op.init_mem_ && op.reserve_mem_size_ < r_size) {
Storage::Get()->Free(op.mem_space_);
op.init_mem_ = false;
}
const size_t weights_version = inputs[rnn_enum::kParams].version();
if (!op.init_mem_) {
op.mem_space_ = Storage::Get()->Alloc(
r_size * sizeof(DType),
Context::CPU());
op.mem_space_ = NDArray(TShape({static_cast<dim_t>(r_size)}), op.ctx_, false, dtype);
op.reserve_mem_size_ = r_size;
op.init_mem_ = true;
op.has_cache = false;
// Assign weights_version
op.weights_version = weights_version;
}
if (op.has_cache && op.x_memory.size() == 0) {
// Check if NDArray was changed.
if (op.weights_version != weights_version) {
op.has_cache = false;
op.weights_version = weights_version;
}

DType* workptr = static_cast<DType*>(op.mem_space_.dptr);
DType* workptr = static_cast<DType*>(op.mem_space_.data().dptr_);
mkldnn::memory::dims src_layer_tz_0 = {T, N, I};
mkldnn::memory::dims src_layer_tz = {T, N, D * H};
mkldnn::memory::dims dst_layer_tz = {T, N, D * H};
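
This hunk is the heart of the PR: the version counter of an NDArray advances when it is written, so comparing a stored counter against inputs[rnn_enum::kParams].version() detects that the weights were replaced (e.g. by copy_params_from) and forces the cached MKL-DNN weight memory to be rebuilt. A minimal standalone sketch of that invalidation pattern, with stand-in types in place of NDArray and the MKL-DNN memory:

#include <cassert>
#include <cstddef>
#include <vector>

// Stand-in for NDArray: every write bumps a version counter, which is what
// inputs[rnn_enum::kParams].version() exposes in the real operator.
struct Buffer {
  std::vector<float> data;
  std::size_t version = 0;
  void Fill(float v) { data.assign(data.size(), v); ++version; }
};

// Stand-in for the op state: rebuild the cached, re-packed weights whenever
// the source buffer's version has moved since they were packed.
struct WeightCache {
  bool has_cache = false;
  std::size_t weights_version = 0;
  std::vector<float> packed;  // stands in for the cached mkldnn::memory

  void Refresh(const Buffer& weights) {
    if (!has_cache || weights_version != weights.version) {
      packed = weights.data;              // re-pack for the fast path
      weights_version = weights.version;  // remember what was packed
      has_cache = true;
    }
  }
};

int main() {
  Buffer w;
  w.data.resize(8);
  w.Fill(0.5f);
  WeightCache cache;
  cache.Refresh(w);   // first call packs the weights
  cache.Refresh(w);   // no write since: the cache is reused
  w.Fill(1.0f);       // weights replaced -> version bumps
  cache.Refresh(w);   // stale cache detected and rebuilt
  assert(cache.packed[0] == 1.0f);
  return 0;
}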
39 changes: 39 additions & 0 deletions tests/python/unittest/test_operator.py
@@ -75,6 +75,45 @@ def check_rnn_consistency(cell1, cell2, T, N, I, H, grad_req, rtol=1e-2, atol=1e
assert(mod2.get_input_grads()[0] == None)


@with_seed()
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
def test_rnn_with_new_param():
rnn_modes = ['rnn_relu', 'rnn_tanh', 'gru', 'lstm']
ngates_ = [1, 1, 3, 4]
num_layers, input_size, seq_len, batch_size, state_size = 3, 128, 5, 64, 8
for bidirectional in [False, True]:
directions = 2 if bidirectional else 1
for mode, ngates in zip(rnn_modes, ngates_):
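# Per gate: input-to-hidden weights, hidden-to-hidden weights, and two
# bias vectors (hence the `state_size * 2` term below).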
first_layer_size = (input_size * state_size + state_size * state_size + state_size * 2) * ngates
rest_layer_size = (state_size * directions * state_size + state_size * state_size + state_size * 2) \
* ngates * (num_layers - 1)
param_size = (first_layer_size + rest_layer_size) * directions
sym = mx.sym.RNN(mode=mode, num_layers=num_layers, bidirectional=bidirectional,
state_outputs=False, state_size=state_size, name='rnn')

bind_dict = {
'rnn_data': mx.ndarray.random.uniform(low=-1, high=1, shape=(seq_len, batch_size, input_size)),
'rnn_parameters': mx.ndarray.random.uniform(low=-1, high=1, shape=(param_size,)),
'rnn_state': mx.ndarray.zeros(shape=(num_layers * directions, batch_size, state_size))
}
if mode == 'lstm':
bind_dict['rnn_state_cell'] = mx.ndarray.zeros(
shape=(num_layers * directions, batch_size, state_size))

ex = sym.bind(default_context(), bind_dict)
ex.forward(is_train=True)
ex01 = ex.output_dict['rnn_output'].asnumpy()
ex.forward(is_train=False)
ex02 = ex.output_dict['rnn_output'].asnumpy()
assert_allclose(ex01, ex02, rtol=1e-2, atol=1e-4)
bind_dict['rnn_parameters'] = mx.ndarray.random.uniform(low=-1, high=1, shape=(param_size,))
ex.copy_params_from(bind_dict)
ex.forward(is_train=True)
ex03 = ex.output_dict['rnn_output'].asnumpy()
ex.forward(is_train=False)
ex04 = ex.output_dict['rnn_output'].asnumpy()
assert_allclose(ex03, ex04, rtol=1e-2, atol=1e-4)


@with_seed()
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')