Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
gpu temporary space request alignment
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu committed Jun 16, 2020
1 parent 8fea409 commit c251837
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/operator/tensor/index_update-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <vector>
#include <algorithm>
#include "./index_add-inl.h"
+ #include "./sort_op.h"
#include "../mxnet_op.h"
#include "../operator_common.h"
#include "../elemwise_op_common.h"
Expand Down
8 changes: 5 additions & 3 deletions src/operator/tensor/index_update.cu
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,15 @@ void IndexUpdateOpBackwardAImpl<gpu>(const OpContext& ctx,
using namespace mshadow;
mshadow::Stream<gpu> *s = ctx.get_stream<gpu>();
MSHADOW_TYPE_SWITCH(grad_a.type_flag_, DType, {
- size_t temp_mem_size = ind.shape_.Size() * sizeof(int) +
- ograd.shape_.Size() * sizeof(DType);
+ size_t alignment = std::max(sizeof(DType), sizeof(int32_t));
+ size_t id_size = PadBytes(sizeof(int32_t) * ind.Size(), alignment);
+ size_t ograd_size = PadBytes(sizeof(DType) * ograd.Size(), alignment);
+ size_t temp_mem_size = id_size + ograd_size;
Tensor<gpu, 1, char> temp_mem =
ctx.requested[0].get_space_typed<gpu, 1, char>(Shape1(temp_mem_size), s);
TBlob t_ograd = TBlob(temp_mem.dptr_, ograd.shape_, ograd.dev_mask(),
ograd.type_flag_, ograd.dev_id());
- TBlob t_ind = TBlob(temp_mem.dptr_ + ograd.Size() * sizeof(DType), ind.shape_, ind.dev_mask(),
+ TBlob t_ind = TBlob(temp_mem.dptr_ + ograd_size, ind.shape_, ind.dev_mask(),
mshadow::kInt32, ind.dev_id());
mxnet_op::copy(s, t_ograd, ograd);
mxnet_op::copy(s, t_ind, ind);
Expand Down

0 comments on commit c251837

Please sign in to comment.