From b2ad3022a29656dc7f2c607c70d0102e19d0c200 Mon Sep 17 00:00:00 2001
From: Chris Olivier
Date: Wed, 9 Aug 2017 08:59:23 -0700
Subject: [PATCH] Allocate temp data on the fly for some casting operations (#149)

---
Note: text in angle brackets (template argument lists, the author's e-mail
address) appears to have been stripped from this copy of the patch and is
left as found; indentation inside the hunks is approximate.

 src/operator/tensor/cast_storage-inl.cuh | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/operator/tensor/cast_storage-inl.cuh b/src/operator/tensor/cast_storage-inl.cuh
index 027c29d1ff25..1a47b68f7aac 100644
--- a/src/operator/tensor/cast_storage-inl.cuh
+++ b/src/operator/tensor/cast_storage-inl.cuh
@@ -207,6 +207,15 @@ struct FillRspValsKernel {
   }
 };
 
+template
+inline mshadow::Tensor AllocateTempDataForCast(const OpContext& op_ctx,
+                                               const mshadow::Shape& shape) {
+  Resource rsc = ResourceManager::Get()->Request(op_ctx.run_ctx.ctx,
+                                                 ResourceRequest(ResourceRequest::kTempSpace));
+  mshadow::Stream *stream = op_ctx.run_ctx.get_stream();
+  return rsc.get_space_typed(shape, stream);
+};
+
 /*!
  * \brief GPU implementation of casting a dns tensor to rsp type.
  */
@@ -245,8 +254,8 @@ inline void CastStorageDnsRspImpl(const OpContext& ctx,
                                   mshadow::Stream::GetStream(s));
 
     // Allocate temp storage for marking non-zero rows and for cub's prefix sum
-    mshadow::Tensor workspace = ctx.requested[0]
-        .get_space_typed(Shape1(num_rows*sizeof(RType)+temp_storage_bytes), s);
+    auto workspace = AllocateTempDataForCast(ctx, Shape1(num_rows*sizeof(RType)
+                                                         + temp_storage_bytes));
     row_flg = reinterpret_cast(workspace.dptr_);
     d_temp_storage = workspace.dptr_ + num_rows*sizeof(RType);
 
@@ -652,8 +661,8 @@ inline void CastStorageDnsCsrImpl(const OpContext& ctx,
                                   mshadow::Stream::GetStream(s));
 
     // Allocate temporary storage
-    mshadow::Tensor workspace = ctx.requested[0]
-        .get_space_typed(Shape1(temp_storage_bytes), s);
+    auto workspace = AllocateTempDataForCast(ctx, Shape1(temp_storage_bytes));
+
     d_temp_storage = workspace.dptr_;
 
     // Compute indptr through inclusive prefix sum