Skip to content

Commit

Permalink
Fix the compile error when building with GCC 4 and CUDA 9.0 (#29997)
Browse files Browse the repository at this point in the history
  • Loading branch information
wangchaochaohu authored Jan 5, 2021
1 parent 1fa863d commit d0a5620
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions paddle/fluid/operators/elementwise/elementwise_add_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ __global__ void MatrixColReduce(const T *__restrict__ in, T *__restrict__ out,
}
}

#if CUDA_VERSION >= 10000
template <int SIZE>
__global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in,
__half2 *__restrict__ out, size_t width,
Expand All @@ -199,6 +200,7 @@ __global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in,
}
#endif
}
#endif

template <typename T>
__global__ void MatrixReduceLongWidth(const T *__restrict__ in, T *out,
Expand Down Expand Up @@ -365,6 +367,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
int max_blocks = std::max(max_physical_threads / (block_x * block_y), 1);
int theory_block = (width + blocks.x - 1) / blocks.x;
dim3 grids(std::min(theory_block, max_blocks));
#if CUDA_VERSION >= 10000
if (std::is_same<T, paddle::platform::float16>::value && width < 2048 &&
width % 2 == 0 && height % 64 == 0) {
auto &dev_ctx =
Expand All @@ -382,6 +385,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
width, height);
return;
}
#endif

if (width / height < 32) {
MatrixColReduce<T, block_x, block_y><<<grids, blocks, 0, stream>>>(
Expand Down

0 comments on commit d0a5620

Please sign in to comment.