Skip to content

Commit

Permalink
Add GPU sqrt functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Patrick Follmann authored and jeffdonahue committed Apr 13, 2017
1 parent fecee6c commit e98023a
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
3 changes: 3 additions & 0 deletions include/caffe/util/math_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ void caffe_gpu_log(const int n, const Dtype* a, Dtype* y);
template <typename Dtype>
void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);

template <typename Dtype>
void caffe_gpu_sqrt(const int n, const Dtype* a, Dtype* y);

// caffe_gpu_rng_uniform with two arguments generates integers in the range
// [0, UINT_MAX].
void caffe_gpu_rng_uniform(const int n, unsigned int* r);
Expand Down
21 changes: 21 additions & 0 deletions src/caffe/util/math_functions.cu
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,27 @@ void caffe_gpu_powx<double>(const int N, const double* a,
N, a, alpha, y);
}

template <typename Dtype>
__global__ void sqrt_kernel(const int n, const Dtype* a, Dtype* y) {
CUDA_KERNEL_LOOP(index, n) {
y[index] = sqrt(a[index]);
}
}

template <>
void caffe_gpu_sqrt<float>(const int N, const float* a, float* y) {
// NOLINT_NEXT_LINE(whitespace/operators)
sqrt_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
N, a, y);
}

template <>
void caffe_gpu_sqrt<double>(const int N, const double* a, double* y) {
// NOLINT_NEXT_LINE(whitespace/operators)
sqrt_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
N, a, y);
}

DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index])
- (x[index] < Dtype(0)));
DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sgnbit, y[index] = signbit(x[index]));
Expand Down

0 comments on commit e98023a

Please sign in to comment.