// RReLU.cu (THCUNN, from pytorch/pytorch)
#include <THCUNN/THCUNN.h>
#include <TH/THHalf.h>
#include <THCUNN/THCHalfAutoNumerics.cuh>
#include <THC/THCApply.cuh>
#include <THCUNN/common.h>
#include <curand.h>
#include <curand_kernel.h>
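
// RReLU (Xu et al., 2015, "Empirical Evaluation of Rectified Activations"):
// y = x for x > 0, and y = r * x for x <= 0, where r is sampled uniformly
// from [lower, upper) at training time and fixed to (lower + upper) / 2 at
// evaluation time.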
// copied from cutorch/lib/THC/THCTensorRandom.cu
#define MAX_NUM_BLOCKS 64
#define BLOCK_SIZE 256
#define NUM_BLOCKS(n) min((int)THCCeilDiv(n, (ptrdiff_t) BLOCK_SIZE), MAX_NUM_BLOCKS)
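
// The grid is capped at MAX_NUM_BLOCKS so that each block can draw from its
// own pre-initialized MTGP32 state (state[blockIdx.x] in the kernel below),
// mirroring the RNG state array that cutorch allocates.

// Dispatch helper: returns a uniform sample in (0, 1] in the tensor's scalar
// type. Specialized below for half, float, and double.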
template<typename T>
inline T __device__ curand_uniform_type(curandStateMtgp32 *state);
template <>
inline THHalf __device__ curand_uniform_type<THHalf>(curandStateMtgp32 *state) {
return ScalarConvert<float, THHalf>::to(curand_uniform(state));
}
template <>
inline float __device__ curand_uniform_type<float>(curandStateMtgp32 *state) {
return curand_uniform(state);
}
template <>
inline double __device__ curand_uniform_type<double>(curandStateMtgp32 *state) {
return curand_uniform_double(state);
}
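
// Training-mode forward: for each non-positive input element, sample
// r ~ U(a, b), scale the input by r, and record r in `noise` so the backward
// pass can reuse the same slope. Positive elements pass through with noise = 1.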
template <typename T>
__global__ void rreluUpdateOutputTrain(int n, curandStateMtgp32 *state,
T *input, T* noise, T *output, double a, double b)
{
CUDA_KERNEL_LOOP(i, n)
{
if (input[i] <= 0)
{
T r = curand_uniform_type<T>(&state[blockIdx.x]);
r = ScalarConvert<double, T>::to(r * (b-a) + a);
output[i] = input[i] * r;
noise[i] = r;
}
else
{
output[i] = input[i];
noise[i] = ScalarConvert<int, T>::to(1);
}
}
}
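
// Evaluation-mode forward (out of place): applies a fixed negative slope to
// non-positive inputs; the caller is expected to pass the midpoint of the
// sampling range as negSlope.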
template <typename T>
struct RReLUUpdateOutputEval_functor
{
const T negSlope_;
RReLUUpdateOutputEval_functor(T negSlope)
: negSlope_(negSlope)
{}
__device__ __forceinline__ void operator()(T *out, T *in)
{
const T x = *in;
const T r = x <= 0 ? negSlope_ : ScalarConvert<int, T>::to(1);
*out = x * r;
}
};
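
// In-place variant of the evaluation-mode forward: only non-positive elements
// are rescaled; positive elements are left untouched.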
template <typename T>
struct RReLUUpdateOutputEvalIP_functor
{
const T negSlope_;
RReLUUpdateOutputEvalIP_functor(T negSlope)
: negSlope_(negSlope)
{}
__device__ __forceinline__ void operator()(T *x)
{
if (*x <= 0)
{
*x = *x * negSlope_;
}
}
};
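
// Evaluation-mode backward (out of place): the gradient is scaled by negSlope
// wherever the input was non-positive, and passed through unchanged otherwise.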
template <typename T>
struct RReLUupdateGradInputEval_functor
{
const T negSlope_;
RReLUupdateGradInputEval_functor(T negSlope)
: negSlope_(negSlope)
{}
__device__ __forceinline__ void operator()(T *gradIn, T *gradOut, T *in)
{
*gradIn = (*in) <= 0 ? (*gradOut) * negSlope_ : (*gradOut);
}
};
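
// In-place variant of the evaluation-mode backward, writing the scaled
// gradient over gradOut.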
template <typename T>
struct RReLUupdateGradInputEvalIP_functor
{
const T negSlope_;
RReLUupdateGradInputEvalIP_functor(T negSlope)
: negSlope_(negSlope)
{}
__device__ __forceinline__ void operator()(T *gradOut, T *in)
{
if (*in <= 0)
{
*gradOut = (*gradOut) * negSlope_;
}
}
};
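
// Instantiate the generic implementation in THCUNN/generic/RReLU.cu for each
// floating-point scalar type (float, double, half).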
#include <THCUNN/generic/RReLU.cu>
#include <THC/THCGenerateFloatTypes.h>