forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
VolumetricMaxUnpooling.cu
57 lines (51 loc) · 1.91 KB
/
VolumetricMaxUnpooling.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#include "THCUNN.h"
#include "THCTensor.hpp"
#include "common.h"
#include "THCDeviceTensor.cuh"
#include "THCDeviceTensorUtils.cuh"
#include "THCDeviceUtils.cuh"
#include "TH/THHalf.h"
#include "THCHalfAutoNumerics.cuh"
#include <cfloat>
// Forward kernel for volumetric max unpooling.
//
// Each thread handles one element of `input`, addressed as
// input[slice][frame][row][column], and copies it into `outputData` at the
// position recorded for that element in `indices`.
//
// Thread-to-element mapping, as read from the indexing below:
//   x dimension            -> column (dim 3 of input)
//   y dimension            -> row    (dim 2 of input)
//   blockIdx.z + offsetZ   -> flattened (slice, frame) pair:
//                             slice = z / input.getSize(1),
//                             frame = z % input.getSize(1)
//
// `outputData` is addressed as a flat buffer with oT*oH*oW elements per slice;
// the value read from `indices` is used as an offset inside that slice.
// NOTE(review): elements of `outputData` that no index points at are never
// written here — presumably the caller initialises the buffer; confirm in the
// host-side wrapper.
// NOTE(review): `offsetZ` presumably lets the host cover a large z range with
// several launches — confirm in the host-side wrapper.
//
// dT/dH/dW and padT/padH/padW are accepted but not used by this kernel.
template <typename Dtype>
__global__ void cuda_VolumetricMaxUnpooling_updateOutput(
  THCDeviceTensor<Dtype, 4> input,
  THCDeviceTensor<THCIndex_t, 4> indices,
  Dtype* outputData,
  int oT, int oH, int oW,
  int dT, int dH, int dW,
  int padT, int padH, int padW, int offsetZ)
{
  const int64_t col = blockIdx.x * blockDim.x + threadIdx.x;
  const int64_t row = blockIdx.y * blockDim.y + threadIdx.y;

  // Flattened (slice, frame) coordinate handled by this block.
  const unsigned int zIndex = blockIdx.z + offsetZ;
  const int64_t frame   = zIndex % input.getSize(1);  // input frame/time
  const int64_t feature = zIndex / input.getSize(1);  // input slice/feature

  // Threads that fall outside the row/column extent have nothing to do.
  if (row >= input.getSize(2) || col >= input.getSize(3))
  {
    return;
  }

  // Offset of the destination element inside this slice of the output.
  const int64_t target = indices[feature][frame][row][col];
  const Dtype value = input[feature][frame][row][col];

  // Start of this slice in the flat output buffer (64-bit arithmetic).
  Dtype* sliceOutput = outputData + feature * oT * oH * oW;
  sliceOutput[target] = value;
}
// Backward kernel for volumetric max unpooling.
//
// Each thread handles one element of `gradInput`, addressed as
// gradInput[slice][frame][row][column]. It reads the offset recorded for that
// element in `indices` and copies the matching value of `gradOutputData`
// into `gradInput`.
//
// Thread-to-element mapping, as read from the indexing below:
//   x dimension            -> column (dim 3 of gradInput)
//   y dimension            -> row    (dim 2 of gradInput)
//   blockIdx.z + offsetZ   -> flattened (slice, frame) pair:
//                             slice = z / gradInput.getSize(1),
//                             frame = z % gradInput.getSize(1)
//
// `gradOutputData` is addressed as a flat buffer with oT*oH*oW elements per
// slice; the value read from `indices` is an offset inside that slice.
// NOTE(review): `offsetZ` presumably lets the host cover a large z range with
// several launches — confirm in the host-side wrapper.
//
// dT/dH/dW and padT/padH/padW are accepted but not used by this kernel.
template <typename Dtype>
__global__ void cuda_VolumetricMaxUnpooling_updateGradInput(
  Dtype* gradOutputData,
  int oT, int oH, int oW,
  THCDeviceTensor<THCIndex_t, 4> indices,
  THCDeviceTensor<Dtype, 4> gradInput,
  int dT, int dH, int dW,
  int padT, int padH, int padW, int offsetZ)
{
  // Coordinates are 64-bit, as in the forward kernel. With a 32-bit `slice`
  // the product slice*oT*oH*oW was evaluated in `int` and could overflow for
  // large outputs, producing a wrong offset into gradOutputData.
  int64_t iColumn = blockIdx.x * blockDim.x + threadIdx.x;
  int64_t iRow    = blockIdx.y * blockDim.y + threadIdx.y;
  int64_t iFrame  = (blockIdx.z + offsetZ) % gradInput.getSize(1); // gradInput frame/time
  int64_t slice   = (blockIdx.z + offsetZ) / gradInput.getSize(1); // gradInput slice/feature

  // Threads outside the row/column extent do nothing.
  if (iRow < gradInput.getSize(2) && iColumn < gradInput.getSize(3))
  {
    // Offset of the source element inside this slice of gradOutput.
    int64_t index = indices[slice][iFrame][iRow][iColumn];

    // slice is int64_t, so the whole offset is computed in 64-bit.
    Dtype grad_val = gradOutputData[slice*oT*oH*oW + index];
    gradInput[slice][iFrame][iRow][iColumn] = grad_val;
  }
}
#include "generic/VolumetricMaxUnpooling.cu"
#include "THCGenerateFloatTypes.h"