
Commit

switch over 1-10 num_spatial_axes in {im2col,col2im}_gpu kernels
jeffdonahue committed Mar 6, 2015
1 parent d639086 commit 5b75694
Showing 1 changed file with 135 additions and 11 deletions.
146 changes: 135 additions & 11 deletions src/caffe/util/im2col.cu
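
For context, the change replaces a hard-coded im2col_gpu_kernel<Dtype, 2> launch with a switch over the runtime num_spatial_axes, so that the kernel's template parameter (which sizes its per-thread index arrays) stays a compile-time constant. A minimal, illustrative sketch of that dispatch pattern; kernel_impl and dispatch are made-up names standing in for the Caffe kernels, not actual Caffe code:

#include <cstdio>

template <int num_axes>
void kernel_impl() {
  // In the real kernels, num_axes sizes per-thread index arrays
  // (e.g. int d_im[num_axes]), so it must be a compile-time constant.
  std::printf("running specialization for %d spatial axes\n", num_axes);
}

void dispatch(int num_spatial_axes) {
  switch (num_spatial_axes) {
  case 1: kernel_impl<1>(); break;
  case 2: kernel_impl<2>(); break;
  case 3: kernel_impl<3>(); break;
  // ... the commit enumerates cases up to 10 ...
  default:
    std::fprintf(stderr, "unsupported: %d spatial axes\n", num_spatial_axes);
  }
}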
@@ -78,10 +78,71 @@ void im2col_gpu(const Dtype* data_im, const int num_spatial_axes,
const int num_kernels, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
Dtype* data_col) {
im2col_gpu_kernel<Dtype, 2> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
switch (num_spatial_axes) {
case 1:
im2col_gpu_kernel<Dtype, 1> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
break;
case 2:
im2col_gpu_kernel<Dtype, 2> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
break;
case 3:
im2col_gpu_kernel<Dtype, 3> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
break;
case 4:
im2col_gpu_kernel<Dtype, 4> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
break;
case 5:
im2col_gpu_kernel<Dtype, 5> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
break;
case 6:
im2col_gpu_kernel<Dtype, 6> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
break;
case 7:
im2col_gpu_kernel<Dtype, 7> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
break;
case 8:
im2col_gpu_kernel<Dtype, 8> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
break;
case 9:
im2col_gpu_kernel<Dtype, 9> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
break;
case 10:
im2col_gpu_kernel<Dtype, 10> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
kernel_shape, pad, stride, data_col);
break;
default:
LOG(FATAL) << "im2col_gpu does not support computation with "
<< num_spatial_axes << " spatial axes";
}
CUDA_POST_KERNEL_CHECK;
}

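Every case above launches with the same grid configuration. For reference, the Caffe CUDA helpers it relies on are defined roughly along these lines (a sketch of the definitions in include/caffe/util/device_alternate.hpp; consult the header for the authoritative versions and thread count):

// Threads per block used by Caffe kernels (e.g. 512 in older versions).
const int CAFFE_CUDA_NUM_THREADS = 512;

// Number of blocks needed to cover N work items.
inline int CAFFE_GET_BLOCKS(const int N) {
  return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS;
}

// Grid-stride loop: each thread handles index i, then i + gridDim.x * blockDim.x, ...
#define CUDA_KERNEL_LOOP(i, n) \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
       i < (n); \
       i += blockDim.x * gridDim.x)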
@@ -107,7 +168,6 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col,
int d_col_start[num_axes]; // NOLINT(runtime/arrays)
int d_col_end[num_axes]; // NOLINT(runtime/arrays)
CUDA_KERNEL_LOOP(index, n) {
Dtype val = 0;
// Initialize channel_in, computed in the loop below, with intermediate
// computations used to compute the spatial indices.
int channel_im = index;
@@ -117,14 +177,16 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col,
channel_im /= im_shape[i + 1];
}
// Calculate col start/end indices.
bool done = false; // Skip computation if any dims are out of range.
bool done = false;
for (int i = 0; i < num_axes; ++i) {
d_col_start[i] = d_col_iter[i] =
(d_im[i] < kernel_shape[i]) ?
0 : (d_im[i] - kernel_shape[i]) / stride[i] + 1;
d_col_end[i] = min(d_im[i] / stride[i] + 1, col_shape[i + 1]);
if (d_col_start[i] >= d_col_end[i]) {
data_im[index] = val;
// Skip computation if the dimension is 0 at any spatial axis --
// final val will be 0.
data_im[index] = 0;
done = true;
break; // for (int i = 0; i < num_axes; ++i)
}
@@ -133,6 +195,7 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col,
continue; // CUDA_KERNEL_LOOP(index, n)
}
// Loop over the col to compute the output val.
Dtype val = 0;
bool incremented = true;
do {
// Compute the final offset.
@@ -170,10 +233,71 @@ void col2im_gpu(const Dtype* data_col, const int num_spatial_axes,
const int im_size, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
Dtype* data_im) {
col2im_gpu_kernel<Dtype, 2> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
switch (num_spatial_axes) {
case 1:
col2im_gpu_kernel<Dtype, 1> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
break;
case 2:
col2im_gpu_kernel<Dtype, 2> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
break;
case 3:
col2im_gpu_kernel<Dtype, 3> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
break;
case 4:
col2im_gpu_kernel<Dtype, 4> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
break;
case 5:
col2im_gpu_kernel<Dtype, 5> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
break;
case 6:
col2im_gpu_kernel<Dtype, 6> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
break;
case 7:
col2im_gpu_kernel<Dtype, 7> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
break;
case 8:
col2im_gpu_kernel<Dtype, 8> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
break;
case 9:
col2im_gpu_kernel<Dtype, 9> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
break;
case 10:
col2im_gpu_kernel<Dtype, 10> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
kernel_shape, pad, stride, data_im);
break;
default:
LOG(FATAL) << "im2col_gpu does not support computation with "
<< num_spatial_axes << " spatial axes";
}
CUDA_POST_KERNEL_CHECK;
}

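The d_col_start / d_col_end arithmetic in col2im_gpu_kernel determines, per spatial axis, which column (patch) positions cover a given padded image coordinate. A small host-side sketch of that same formula; col_range is an illustrative helper, not part of Caffe:

#include <algorithm>
#include <cstdio>

// d_im: padded image coordinate; kernel: kernel extent along this axis;
// stride: stride along this axis; col_extent: number of patch positions
// along this axis (col_shape[i + 1] in the kernel above).
void col_range(int d_im, int kernel, int stride, int col_extent) {
  const int start = (d_im < kernel) ? 0 : (d_im - kernel) / stride + 1;
  const int end = std::min(d_im / stride + 1, col_extent);  // exclusive
  std::printf("image coord %d is covered by patches [%d, %d)\n",
              d_im, start, end);
}

int main() {
  // With kernel 3, stride 1, and 5 patch positions, coordinate 4 is
  // covered by patches 2, 3, and 4.
  col_range(4, 3, 1, 5);
  return 0;
}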
