Skip to content

Commit

Permalink
Im2col and Convolution layers have N spatial axes
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffdonahue committed Mar 6, 2015
1 parent fad111a commit dcb0f79
Show file tree
Hide file tree
Showing 16 changed files with 751 additions and 441 deletions.
32 changes: 16 additions & 16 deletions include/caffe/util/im2col.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,28 @@
namespace caffe {

// NOTE(review): this span is a commit-diff rendering with the +/- gutter
// markers stripped, so each function below appears twice: first the old
// 2-D-only declaration (scalar h/w arguments) that this commit removes,
// then the new N-spatial-axes declaration (per-axis shape arrays) that it
// adds.  As literal C++ the single `template <typename Dtype>` header would
// bind only to the first declaration of each pair — a real checkout of
// either side of the commit contains only one declaration per function.

template <typename Dtype>
// Old (removed): 2-D image -> column buffer, geometry given as scalars.
void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_col);
// New (added): N spatial axes; image/col/kernel/pad/stride geometry passed
// as int arrays of length num_spatial_axes.
void im2col_cpu(const Dtype* data_im, const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
Dtype* data_col);

template <typename Dtype>
// Old (removed): column buffer -> 2-D image (inverse of im2col_cpu).
void col2im_cpu(const Dtype* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_im);
// New (added): N-spatial-axes inverse, same array-based geometry.
void col2im_cpu(const Dtype* data_col, const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
Dtype* data_im);

template <typename Dtype>
// Old (removed): GPU 2-D im2col.
void im2col_gpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_col);
// New (added): GPU N-D im2col; also takes col_size — presumably the total
// kernel-launch count for the col buffer (TODO confirm against the .cu
// implementation, which is not in view here).
void im2col_gpu(const Dtype* data_im, const int num_spatial_axes,
const int col_size, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
Dtype* data_col);

template <typename Dtype>
// Old (removed): GPU 2-D col2im.
void col2im_gpu(const Dtype* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_im);
// New (added): GPU N-D col2im; im_size mirrors col_size above for the image
// side — TODO confirm semantics against the .cu implementation.
void col2im_gpu(const Dtype* data_col, const int num_spatial_axes,
const int im_size, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
Dtype* data_im);

}  // namespace caffe

Expand Down
75 changes: 56 additions & 19 deletions include/caffe/vision_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,44 +64,66 @@ class BaseConvolutionLayer : public Layer<Dtype> {
// Compute height_out_ and width_out_ from other parameters.
virtual void compute_output_shape() = 0;

int kernel_h_, kernel_w_;
int stride_h_, stride_w_;
/// @brief The spatial dimensions of a filter kernel.
Blob<int> kernel_shape_;
/// @brief The spatial dimensions of the stride.
Blob<int> stride_;
/// @brief The spatial dimensions of the padding.
Blob<int> pad_;
/// @brief The spatial dimensions of the convolution input.
Blob<int> conv_input_shape_;
/// @brief The spatial dimensions of the input.
Blob<int> input_shape_;
/// @brief The spatial dimensions of the col_buffer.
vector<int> col_buffer_shape_;
/// @brief The spatial dimensions of the output.
vector<int> output_shape_;

int num_spatial_axes_;
int bottom_dim_;
int top_dim_;

int channel_axis_;
int num_;
int channels_;
int pad_h_, pad_w_;
int height_, width_;
int group_;
int num_output_;
int height_out_, width_out_;
bool bias_term_;
bool is_1x1_;

private:
// wrap im2col/col2im so we don't have to remember the (long) argument lists
// Wraps im2col_cpu so callers need not repeat the long geometry argument
// list (per the comment above this block in the original header).
// NOTE(review): diff rendering without +/- markers — the first im2col_cpu
// call below is the pre-commit 2-D form (removed) and the second is the new
// N-spatial-axes form (added); a real source file contains only one of them.
inline void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) {
im2col_cpu(data, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff);
im2col_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(),
col_buffer_shape_.data(), kernel_shape_.cpu_data(),
pad_.cpu_data(), stride_.cpu_data(), col_buff);
}
// Wraps col2im_cpu (inverse of conv_im2col_cpu) to hide the geometry args.
// NOTE(review): diff rendering without +/- markers — first call is the old
// 2-D form (removed), second is the new N-spatial-axes form (added); only
// one exists in an actual checkout.
inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
col2im_cpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data);
col2im_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(),
col_buffer_shape_.data(), kernel_shape_.cpu_data(),
pad_.cpu_data(), stride_.cpu_data(), data);
}
#ifndef CPU_ONLY
// GPU counterpart of conv_im2col_cpu (compiled only when CPU_ONLY is unset,
// per the surrounding #ifndef).
// NOTE(review): diff rendering without +/- markers — first call is the old
// 2-D form (removed); second is the new N-D form (added), which additionally
// passes num_kernels_im2col_ and uses gpu_data()/gpu_shape() device pointers.
inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
im2col_gpu(data, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff);
im2col_gpu(data, num_spatial_axes_, num_kernels_im2col_,
conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
kernel_shape_.gpu_data(), pad_.gpu_data(),
stride_.gpu_data(), col_buff);
}
// GPU counterpart of conv_col2im_cpu (inside the same #ifndef CPU_ONLY).
// NOTE(review): diff rendering without +/- markers — first call is the old
// 2-D form (removed); second is the new N-D form (added), passing
// num_kernels_col2im_ and device-side shape arrays.
inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
col2im_gpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data);
col2im_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_,
conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
data);
}
#endif

int num_kernels_im2col_;
int num_kernels_col2im_;
int conv_out_channels_;
int conv_in_channels_;
int conv_out_spatial_dim_;
int conv_in_height_;
int conv_in_width_;
int out_spatial_dim_;
int kernel_dim_;
int weight_offset_;
int col_offset_;
Expand Down Expand Up @@ -285,11 +307,26 @@ class Im2colLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

int kernel_h_, kernel_w_;
int stride_h_, stride_w_;
/// @brief The spatial dimensions of a filter kernel.
Blob<int> kernel_shape_;
/// @brief The spatial dimensions of the stride.
Blob<int> stride_;
/// @brief The spatial dimensions of the padding.
Blob<int> pad_;
/// @brief The (full) shape of the input.
Blob<int>* input_shape_;
/// @brief The (full) shape of the conv input.
Blob<int>* conv_input_shape_;
/// @brief The spatial dimensions of the output col.
vector<int> col_shape_;

int num_spatial_axes_;
int bottom_dim_;
int top_dim_;

int channel_axis_;
int num_;
int channels_;
int height_, width_;
int pad_h_, pad_w_;
};

// Forward declare PoolingLayer and SplitLayer for use in LRNLayer.
Expand Down
Loading

0 comments on commit dcb0f79

Please sign in to comment.