ResNet Implementation for Faster-rcnn #62

twtygqyy · 2016-01-14T01:46:19Z

Recently, I'm trying to combine ResNet network with Faster-rcnn. As the first step, I tried to train a model with ResNet 34 networks without bottleneck architectures. There is no error during training process, however, the detection result is very bad. I believe there is something wrong in my implementation, here is the prototxt I used for training, can anybody offer some help about how should I modify it?

name: "ResNet34"
layer {
  name: 'input-data'
  type: 'Python'
  top: 'data'
  top: 'im_info'
  top: 'gt_boxes'
  python_param {
    module: 'roi_data_layer.layer'
    layer: 'RoIDataLayer'
    param_str: "'num_classes': 2"
  }
}

#conv1 7x7 64 /2
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 7
    pad: 1
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv1_bn"
  type: "BatchNorm"
  bottom: "conv1"
  top: "conv1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv1_relu"
  type: "ReLU"
  bottom: "conv1_bn"
  top: "conv1_bn"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1_bn"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

#conv2_1 3x3 64
layer {
  name: "conv2_1_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_1_1_bn"
  type: "BatchNorm"
  bottom: "conv2_1_1"
  top: "conv2_1_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_1_1_relu"
  type: "ReLU"
  bottom: "conv2_1_1_bn"
  top: "conv2_1_1_bn"
}
layer {
  name: "conv2_1_2"
  type: "Convolution"
  bottom: "conv2_1_1_bn"
  top: "conv2_1_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_1_2_bn"
  type: "BatchNorm"
  bottom: "conv2_1_2"
  top: "conv2_1_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_1_sum"
  type: "Eltwise"
  bottom: "pool1"
  bottom: "conv2_1_2_bn"
  top: "conv2_1_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv2_1_sum_relu"
  type: "ReLU"
  bottom: "conv2_1_sum"
  top: "conv2_1_sum"
}

#conv2_2 3x3 64
layer {
  name: "conv2_2_1"
  type: "Convolution"
  bottom: "conv2_1_sum"
  top: "conv2_2_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_2_1_bn"
  type: "BatchNorm"
  bottom: "conv2_2_1"
  top: "conv2_2_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_2_1_relu"
  type: "ReLU"
  bottom: "conv2_2_1_bn"
  top: "conv2_2_1_bn"
}
layer {
  name: "conv2_2_2"
  type: "Convolution"
  bottom: "conv2_2_1_bn"
  top: "conv2_2_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_2_2_bn"
  type: "BatchNorm"
  bottom: "conv2_2_2"
  top: "conv2_2_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_2_sum"
  type: "Eltwise"
  bottom: "conv2_1_sum"
  bottom: "conv2_2_2_bn"
  top: "conv2_2_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv2_2_sum_relu"
  type: "ReLU"
  bottom: "conv2_2_sum"
  top: "conv2_2_sum"
}

#conv2_3 3x3 64
layer {
  name: "conv2_3_1"
  type: "Convolution"
  bottom: "conv2_2_sum"
  top: "conv2_3_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_3_1_bn"
  type: "BatchNorm"
  bottom: "conv2_3_1"
  top: "conv2_3_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_3_1_relu"
  type: "ReLU"
  bottom: "conv2_3_1_bn"
  top: "conv2_3_1_bn"
}
layer {
  name: "conv2_3_2"
  type: "Convolution"
  bottom: "conv2_3_1_bn"
  top: "conv2_3_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_3_2_bn"
  type: "BatchNorm"
  bottom: "conv2_3_2"
  top: "conv2_3_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_3_sum"
  type: "Eltwise"
  bottom: "conv2_2_sum"
  bottom: "conv2_3_2_bn"
  top: "conv2_3_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv2_3_sum_relu"
  type: "ReLU"
  bottom: "conv2_3_sum"
  top: "conv2_3_sum"
}
layer {
  name: "conv2_proj"
  type: "Convolution"
  bottom: "conv2_3_sum"
  top: "conv2_proj"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 1
    pad: 0
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_proj_bn"
  type: "BatchNorm"
  bottom: "conv2_proj"
  top: "conv2_proj_bn"
  batch_norm_param {
  }
}

#conv3_1 3x3 128
layer {
  name: "conv3_1_1"
  type: "Convolution"
  bottom: "conv2_3_sum"
  top: "conv3_1_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_1_1_bn"
  type: "BatchNorm"
  bottom: "conv3_1_1"
  top: "conv3_1_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_1_1_relu"
  type: "ReLU"
  bottom: "conv3_1_1_bn"
  top: "conv3_1_1_bn"
}
layer {
  name: "conv3_1_2"
  type: "Convolution"
  bottom: "conv3_1_1_bn"
  top: "conv3_1_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_1_2_bn"
  type: "BatchNorm"
  bottom: "conv3_1_2"
  top: "conv3_1_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_1_sum"
  type: "Eltwise"
  bottom: "conv2_proj_bn"
  bottom: "conv3_1_2_bn"
  top: "conv3_1_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv3_1_sum_relu"
  type: "ReLU"
  bottom: "conv3_1_sum"
  top: "conv3_1_sum"
}

#conv3_2 3x3 128
layer {
  name: "conv3_2_1"
  type: "Convolution"
  bottom: "conv3_1_sum"
  top: "conv3_2_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_2_1_bn"
  type: "BatchNorm"
  bottom: "conv3_2_1"
  top: "conv3_2_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_2_1_relu"
  type: "ReLU"
  bottom: "conv3_2_1_bn"
  top: "conv3_2_1_bn"
}
layer {
  name: "conv3_2_2"
  type: "Convolution"
  bottom: "conv3_2_1_bn"
  top: "conv3_2_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_2_2_bn"
  type: "BatchNorm"
  bottom: "conv3_2_2"
  top: "conv3_2_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_2_sum"
  type: "Eltwise"
  bottom: "conv3_1_sum"
  bottom: "conv3_2_2_bn"
  top: "conv3_2_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv3_2_sum_relu"
  type: "ReLU"
  bottom: "conv3_2_sum"
  top: "conv3_2_sum"
}

#conv3_3 3x3 128
layer {
  name: "conv3_3_1"
  type: "Convolution"
  bottom: "conv3_2_sum"
  top: "conv3_3_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_3_1_bn"
  type: "BatchNorm"
  bottom: "conv3_3_1"
  top: "conv3_3_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_3_1_relu"
  type: "ReLU"
  bottom: "conv3_3_1_bn"
  top: "conv3_3_1_bn"
}
layer {
  name: "conv3_3_2"
  type: "Convolution"
  bottom: "conv3_3_1_bn"
  top: "conv3_3_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_3_2_bn"
  type: "BatchNorm"
  bottom: "conv3_3_2"
  top: "conv3_3_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_3_sum"
  type: "Eltwise"
  bottom: "conv3_2_sum"
  bottom: "conv3_3_2_bn"
  top: "conv3_3_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv3_3_sum_relu"
  type: "ReLU"
  bottom: "conv3_3_sum"
  top: "conv3_3_sum"
}

#conv3_4 3x3 128
layer {
  name: "conv3_4_1"
  type: "Convolution"
  bottom: "conv3_3_sum"
  top: "conv3_4_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_4_1_bn"
  type: "BatchNorm"
  bottom: "conv3_4_1"
  top: "conv3_4_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_4_1_relu"
  type: "ReLU"
  bottom: "conv3_4_1_bn"
  top: "conv3_4_1_bn"
}
layer {
  name: "conv3_4_2"
  type: "Convolution"
  bottom: "conv3_4_1_bn"
  top: "conv3_4_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_4_2_bn"
  type: "BatchNorm"
  bottom: "conv3_4_2"
  top: "conv3_4_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_4_sum"
  type: "Eltwise"
  bottom: "conv3_3_sum"
  bottom: "conv3_4_2_bn"
  top: "conv3_4_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv3_4_sum_relu"
  type: "ReLU"
  bottom: "conv3_4_sum"
  top: "conv3_4_sum"
}
layer {
  name: "conv3_proj"
  type: "Convolution"
  bottom: "conv3_4_sum"
  top: "conv3_proj"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 1
    pad: 0
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_proj_bn"
  type: "BatchNorm"
  bottom: "conv3_proj"
  top: "conv3_proj_bn"
  batch_norm_param {
  }
}

#conv4_1 3x3 256
layer {
  name: "conv4_1_1"
  type: "Convolution"
  bottom: "conv3_4_sum"
  top: "conv4_1_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_1_1_bn"
  type: "BatchNorm"
  bottom: "conv4_1_1"
  top: "conv4_1_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_1_1_relu"
  type: "ReLU"
  bottom: "conv4_1_1_bn"
  top: "conv4_1_1_bn"
}
layer {
  name: "conv4_1_2"
  type: "Convolution"
  bottom: "conv4_1_1_bn"
  top: "conv4_1_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_1_2_bn"
  type: "BatchNorm"
  bottom: "conv4_1_2"
  top: "conv4_1_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_1_sum"
  type: "Eltwise"
  bottom: "conv3_proj_bn"
  bottom: "conv4_1_2_bn"
  top: "conv4_1_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_1_sum_relu"
  type: "ReLU"
  bottom: "conv4_1_sum"
  top: "conv4_1_sum"
}

#conv4_2 3x3 256
layer {
  name: "conv4_2_1"
  type: "Convolution"
  bottom: "conv4_1_sum"
  top: "conv4_2_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_2_1_bn"
  type: "BatchNorm"
  bottom: "conv4_2_1"
  top: "conv4_2_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_2_1_relu"
  type: "ReLU"
  bottom: "conv4_2_1_bn"
  top: "conv4_2_1_bn"
}
layer {
  name: "conv4_2_2"
  type: "Convolution"
  bottom: "conv4_2_1_bn"
  top: "conv4_2_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_2_2_bn"
  type: "BatchNorm"
  bottom: "conv4_2_2"
  top: "conv4_2_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_2_sum"
  type: "Eltwise"
  bottom: "conv4_1_sum"
  bottom: "conv4_2_2_bn"
  top: "conv4_2_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_2_sum_relu"
  type: "ReLU"
  bottom: "conv4_2_sum"
  top: "conv4_2_sum"
}

#conv4_3 3x3 256
layer {
  name: "conv4_3_1"
  type: "Convolution"
  bottom: "conv4_2_sum"
  top: "conv4_3_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_3_1_bn"
  type: "BatchNorm"
  bottom: "conv4_3_1"
  top: "conv4_3_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_3_1_relu"
  type: "ReLU"
  bottom: "conv4_3_1_bn"
  top: "conv4_3_1_bn"
}
layer {
  name: "conv4_3_2"
  type: "Convolution"
  bottom: "conv4_3_1_bn"
  top: "conv4_3_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_3_2_bn"
  type: "BatchNorm"
  bottom: "conv4_3_2"
  top: "conv4_3_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_3_sum"
  type: "Eltwise"
  bottom: "conv4_2_sum"
  bottom: "conv4_3_2_bn"
  top: "conv4_3_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_3_sum_relu"
  type: "ReLU"
  bottom: "conv4_3_sum"
  top: "conv4_3_sum"
}

#conv4_4 3x3 256
layer {
  name: "conv4_4_1"
  type: "Convolution"
  bottom: "conv4_3_sum"
  top: "conv4_4_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_4_1_bn"
  type: "BatchNorm"
  bottom: "conv4_4_1"
  top: "conv4_4_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_4_1_relu"
  type: "ReLU"
  bottom: "conv4_4_1_bn"
  top: "conv4_4_1_bn"
}
layer {
  name: "conv4_4_2"
  type: "Convolution"
  bottom: "conv4_4_1_bn"
  top: "conv4_4_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_4_2_bn"
  type: "BatchNorm"
  bottom: "conv4_4_2"
  top: "conv4_4_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_4_sum"
  type: "Eltwise"
  bottom: "conv4_3_sum"
  bottom: "conv4_4_2_bn"
  top: "conv4_4_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_4_sum_relu"
  type: "ReLU"
  bottom: "conv4_4_sum"
  top: "conv4_4_sum"
}

#conv4_5 3x3 256
layer {
  name: "conv4_5_1"
  type: "Convolution"
  bottom: "conv4_4_sum"
  top: "conv4_5_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_5_1_bn"
  type: "BatchNorm"
  bottom: "conv4_5_1"
  top: "conv4_5_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_5_1_relu"
  type: "ReLU"
  bottom: "conv4_5_1_bn"
  top: "conv4_5_1_bn"
}
layer {
  name: "conv4_5_2"
  type: "Convolution"
  bottom: "conv4_5_1_bn"
  top: "conv4_5_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_5_2_bn"
  type: "BatchNorm"
  bottom: "conv4_5_2"
  top: "conv4_5_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_5_sum"
  type: "Eltwise"
  bottom: "conv4_4_sum"
  bottom: "conv4_5_2_bn"
  top: "conv4_5_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_5_sum_relu"
  type: "ReLU"
  bottom: "conv4_5_sum"
  top: "conv4_5_sum"
}

#conv4_6 3x3 256
layer {
  name: "conv4_6_1"
  type: "Convolution"
  bottom: "conv4_5_sum"
  top: "conv4_6_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_6_1_bn"
  type: "BatchNorm"
  bottom: "conv4_6_1"
  top: "conv4_6_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_6_1_relu"
  type: "ReLU"
  bottom: "conv4_6_1_bn"
  top: "conv4_6_1_bn"
}
layer {
  name: "conv4_6_2"
  type: "Convolution"
  bottom: "conv4_6_1_bn"
  top: "conv4_6_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_6_2_bn"
  type: "BatchNorm"
  bottom: "conv4_6_2"
  top: "conv4_6_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_6_sum"
  type: "Eltwise"
  bottom: "conv4_5_sum"
  bottom: "conv4_6_2_bn"
  top: "conv4_6_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_6_sum_relu"
  type: "ReLU"
  bottom: "conv4_6_sum"
  top: "conv4_6_sum"
}
layer {
  name: "conv4_proj"
  type: "Convolution"
  bottom: "conv4_6_sum"
  top: "conv4_proj"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 1
    pad: 0
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_proj_bn"
  type: "BatchNorm"
  bottom: "conv4_proj"
  top: "conv4_proj_bn"
  batch_norm_param {
  }
}

#conv5_1 3x3 512
layer {
  name: "conv5_1_1"
  type: "Convolution"
  bottom: "conv4_6_sum"
  top: "conv5_1_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_1_1_bn"
  type: "BatchNorm"
  bottom: "conv5_1_1"
  top: "conv5_1_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_1_1_relu"
  type: "ReLU"
  bottom: "conv5_1_1_bn"
  top: "conv5_1_1_bn"
}
layer {
  name: "conv5_1_2"
  type: "Convolution"
  bottom: "conv5_1_1_bn"
  top: "conv5_1_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_1_2_bn"
  type: "BatchNorm"
  bottom: "conv5_1_2"
  top: "conv5_1_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_1_sum"
  type: "Eltwise"
  bottom: "conv4_proj_bn"
  bottom: "conv5_1_2_bn"
  top: "conv5_1_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv5_1_sum_relu"
  type: "ReLU"
  bottom: "conv5_1_sum"
  top: "conv5_1_sum"
}

#conv5_2 3x3 512
layer {
  name: "conv5_2_1"
  type: "Convolution"
  bottom: "conv5_1_sum"
  top: "conv5_2_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_2_1_bn"
  type: "BatchNorm"
  bottom: "conv5_2_1"
  top: "conv5_2_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_2_1_relu"
  type: "ReLU"
  bottom: "conv5_2_1_bn"
  top: "conv5_2_1_bn"
}
layer {
  name: "conv5_2_2"
  type: "Convolution"
  bottom: "conv5_2_1_bn"
  top: "conv5_2_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_2_2_bn"
  type: "BatchNorm"
  bottom: "conv5_2_2"
  top: "conv5_2_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_2_sum"
  type: "Eltwise"
  bottom: "conv5_1_sum"
  bottom: "conv5_2_2_bn"
  top: "conv5_2_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv5_2_sum_relu"
  type: "ReLU"
  bottom: "conv5_2_sum"
  top: "conv5_2_sum"
}

#conv5_3 3x3 512
layer {
  name: "conv5_3_1"
  type: "Convolution"
  bottom: "conv5_2_sum"
  top: "conv5_3_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_3_1_bn"
  type: "BatchNorm"
  bottom: "conv5_3_1"
  top: "conv5_3_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_3_1_relu"
  type: "ReLU"
  bottom: "conv5_3_1_bn"
  top: "conv5_3_1_bn"
}
layer {
  name: "conv5_3_2"
  type: "Convolution"
  bottom: "conv5_3_1_bn"
  top: "conv5_3_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_3_2_bn"
  type: "BatchNorm"
  bottom: "conv5_3_2"
  top: "conv5_3_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_3_sum"
  type: "Eltwise"
  bottom: "conv5_2_sum"
  bottom: "conv5_3_2_bn"
  top: "conv5_3_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv5_3_sum_relu"
  type: "ReLU"
  bottom: "conv5_3_sum"
  top: "conv5_3_sum"
}

#========= RPN ============

layer {
  name: "rpn_conv/3x3"
  type: "Convolution"
  bottom: "conv5_3_sum"
  top: "rpn/output"
  param { lr_mult: 1.0 }
  param { lr_mult: 2.0 }
  convolution_param {
    num_output: 512
    kernel_size: 3 pad: 1 stride: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "rpn_relu/3x3"
  type: "ReLU"
  bottom: "rpn/output"
  top: "rpn/output"
}

layer {
  name: "rpn_cls_score"
  type: "Convolution"
  bottom: "rpn/output"
  top: "rpn_cls_score"
  param { lr_mult: 1.0 }
  param { lr_mult: 2.0 }
  convolution_param {
    num_output: 18   # 2(bg/fg) * 9(anchors)
    kernel_size: 1 pad: 0 stride: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}

layer {
  name: "rpn_bbox_pred"
  type: "Convolution"
  bottom: "rpn/output"
  top: "rpn_bbox_pred"
  param { lr_mult: 1.0 }
  param { lr_mult: 2.0 }
  convolution_param {
    num_output: 36   # 4 * 9(anchors)
    kernel_size: 1 pad: 0 stride: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}

layer {
   bottom: "rpn_cls_score"
   top: "rpn_cls_score_reshape"
   name: "rpn_cls_score_reshape"
   type: "Reshape"
   reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}

layer {
  name: 'rpn-data'
  type: 'Python'
  bottom: 'rpn_cls_score'
  bottom: 'gt_boxes'
  bottom: 'im_info'
  bottom: 'data'
  top: 'rpn_labels'
  top: 'rpn_bbox_targets'
  top: 'rpn_bbox_inside_weights'
  top: 'rpn_bbox_outside_weights'
  python_param {
    module: 'rpn.anchor_target_layer'
    layer: 'AnchorTargetLayer'
    param_str: "'feat_stride': 16"
  }
}

layer {
  name: "rpn_loss_cls"
  type: "SoftmaxWithLoss"
  bottom: "rpn_cls_score_reshape"
  bottom: "rpn_labels"
  propagate_down: 1
  propagate_down: 0
  top: "rpn_cls_loss"
  loss_weight: 1
  loss_param {
    ignore_label: -1
    normalize: true
  }
}

layer {
  name: "rpn_loss_bbox"
  type: "SmoothL1Loss"
  bottom: "rpn_bbox_pred"
  bottom: "rpn_bbox_targets"
  bottom: 'rpn_bbox_inside_weights'
  bottom: 'rpn_bbox_outside_weights'
  top: "rpn_loss_bbox"
  loss_weight: 1
  smooth_l1_loss_param { sigma: 3.0 }
}

#========= RoI Proposal ============

layer {
  name: "rpn_cls_prob"
  type: "Softmax"
  bottom: "rpn_cls_score_reshape"
  top: "rpn_cls_prob"
}

layer {
  name: 'rpn_cls_prob_reshape'
  type: 'Reshape'
  bottom: 'rpn_cls_prob'
  top: 'rpn_cls_prob_reshape'
  reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}

layer {
  name: 'proposal'
  type: 'Python'
  bottom: 'rpn_cls_prob_reshape'
  bottom: 'rpn_bbox_pred'
  bottom: 'im_info'
  top: 'rpn_rois'
#  top: 'rpn_scores'
  python_param {
    module: 'rpn.proposal_layer'
    layer: 'ProposalLayer'
    param_str: "'feat_stride': 16"
  }
}

#layer {
#  name: 'debug-data'
#  type: 'Python'
#  bottom: 'data'
#  bottom: 'rpn_rois'
#  bottom: 'rpn_scores'
#  python_param {
#    module: 'rpn.debug_layer'
#    layer: 'RPNDebugLayer'
#  }
#}

layer {
  name: 'roi-data'
  type: 'Python'
  bottom: 'rpn_rois'
  bottom: 'gt_boxes'
  top: 'rois'
  top: 'labels'
  top: 'bbox_targets'
  top: 'bbox_inside_weights'
  top: 'bbox_outside_weights'
  python_param {
    module: 'rpn.proposal_target_layer'
    layer: 'ProposalTargetLayer'
    param_str: "'num_classes': 2"
  }
}

#========= RCNN ============

layer {
  name: "roi_pool5"
  type: "ROIPooling"
  bottom: "conv5_3_sum"
  bottom: "rois"
  top: "pool5"
  roi_pooling_param {
    pooled_w: 7
    pooled_h: 7
    spatial_scale: 0.0625 # 1/16
  }
}
layer {
  name: "cls_score"
  type: "InnerProduct"
  bottom: "pool5"
  top: "cls_score"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "bbox_pred"
  type: "InnerProduct"
  bottom: "pool5"
  top: "bbox_pred"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 8
    weight_filler {
      type: "gaussian"
      std: 0.001
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "loss_cls"
  type: "SoftmaxWithLoss"
  bottom: "cls_score"
  bottom: "labels"
  propagate_down: 1
  propagate_down: 0
  top: "loss_cls"
  loss_weight: 1
}
layer {
  name: "loss_bbox"
  type: "SmoothL1Loss"
  bottom: "bbox_pred"
  bottom: "bbox_targets"
  bottom: "bbox_inside_weights"
  bottom: "bbox_outside_weights"
  top: "loss_bbox"
  loss_weight: 1
}

victorhcm · 2016-02-03T01:35:37Z

Hi @twtygqyy, what training error and validation error are you obtaining for your ResNet?

twtygqyy · 2016-02-04T05:17:16Z

Hi @victorhcm I'm still working on it and I found it is not easy to train a Faster-rcnn model from scratch with ResNet. Fortunately, the author released the pre-trained models yesterday, you can download them from https://github.com/KaimingHe/deep-residual-networks to fine-tune a fater-rcnn model

victorhcm · 2016-02-04T14:51:28Z

Thank you, @twtygqyy!

kshalini · 2016-02-08T16:38:27Z

@twtygqyy
actually 2 questions. is it straight forward to use BVLCNet instead of ZF or VGG and train Faster RCNN? i.e fine tune it with the earlier model weights?

the 2nd question is, have you achieved any success in finetuning ResNET? can you pls share your experience

twtygqyy · 2016-02-09T04:55:07Z

@kshalini
For the first question, it is straight forward to change the networks and use pre-trained model for fine-tuning. The only thing you should modify is inserting the RPN layers before fully connected layers.

For the second question, in order to use ResNet model for fine-tuning, it is necessary to update the current caffe fork in py-faster-rcnn since it is using an old version without BN and scaling layers which ResNet requires.
I tried to update the caffe version and I'm currently training the faster-rcnn with ResNet model. However, I found that the training speed is very slow, maybe I made some mistakes. I will see whether I can get a good model soon.

rbgirshick · 2016-02-09T21:30:01Z

I've just updated the caffe submodule to one rebased onto Caffe master as of this morning (commit 33f2445).

twtygqyy · 2016-02-10T03:27:45Z

@rbgirshick Thanks!

hariag · 2016-02-10T11:12:48Z

@rbgirshick Thanks!

SilverWaveGL · 2016-02-11T08:19:25Z

Was someone successful in training ResNet Faster-RCNN models? If so, could you upload the prototxt and solvers.
I'm unable to finetune for tasks that I had successes using ZF model

kshalini · 2016-02-11T12:37:33Z

a naive question here. is there any comparative study on using plain resNet vs doing a pyfasterRCNN+resNet?

on the VOC datasets, the resNET-51 seems to give >80% accuracy already. in case we are dealing with lesser no. of object categories (say < 100), what would be better recommendation - just go with resNET (34, 51 etc.) or to go with resNET + pyFasterRCNN?

victorhcm · 2016-02-11T12:44:34Z

Hi @kshalini, do you need the localization of the objects? If that is the case, I think you should go with resNET + pyFasterRCNN, as resNET alone just provides the object class scores.

kshalini · 2016-02-11T12:56:13Z

@victorhcm
yes localization is desirable. maybe not always, but in some cases yes. i get it now. thanks!

in order to train pyFasterRCNN with resNET, we also need the (resnet) train_val.prototxt which am not able to locate anywhere yet. only deploy.prototxt seems to be available. does someone have one? (maybe @twtygqyy ?)

siddharthm83 · 2016-02-12T00:05:10Z

@kshalini : Take a look at models/VGG/faster_rcnn_alt_opt/ . The solvers and train-val equivalents are there. For eg: in stage1_fast_rcnn_solver30k40k.pt the train prototxt is
train_net: "models/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt"
for pyfasterRCNN with resNet, i think you have to change models and follow a similar format.

twtygqyy · 2016-02-12T13:14:25Z

@kshalini Actually MSRA used ResNet with RPN layer from Faster-rcnn paper to do object detection on MSCOCO object detection challenge (80 categories), I'm not quite understand your question about the selection between ResNet or ResNet Faster-rcnn to deal with lesser no. of object categories.

twtygqyy · 2016-02-12T13:19:38Z

@kshalini There is not region proposal networks in ResNet train_val prototxt, you have to insert RPN layers for object detection instead of only ResNet for classification.
About how to insert RPN layes, you can take the prototxt I shown at the beginning of this issuse for example and modify the ResNet network accoding to https://github.com/KaimingHe/deep-residual-networks to fine-tune a faster-rcnn model

kshalini · 2016-02-12T23:51:24Z

@twtygqyy @siddharthm83 @victorhcm

thanks! actually i had managed to get a pyFasterRCNN trained by finetuning the VGGnet using the end2end method, about a month ago and the results were quite decent (>85% classification for my dataset with about 50 categories).

i now want to try this using resNET for classification. just that I am struggling to locate a train_val.prototxt for resNet. The Kaiming He, Github link just has the deploy.prototxt. I don't know yet how to modify that and make it into a train_val (i think some edits needs to go in at the top and bottom)

that's where I am looking for some help.

twtygqyy · 2016-02-13T00:59:44Z

@kshalini
Please check the following link for a three-class train_val example
https://github.com/twtygqyy/deep-residual-networks/tree/master/prototxt/ResNet-50-3classes-train_val

kshalini · 2016-02-13T01:09:11Z

@twtygqyy awesome!!! thanks a lot. will try this out shortly.

as a first step - to train as a simple Caffe model and then step-2, in combination with fasterRCNN, just so that I know if am getting anything wrong along the path.

are there any additional instructions to keep in mind while training? (like flip or other data augmentations like skew, scale etc.)

siddharthm83 · 2016-02-13T02:03:27Z

@kshalini ; i dont completely understand your question. I thought you wanted to do detection and hence asking your question here (faster-rcnn). Can you explain what your use case is?
If you only want classification, training resnet from scratch for imagenet would possibly need a ton of GPU memory and multi-gpu. If you just need to train for a smaller subset for classification, i would recommend fine tuning. See eg here: http://caffe.berkeleyvision.org/gathered/examples/finetune_flickr_style.html

kshalini · 2016-02-13T02:19:01Z

@siddharthm83 my use case is to classify types of cars.

initially i tried basic finetuning (no rcnn) and got up to a certain level of accuracy (~70%). later i tried out faster-rcnn (finetuned vgg) and i got around ~80% classification accuracy.

now am exploring if i can use resNet for the same. so i want to try this out in steps - first without rcnn and then with faster-rcnn. I am comparing accuracy and speed of these approaches.

though the primary objective is classification, the reason i am interested in localization also is to analyze the failure cases (false +ves etc.) to understand where it is going wrong.

pls advice if i am missing something. thanks.

siddharthm83 · 2016-02-15T17:23:15Z

@kshalini , good problem to solve. Check Hinton's paper on dark knowledge where he talks about building specialist nets to classify datasets where the datasets can be easily confused (too similar).
http://arxiv.org/pdf/1503.02531v1.pdf

arushk1 · 2016-02-15T19:58:54Z

@kshalini How did you finetune the pyfasterrcnn with VGG16?

twtygqyy · 2016-02-15T23:32:35Z

@arushk1
You can modify the 'num_classes' and 'num_output' in prototxt files from models/VGG16/faster_rcnn_end2end and use --weights data/imagenet_models/VGG16.v2.caffemodel option for finetuning

kshalini · 2016-02-16T11:09:33Z

@arushk1, yes as @twtygqyy says - I did exactly the same. just make sure you get your path settings (to your data etc.) right. it should start showing signs of converging after sometime. good luck!

kaishijeng · 2016-02-16T22:34:09Z

@twtygqyy

Are you able to replace VGG16 with Resent in py-faster-rcnn and use Kaiming's pretrained models to finetune py-faster-rcnn successfully?

twtygqyy · 2016-02-17T00:30:10Z

@kaishijeng Yes, I did, just by inserting RPN layers after conv5 and I add one additional conv layer for size normalization. I'm not sure whether I did it in a right way but it seems work for me.

kaishijeng · 2016-02-17T01:09:57Z

@twtygqyy

Which pretrained resnet model (50, 101 or 152) do you use? Do you mind sharing your trained model and train.prototxt? I like to compare performance with VGG16.

twtygqyy · 2016-02-17T07:44:59Z

@kaishijeng
I fine-tuned ResNet50 on my own dataset with only 2 classes (1 for background) for test. Due to the memory issue, I deleted several blocks for each conv and only trained with 20,000 iterations to check whether it can work or not. Thus the performance cannot be compared with full ResNet + faster-rcnn.

SilverWaveGL · 2016-02-17T10:51:34Z

@twtygqyy Why did you insert RPN layers after conv5 and not before conv5? see quote from paper bellow.

We compute the full-image shared conv feature maps using ... (conv1x, conv2x, conv3x, and conv4x ... These layers are shared by a region proposal network ... and a Fast R-CNN detection network. RoI pool-ing is performed before conv5_1. ... all layers of conv5x and up are adopted for each region.

twtygqyy · 2016-02-17T11:49:00Z

@SilverWaveGL Thanks for pointing out that, it should be like the quote you mentioned for reproducing the result in the paper. In my case, just for a quick test, I removed several repeated blocks in early convs since I do not have enough memory on my machine for a full ResNet faster-rcnn training.

liuchang8am · 2016-06-02T10:24:31Z

I'm training faster-rcnn with ResNet-50 layers on ms coco, train.prototxt provided by @siddharthm83. However, I'm getting loss_bbox = 0 and loss_cls = 0. Any insight would be appreciated.

I0531 13:21:11.898075 32392 solver.cpp:229] Iteration 1860, loss = 0.474211

I0531 13:21:11.898149 32392 solver.cpp:245] Train net output #0: loss_bbox = 0 (* 1 = 0 loss)

I0531 13:21:11.898160 32392 solver.cpp:245] Train net output #1: loss_cls = 0 (* 1 = 0 loss)

I0531 13:21:11.898170 32392 solver.cpp:245] Train net output #2: rpn_cls_loss = 0.501028 (* 1 = 0.501028 loss)

I0531 13:21:11.898180 32392 solver.cpp:245] Train net output #3: rpn_loss_bbox = 0.286518 (* 1 = 0.286518 loss)

siddharthm83 · 2016-06-02T15:19:56Z

@liuchang8am , I am not sure what you are doing wrong. Hard to tell from the 4 lines you have pasted. Is your classifier loss 0 from the first iteration, or gradually converges to 0?
I have not trained it on mscoco, so I am not sure.

liuchang8am · 2016-06-13T08:34:57Z

@siddharthm83 The loss_bbox is 0 since the first iteration. I test the trained model, and it can't detect any objects in the pre-defined categories. Full experiment log is attached, hope it shows my error somewhere.
log.txt

agarwal-shubham · 2016-06-13T17:27:34Z

@siddharthm83 I also followed a similar approach when merging resNet and py faster rcnn, however can you please explain me the reason why you removed the ( fc -relu - dropout ) layers and added a conv layer between rpn - ROI and rcnn modules

afantideng · 2016-08-08T08:36:16Z

@siddharthm83 I cannot think exactly why the output size of conv5_3 in VGG16 is 7 * 7. For example, if the size of input image is 600 * 1000，shouldn't it be 39*64 ?

kristellmarisse · 2016-08-09T13:49:55Z

In order to get a pretrained resnet model for PASCAL VOC dataset (since py-faster-rcnn needs a pretrained model), should I train the Images after cropping the images based on annotations or should I train using whole images?

twtygqyy · 2016-08-10T00:45:24Z

@kristellmarisse You should use the ResNet model trained on Imagenet and train on whole images without cropping so as to generate negative samples.

kshalini · 2016-08-10T00:59:58Z

@abhirevan, when you got resnet-50 working with faster-rcnn, which dataset did you train on. I want to train on VOC being with. I can find imageNet models trained with resnet-50. should I first fine-tune imagenet for VOC (using plain resnet-50) and then use that model to train the rcnn? Or can I start the rcnn training with the imageNet model itself?

zimenglan-sysu-512 · 2016-08-23T11:49:58Z

@SilverWaveGL set the lr_mult and decay_mult to be zero in training stage? or in testing stage? can you mind explaining it? thanks.

mengzhangjian · 2016-09-19T07:12:25Z

@siddharthm83 ,can you help me solve this problem?Thank you
#345

jade2014 · 2016-10-17T07:25:43Z

@janakipj you mentioned "For testing, I use the net.forward (standard python code) and try to interpret the output of the last layer using the key "fc3" which is the last layer in your trainval. And I get all equal values". Do you find why the values all equal ? I met same

RubinaRashid · 2016-11-06T07:27:39Z

i am doing work on ResNet model for fixation prediction,i deploy ResNet model for one image but after input layer data is not feeding to the model?
Any one can guide me

chuckbasstan123 · 2017-01-06T01:07:01Z

@twtygqyy
Just one quick question about the "param_str: "'feat_stride': 16" and "spatial_scale: 0.0675#1/16".
It is related to how many strides you totally made for final layer before rpn-layer. I thought it is 2**5=32 by searching "stride: 2" in the training file.
Is there anyone that can provide some clues about setting up "feat_stride" and "spatial_scale"? It should be modified from network to network, right?

Eniac-Xie · 2017-02-19T14:30:05Z

Hi, everyone, I have release a Implementation of ResNet-101 based Faster-RCNN, which also used OHEM (Online Hard Example Mining) while training, you can find the code and model weights here

onkarganjewar · 2017-02-22T22:29:42Z

@abhirevan @kshalini @twtygqyy @siddharthm83 @rbgirshick

I'm trying to train the ResNet-50 model on PASCAL VOC 2007 trainval dataset. I'm using this command to start the training

./tools/train_net.py --gpu 1 --weights data/imagenet_models/ResNet-50-model.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_end2end.yml --solver models/ResNet-50/faster_rcnn_end2end/solver.prototxt

I'm using the solver/train prototxt files from @twtygqyy repo

However, I'm getting this error:

Normalizing targets done WARNING: Logging before InitGoogleLogging() is written to STDERR I0222 13:59:58.538053 23076 solver.cpp:54] Initializing solver from parameters: test_iter: 100 test_interval: 1000 base_lr: 0.0001 display: 100 max_iter: 200000 lr_policy: "multistep" gamma: 0.1 momentum: 0.9 weight_decay: 0.0001 stepsize: 20000 snapshot: 10000
snapshot_prefix: "resnet50_train" solver_mode: GPU
net: "models/ResNet-50/faster_rcnn_end2end/ResNet-50-train_val.prototxt"
test_initialization: false
I0222 13:59:58.538121 23076 solver.cpp:96] Creating training net from net file: models/ResNet-50/faster_rcnn_end2end/ResNet-50-train_val.prototxt
[libprotobuf ERROR google/protobuf/text_format.cc:245] Error parsing text-format caffe.NetParameter: 74:26: Message type "caffe.LayerParameter" has no field named "batch_norm_param".
F0222 13:59:58.538242 23076 upgrade_proto.cpp:928] Check failed: ReadProtoFromTextFile(param_file, param) Failed to parse NetParameter file: models/ResNet-50/faster_rcnn_end2end/ResNet-50-train_val.prototxt
*** Check failure stack trace: *** Aborted (core dumped)

I'm on the latest commit of faster-rcnn branch of caffe-fast-rcnn

Pardon my lack of knowledge, but would you guys mind helping me resolve this error, please? Appreciate it. Thanks.

hgaiser · 2017-02-23T08:31:39Z

That's a weird error, the error that it is giving is that the LayerParameter type has no field named batch_norm_param, but even upstream Caffe has that field..

Does VGG16 work properly then?

onkarganjewar · 2017-02-23T22:46:42Z

@hgaiser

Does VGG16 work properly then?

No, I suppose not. I'm able to train and test VGG_CNN_M_1024 model successfully. However, the loss_cls and loss_bbox seems to remain 0 until the last iteration. I don't know whether that is relevant or not, in this case.

./tools/train_net.py --gpu 1 --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_end2end.yml --solver models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/solver.prototxt

Anyways, this is the output snippet from my VGG_CNN_M_1024 training:

Train net output #0: loss_bbox = 0 (* 1 = 0 loss)
I0223 14:28:28.146657 3834 solver.cpp:258] Train net output #1: loss_cls = 0 (* 1 = 0 loss)
I0223 14:28:28.146667 3834 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.121718 (* 1 = 0.121718 loss)

I0223 14:28:28.146678 3834 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.11191 (* 1 = 0.11191 loss)

I0223 14:28:28.146688 3834 solver.cpp:571] Iteration 100, lr = 0.001
I0223 14:28:30.319339 3834 solver.cpp:242] Iteration 120, loss = 0.286128

I0223 14:28:30.319385 3834 solver.cpp:258] Train net output #0: loss_bbox = 0 (* 1 = 0 loss)

I0223 14:28:30.319396 3834 solver.cpp:258] Train net output #1: loss_cls = 0 (* 1 = 0 loss)

##########################################################################

On the other hand, I'm getting the out of memory error in case of VGG16.

./tools/train_net.py --gpu 0 --weights data/imagenet_models/VGG16.v2.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_end2end.yml --solver models/pascal_voc/VGG16/faster_rcnn_end2end/solver.prototxt

Iteration 0, loss = 3.53162
I0223 14:26:47.963392 3791 solver.cpp:258] Train net output #0: loss_bbox = 0 (* 1 = 0 loss)

I0223 14:26:47.963405 3791 solver.cpp:258] Train net output #1: loss_cls = 1.98189 (* 1 = 1.98189 loss)

I0223 14:26:47.963416 3791 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.79846 (* 1 = 0.79846 loss)

I0223 14:26:47.963426 3791 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.690695 (* 1 = 0.690695 loss)

I0223 14:26:47.963438 3791 solver.cpp:571] Iteration 0, lr = 0.001
F0223 14:26:47.970721 3791 syncedmem.cpp:58] Check failed: error == cudaSuccess (2 vs. 0) out of memory
*** Check failure stack trace: ***
Aborted (core dumped)

FYI, I'm using caffe-fast-rcnn from @rbgirshick repo and I think it does not have a batch_norm_param field.

Thanks for reaching out, though. Let me know if you need any other information. Thank you very much.

DeepestNet · 2017-04-11T06:26:53Z

@kshalini, how did you find the accuracy value for pyFasterRCNN trained by finetuning the VGGnet using the end2end method? What additional codes you used to get accuracy value?

646677064 · 2017-05-05T14:58:49Z

@onkarganjewar ,hi,why do you remove the BN from the original ResNet? I.m confused

tianzhi0549 · 2017-07-17T16:21:11Z

Hi everyone, I open source my implementation of Faster RCNN with ResNet for ImageNet Detection here. Hope it is useful for someone. Thanks!

sulth · 2017-10-29T06:30:35Z

@tianzhi0549
I have used Resnet resnet101_faster_rcnn_bn_scale_merged_end2end_iter_70000.caffemodel pretrained weight tomake a pretrained model at 0th epoch and finetuned on it.
resnetTrain.txt

On testing, the mAP is very low.Also detecting only class with bigger boundingboxes correctly in majority cases.Also has issue with overlapping cases.Can you please help me to trigger the issue.

Thanks in advance.

whmin · 2017-10-31T08:57:42Z

@twtygqyy Could you please give me your resnet files for faster rcnn?i saw someone find your train.prototxt and test.protxt,but i can not find them.I try to modify my .prototxt file,but failed.Thank you very much!!!

nnop · 2017-12-01T20:01:43Z

Have found the reason of training very slow？I met the same problem. @twtygqyy

xiaoming-qxm · 2018-01-07T13:01:07Z

@sulth something wrong with the method of calculate the AP, you need to implement your owns.

xiaoming-qxm · 2018-01-11T06:28:02Z

@siddharthm83 Can we remain pooled_w & pooled_h params in the ROIpooling layer to equal 7 and just set spatio_scale=1/8 instead of spatio_scale=1/16?

R1234A · 2019-02-26T09:09:00Z

Hi I am trying to train Resnet-18 I have used train.prototxt as -

name: "ResNet-18"

layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 21"
}
}

layer {
bottom: "data"
top: "conv1"
name: "conv1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 7
pad: 3
stride: 2
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "bn_conv1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "scale_conv1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "conv1_relu"
type: "ReLU"
}

layer {
bottom: "conv1"
top: "pool1"
name: "pool1"
type: "Pooling"
pooling_param {
kernel_size: 3
stride: 2
pool: MAX
}
}

layer {
bottom: "pool1"
top: "res2a_branch1"
name: "res2a_branch1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 1
pad: 0
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch1"
top: "res2a_branch1"
name: "bn2a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch1"
top: "res2a_branch1"
name: "scale2a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "pool1"
top: "res2a_branch2a"
name: "res2a_branch2a"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "bn2a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "scale2a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "res2a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2b"
name: "res2a_branch2b"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch2b"
top: "res2a_branch2b"
name: "bn2a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch2b"
top: "res2a_branch2b"
name: "scale2a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a_branch1"
bottom: "res2a_branch2b"
top: "res2a"
name: "res2a"
type: "Eltwise"
}

layer {
bottom: "res2a"
top: "res2a"
name: "res2a_relu"
type: "ReLU"
}

layer {
bottom: "res2a"
top: "res2b_branch2a"
name: "res2b_branch2a"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "bn2b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "scale2b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "res2b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2b"
name: "res2b_branch2b"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2b_branch2b"
top: "res2b_branch2b"
name: "bn2b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2b_branch2b"
top: "res2b_branch2b"
name: "scale2b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a"
bottom: "res2b_branch2b"
top: "res2b"
name: "res2b"
type: "Eltwise"
}

layer {
bottom: "res2b"
top: "res2b"
name: "res2b_relu"
type: "ReLU"
}

layer {
bottom: "res2b"
top: "res3a_branch1"
name: "res3a_branch1"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res3a_branch1"
top: "res3a_branch1"
name: "bn3a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch1"
top: "res3a_branch1"
name: "scale3a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2b"
top: "res3a_branch2a"
name: "res3a_branch2a"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "bn3a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "scale3a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "res3a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2b"
name: "res3a_branch2b"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3a_branch2b"
top: "res3a_branch2b"
name: "bn3a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch2b"
top: "res3a_branch2b"
name: "scale3a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a_branch1"
bottom: "res3a_branch2b"
top: "res3a"
name: "res3a"
type: "Eltwise"
}

layer {
bottom: "res3a"
top: "res3a"
name: "res3a_relu"
type: "ReLU"
}

layer {
bottom: "res3a"
top: "res3b_branch2a"
name: "res3b_branch2a"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "bn3b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "scale3b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "res3b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2b"
name: "res3b_branch2b"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3b_branch2b"
top: "res3b_branch2b"
name: "bn3b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3b_branch2b"
top: "res3b_branch2b"
name: "scale3b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a"
bottom: "res3b_branch2b"
top: "res3b"
name: "res3b"
type: "Eltwise"
}

layer {
bottom: "res3b"
top: "res3b"
name: "res3b_relu"
type: "ReLU"
}

layer {
bottom: "res3b"
top: "res4a_branch1"
name: "res4a_branch1"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res4a_branch1"
top: "res4a_branch1"
name: "bn4a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch1"
top: "res4a_branch1"
name: "scale4a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3b"
top: "res4a_branch2a"
name: "res4a_branch2a"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "bn4a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "scale4a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "res4a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2b"
name: "res4a_branch2b"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4a_branch2b"
top: "res4a_branch2b"
name: "bn4a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch2b"
top: "res4a_branch2b"
name: "scale4a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a_branch1"
bottom: "res4a_branch2b"
top: "res4a"
name: "res4a"
type: "Eltwise"
}

layer {
bottom: "res4a"
top: "res4a"
name: "res4a_relu"
type: "ReLU"
}

layer {
bottom: "res4a"
top: "res4b_branch2a"
name: "res4b_branch2a"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "bn4b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "scale4b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "res4b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2b"
name: "res4b_branch2b"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4b_branch2b"
top: "res4b_branch2b"
name: "bn4b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4b_branch2b"
top: "res4b_branch2b"
name: "scale4b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a"
bottom: "res4b_branch2b"
top: "res4b"
name: "res4b"
type: "Eltwise"
}

layer {
bottom: "res4b"
top: "res4b"
name: "res4b_relu"
type: "ReLU"
}

layer {
name: "rpn_cls_score"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 18 # 2(bg/fg) * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}

layer {
name: "rpn_bbox_pred"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 36 # 4 * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}

layer {
bottom: "rpn_cls_score"
top: "rpn_cls_score_reshape"
name: "rpn_cls_score_reshape"
type: "Reshape"
reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}

layer {
name: 'rpn-data'
type: 'Python'
bottom: 'rpn_cls_score'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'data'
top: 'rpn_labels'
top: 'rpn_bbox_targets'
top: 'rpn_bbox_inside_weights'
top: 'rpn_bbox_outside_weights'
python_param {
module: 'rpn.anchor_target_layer'
layer: 'AnchorTargetLayer'
param_str: "'feat_stride': 16"
}
}

layer {
name: "rpn_loss_cls"
type: "SoftmaxWithLoss"
bottom: "rpn_cls_score_reshape"
bottom: "rpn_labels"
propagate_down: 1
propagate_down: 0
top: "rpn_cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}

layer {
name: "rpn_loss_bbox"
type: "SmoothL1Loss"
bottom: "rpn_bbox_pred"
bottom: "rpn_bbox_targets"
bottom: 'rpn_bbox_inside_weights'
bottom: 'rpn_bbox_outside_weights'
top: "rpn_loss_bbox"
loss_weight: 1
smooth_l1_loss_param { sigma: 3.0 }
}

#========= RoI Proposal ============

layer {
name: "rpn_cls_prob"
type: "Softmax"
bottom: "rpn_cls_score_reshape"
top: "rpn_cls_prob"
}

layer {
name: 'rpn_cls_prob_reshape'
type: 'Reshape'
bottom: 'rpn_cls_prob'
top: 'rpn_cls_prob_reshape'
reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}

layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rpn_rois'

top: 'rpn_scores'

python_param {
	module: 'rpn.proposal_layer'
	layer: 'ProposalLayer'
	param_str: "'feat_stride': 16"
}

}

#layer {

name: 'debug-data'

type: 'Python'

bottom: 'data'

bottom: 'rpn_rois'

bottom: 'rpn_scores'

python_param {

module: 'rpn.debug_layer'

layer: 'RPNDebugLayer'

}

#}

layer {
name: 'roi-data'
type: 'Python'
bottom: 'rpn_rois'
bottom: 'gt_boxes'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'rpn.proposal_target_layer'
layer: 'ProposalTargetLayer'
param_str: "'num_classes': 21"
}
}

Stop Resnet till conv4x and add ROIpooling

layer {
name: "roi_pool5"
type: "ROIPooling"
bottom: "res4b"
bottom: "rois"
top: "roipool5"
roi_pooling_param {
pooled_w: 14
pooled_h: 14
spatial_scale: 0.0625 # 1/16
}
}

Resume Resnet conv5x layers after ROIpooling layers

layer {
bottom: "roipool5"
top: "res5a_branch1"
name: "res5a_branch1"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res5a_branch1"
top: "res5a_branch1"
name: "bn5a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch1"
top: "res5a_branch1"
name: "scale5a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "roipool5"
top: "res5a_branch2a"
name: "res5a_branch2a"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "bn5a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "scale5a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "res5a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2b"
name: "res5a_branch2b"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5a_branch2b"
top: "res5a_branch2b"
name: "bn5a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch2b"
top: "res5a_branch2b"
name: "scale5a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a_branch1"
bottom: "res5a_branch2b"
top: "res5a"
name: "res5a"
type: "Eltwise"
}

layer {
bottom: "res5a"
top: "res5a"
name: "res5a_relu"
type: "ReLU"
}

layer {
bottom: "res5a"
top: "res5b_branch2a"
name: "res5b_branch2a"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "bn5b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "scale5b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "res5b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2b"
name: "res5b_branch2b"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5b_branch2b"
top: "res5b_branch2b"
name: "bn5b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5b_branch2b"
top: "res5b_branch2b"
name: "scale5b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a"
bottom: "res5b_branch2b"
top: "res5b"
name: "res5b"
type: "Eltwise"
}

layer {
bottom: "res5b"
top: "res5b"
name: "res5b_relu"
type: "ReLU"
}

layer {
bottom: "res5b"
top: "pool5"
name: "pool5"
type: "Pooling"
pooling_param {
kernel_size: 7
stride: 1
pool: AVE
}
}

######### Add faster RCNN cls and bbox layer

layer {
name: "cls_score_uefa"
type: "InnerProduct"
bottom: "pool5"
top: "cls_score_uefa"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 21
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}

layer {
name: "bbox_pred_uefa"
type: "InnerProduct"
bottom: "pool5"
top: "bbox_pred_uefa"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 84
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}

layer {
name: "loss_cls"
type: "SoftmaxWithLoss"
bottom: "cls_score_uefa"
bottom: "labels"
propagate_down: 1
propagate_down: 0
top: "loss_cls"
loss_weight: 1
}

layer {
name: "loss_bbox"
type: "SmoothL1Loss"
bottom: "bbox_pred_uefa"
bottom: "bbox_targets"
bottom: "bbox_inside_weights"
bottom: "bbox_outside_weights"
top: "loss_bbox"
loss_weight: 1
}

And the test.prototxt file is -

name: "ResNet-18"

input: "data"
input_shape {
dim: 1
dim: 3
dim: 1280
dim: 720
}

input: "im_info"
input_shape {
dim: 1
dim: 3
}