ResNet Implementation for Faster-rcnn #62

Closed
twtygqyy opened this issue Jan 14, 2016 · 108 comments

Comments
@twtygqyy

Recently I have been trying to combine ResNet with Faster R-CNN. As a first step, I tried to train a model with the ResNet-34 network (without bottleneck architectures). There is no error during the training process; however, the detection results are very bad. I believe there is something wrong in my implementation. Here is the prototxt I used for training. Can anybody offer some advice on how I should modify it?

name: "ResNet34"
layer {
  name: 'input-data'
  type: 'Python'
  top: 'data'
  top: 'im_info'
  top: 'gt_boxes'
  python_param {
    module: 'roi_data_layer.layer'
    layer: 'RoIDataLayer'
    param_str: "'num_classes': 2"
  }
}

#conv1 7x7 64 /2
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 7
    pad: 1
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv1_bn"
  type: "BatchNorm"
  bottom: "conv1"
  top: "conv1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv1_relu"
  type: "ReLU"
  bottom: "conv1_bn"
  top: "conv1_bn"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1_bn"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

#conv2_1 3x3 64
layer {
  name: "conv2_1_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_1_1_bn"
  type: "BatchNorm"
  bottom: "conv2_1_1"
  top: "conv2_1_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_1_1_relu"
  type: "ReLU"
  bottom: "conv2_1_1_bn"
  top: "conv2_1_1_bn"
}
layer {
  name: "conv2_1_2"
  type: "Convolution"
  bottom: "conv2_1_1_bn"
  top: "conv2_1_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_1_2_bn"
  type: "BatchNorm"
  bottom: "conv2_1_2"
  top: "conv2_1_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_1_sum"
  type: "Eltwise"
  bottom: "pool1"
  bottom: "conv2_1_2_bn"
  top: "conv2_1_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv2_1_sum_relu"
  type: "ReLU"
  bottom: "conv2_1_sum"
  top: "conv2_1_sum"
}

#conv2_2 3x3 64
layer {
  name: "conv2_2_1"
  type: "Convolution"
  bottom: "conv2_1_sum"
  top: "conv2_2_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_2_1_bn"
  type: "BatchNorm"
  bottom: "conv2_2_1"
  top: "conv2_2_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_2_1_relu"
  type: "ReLU"
  bottom: "conv2_2_1_bn"
  top: "conv2_2_1_bn"
}
layer {
  name: "conv2_2_2"
  type: "Convolution"
  bottom: "conv2_2_1_bn"
  top: "conv2_2_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_2_2_bn"
  type: "BatchNorm"
  bottom: "conv2_2_2"
  top: "conv2_2_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_2_sum"
  type: "Eltwise"
  bottom: "conv2_1_sum"
  bottom: "conv2_2_2_bn"
  top: "conv2_2_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv2_2_sum_relu"
  type: "ReLU"
  bottom: "conv2_2_sum"
  top: "conv2_2_sum"
}

#conv2_3 3x3 64
layer {
  name: "conv2_3_1"
  type: "Convolution"
  bottom: "conv2_2_sum"
  top: "conv2_3_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_3_1_bn"
  type: "BatchNorm"
  bottom: "conv2_3_1"
  top: "conv2_3_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_3_1_relu"
  type: "ReLU"
  bottom: "conv2_3_1_bn"
  top: "conv2_3_1_bn"
}
layer {
  name: "conv2_3_2"
  type: "Convolution"
  bottom: "conv2_3_1_bn"
  top: "conv2_3_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_3_2_bn"
  type: "BatchNorm"
  bottom: "conv2_3_2"
  top: "conv2_3_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv2_3_sum"
  type: "Eltwise"
  bottom: "conv2_2_sum"
  bottom: "conv2_3_2_bn"
  top: "conv2_3_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv2_3_sum_relu"
  type: "ReLU"
  bottom: "conv2_3_sum"
  top: "conv2_3_sum"
}
layer {
  name: "conv2_proj"
  type: "Convolution"
  bottom: "conv2_3_sum"
  top: "conv2_proj"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 1
    pad: 0
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv2_proj_bn"
  type: "BatchNorm"
  bottom: "conv2_proj"
  top: "conv2_proj_bn"
  batch_norm_param {
  }
}

#conv3_1 3x3 128
layer {
  name: "conv3_1_1"
  type: "Convolution"
  bottom: "conv2_3_sum"
  top: "conv3_1_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_1_1_bn"
  type: "BatchNorm"
  bottom: "conv3_1_1"
  top: "conv3_1_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_1_1_relu"
  type: "ReLU"
  bottom: "conv3_1_1_bn"
  top: "conv3_1_1_bn"
}
layer {
  name: "conv3_1_2"
  type: "Convolution"
  bottom: "conv3_1_1_bn"
  top: "conv3_1_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_1_2_bn"
  type: "BatchNorm"
  bottom: "conv3_1_2"
  top: "conv3_1_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_1_sum"
  type: "Eltwise"
  bottom: "conv2_proj_bn"
  bottom: "conv3_1_2_bn"
  top: "conv3_1_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv3_1_sum_relu"
  type: "ReLU"
  bottom: "conv3_1_sum"
  top: "conv3_1_sum"
}

#conv3_2 3x3 128
layer {
  name: "conv3_2_1"
  type: "Convolution"
  bottom: "conv3_1_sum"
  top: "conv3_2_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_2_1_bn"
  type: "BatchNorm"
  bottom: "conv3_2_1"
  top: "conv3_2_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_2_1_relu"
  type: "ReLU"
  bottom: "conv3_2_1_bn"
  top: "conv3_2_1_bn"
}
layer {
  name: "conv3_2_2"
  type: "Convolution"
  bottom: "conv3_2_1_bn"
  top: "conv3_2_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_2_2_bn"
  type: "BatchNorm"
  bottom: "conv3_2_2"
  top: "conv3_2_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_2_sum"
  type: "Eltwise"
  bottom: "conv3_1_sum"
  bottom: "conv3_2_2_bn"
  top: "conv3_2_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv3_2_sum_relu"
  type: "ReLU"
  bottom: "conv3_2_sum"
  top: "conv3_2_sum"
}

#conv3_3 3x3 128
layer {
  name: "conv3_3_1"
  type: "Convolution"
  bottom: "conv3_2_sum"
  top: "conv3_3_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_3_1_bn"
  type: "BatchNorm"
  bottom: "conv3_3_1"
  top: "conv3_3_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_3_1_relu"
  type: "ReLU"
  bottom: "conv3_3_1_bn"
  top: "conv3_3_1_bn"
}
layer {
  name: "conv3_3_2"
  type: "Convolution"
  bottom: "conv3_3_1_bn"
  top: "conv3_3_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_3_2_bn"
  type: "BatchNorm"
  bottom: "conv3_3_2"
  top: "conv3_3_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_3_sum"
  type: "Eltwise"
  bottom: "conv3_2_sum"
  bottom: "conv3_3_2_bn"
  top: "conv3_3_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv3_3_sum_relu"
  type: "ReLU"
  bottom: "conv3_3_sum"
  top: "conv3_3_sum"
}

#conv3_4 3x3 128
layer {
  name: "conv3_4_1"
  type: "Convolution"
  bottom: "conv3_3_sum"
  top: "conv3_4_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_4_1_bn"
  type: "BatchNorm"
  bottom: "conv3_4_1"
  top: "conv3_4_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_4_1_relu"
  type: "ReLU"
  bottom: "conv3_4_1_bn"
  top: "conv3_4_1_bn"
}
layer {
  name: "conv3_4_2"
  type: "Convolution"
  bottom: "conv3_4_1_bn"
  top: "conv3_4_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_4_2_bn"
  type: "BatchNorm"
  bottom: "conv3_4_2"
  top: "conv3_4_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv3_4_sum"
  type: "Eltwise"
  bottom: "conv3_3_sum"
  bottom: "conv3_4_2_bn"
  top: "conv3_4_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv3_4_sum_relu"
  type: "ReLU"
  bottom: "conv3_4_sum"
  top: "conv3_4_sum"
}
layer {
  name: "conv3_proj"
  type: "Convolution"
  bottom: "conv3_4_sum"
  top: "conv3_proj"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 1
    pad: 0
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv3_proj_bn"
  type: "BatchNorm"
  bottom: "conv3_proj"
  top: "conv3_proj_bn"
  batch_norm_param {
  }
}

#conv4_1 3x3 256
layer {
  name: "conv4_1_1"
  type: "Convolution"
  bottom: "conv3_4_sum"
  top: "conv4_1_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_1_1_bn"
  type: "BatchNorm"
  bottom: "conv4_1_1"
  top: "conv4_1_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_1_1_relu"
  type: "ReLU"
  bottom: "conv4_1_1_bn"
  top: "conv4_1_1_bn"
}
layer {
  name: "conv4_1_2"
  type: "Convolution"
  bottom: "conv4_1_1_bn"
  top: "conv4_1_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_1_2_bn"
  type: "BatchNorm"
  bottom: "conv4_1_2"
  top: "conv4_1_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_1_sum"
  type: "Eltwise"
  bottom: "conv3_proj_bn"
  bottom: "conv4_1_2_bn"
  top: "conv4_1_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_1_sum_relu"
  type: "ReLU"
  bottom: "conv4_1_sum"
  top: "conv4_1_sum"
}

#conv4_2 3x3 256
layer {
  name: "conv4_2_1"
  type: "Convolution"
  bottom: "conv4_1_sum"
  top: "conv4_2_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_2_1_bn"
  type: "BatchNorm"
  bottom: "conv4_2_1"
  top: "conv4_2_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_2_1_relu"
  type: "ReLU"
  bottom: "conv4_2_1_bn"
  top: "conv4_2_1_bn"
}
layer {
  name: "conv4_2_2"
  type: "Convolution"
  bottom: "conv4_2_1_bn"
  top: "conv4_2_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_2_2_bn"
  type: "BatchNorm"
  bottom: "conv4_2_2"
  top: "conv4_2_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_2_sum"
  type: "Eltwise"
  bottom: "conv4_1_sum"
  bottom: "conv4_2_2_bn"
  top: "conv4_2_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_2_sum_relu"
  type: "ReLU"
  bottom: "conv4_2_sum"
  top: "conv4_2_sum"
}

#conv4_3 3x3 256
layer {
  name: "conv4_3_1"
  type: "Convolution"
  bottom: "conv4_2_sum"
  top: "conv4_3_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_3_1_bn"
  type: "BatchNorm"
  bottom: "conv4_3_1"
  top: "conv4_3_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_3_1_relu"
  type: "ReLU"
  bottom: "conv4_3_1_bn"
  top: "conv4_3_1_bn"
}
layer {
  name: "conv4_3_2"
  type: "Convolution"
  bottom: "conv4_3_1_bn"
  top: "conv4_3_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_3_2_bn"
  type: "BatchNorm"
  bottom: "conv4_3_2"
  top: "conv4_3_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_3_sum"
  type: "Eltwise"
  bottom: "conv4_2_sum"
  bottom: "conv4_3_2_bn"
  top: "conv4_3_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_3_sum_relu"
  type: "ReLU"
  bottom: "conv4_3_sum"
  top: "conv4_3_sum"
}

#conv4_4 3x3 256
layer {
  name: "conv4_4_1"
  type: "Convolution"
  bottom: "conv4_3_sum"
  top: "conv4_4_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_4_1_bn"
  type: "BatchNorm"
  bottom: "conv4_4_1"
  top: "conv4_4_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_4_1_relu"
  type: "ReLU"
  bottom: "conv4_4_1_bn"
  top: "conv4_4_1_bn"
}
layer {
  name: "conv4_4_2"
  type: "Convolution"
  bottom: "conv4_4_1_bn"
  top: "conv4_4_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_4_2_bn"
  type: "BatchNorm"
  bottom: "conv4_4_2"
  top: "conv4_4_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_4_sum"
  type: "Eltwise"
  bottom: "conv4_3_sum"
  bottom: "conv4_4_2_bn"
  top: "conv4_4_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_4_sum_relu"
  type: "ReLU"
  bottom: "conv4_4_sum"
  top: "conv4_4_sum"
}

#conv4_5 3x3 256
layer {
  name: "conv4_5_1"
  type: "Convolution"
  bottom: "conv4_4_sum"
  top: "conv4_5_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_5_1_bn"
  type: "BatchNorm"
  bottom: "conv4_5_1"
  top: "conv4_5_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_5_1_relu"
  type: "ReLU"
  bottom: "conv4_5_1_bn"
  top: "conv4_5_1_bn"
}
layer {
  name: "conv4_5_2"
  type: "Convolution"
  bottom: "conv4_5_1_bn"
  top: "conv4_5_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_5_2_bn"
  type: "BatchNorm"
  bottom: "conv4_5_2"
  top: "conv4_5_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_5_sum"
  type: "Eltwise"
  bottom: "conv4_4_sum"
  bottom: "conv4_5_2_bn"
  top: "conv4_5_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_5_sum_relu"
  type: "ReLU"
  bottom: "conv4_5_sum"
  top: "conv4_5_sum"
}

#conv4_6 3x3 256
layer {
  name: "conv4_6_1"
  type: "Convolution"
  bottom: "conv4_5_sum"
  top: "conv4_6_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_6_1_bn"
  type: "BatchNorm"
  bottom: "conv4_6_1"
  top: "conv4_6_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_6_1_relu"
  type: "ReLU"
  bottom: "conv4_6_1_bn"
  top: "conv4_6_1_bn"
}
layer {
  name: "conv4_6_2"
  type: "Convolution"
  bottom: "conv4_6_1_bn"
  top: "conv4_6_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_6_2_bn"
  type: "BatchNorm"
  bottom: "conv4_6_2"
  top: "conv4_6_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv4_6_sum"
  type: "Eltwise"
  bottom: "conv4_5_sum"
  bottom: "conv4_6_2_bn"
  top: "conv4_6_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv4_6_sum_relu"
  type: "ReLU"
  bottom: "conv4_6_sum"
  top: "conv4_6_sum"
}
layer {
  name: "conv4_proj"
  type: "Convolution"
  bottom: "conv4_6_sum"
  top: "conv4_proj"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 1
    pad: 0
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv4_proj_bn"
  type: "BatchNorm"
  bottom: "conv4_proj"
  top: "conv4_proj_bn"
  batch_norm_param {
  }
}

#conv5_1 3x3 512
layer {
  name: "conv5_1_1"
  type: "Convolution"
  bottom: "conv4_6_sum"
  top: "conv5_1_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 2
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_1_1_bn"
  type: "BatchNorm"
  bottom: "conv5_1_1"
  top: "conv5_1_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_1_1_relu"
  type: "ReLU"
  bottom: "conv5_1_1_bn"
  top: "conv5_1_1_bn"
}
layer {
  name: "conv5_1_2"
  type: "Convolution"
  bottom: "conv5_1_1_bn"
  top: "conv5_1_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_1_2_bn"
  type: "BatchNorm"
  bottom: "conv5_1_2"
  top: "conv5_1_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_1_sum"
  type: "Eltwise"
  bottom: "conv4_proj_bn"
  bottom: "conv5_1_2_bn"
  top: "conv5_1_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv5_1_sum_relu"
  type: "ReLU"
  bottom: "conv5_1_sum"
  top: "conv5_1_sum"
}

#conv5_2 3x3 512
layer {
  name: "conv5_2_1"
  type: "Convolution"
  bottom: "conv5_1_sum"
  top: "conv5_2_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_2_1_bn"
  type: "BatchNorm"
  bottom: "conv5_2_1"
  top: "conv5_2_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_2_1_relu"
  type: "ReLU"
  bottom: "conv5_2_1_bn"
  top: "conv5_2_1_bn"
}
layer {
  name: "conv5_2_2"
  type: "Convolution"
  bottom: "conv5_2_1_bn"
  top: "conv5_2_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_2_2_bn"
  type: "BatchNorm"
  bottom: "conv5_2_2"
  top: "conv5_2_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_2_sum"
  type: "Eltwise"
  bottom: "conv5_1_sum"
  bottom: "conv5_2_2_bn"
  top: "conv5_2_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv5_2_sum_relu"
  type: "ReLU"
  bottom: "conv5_2_sum"
  top: "conv5_2_sum"
}

#conv5_3 3x3 512
layer {
  name: "conv5_3_1"
  type: "Convolution"
  bottom: "conv5_2_sum"
  top: "conv5_3_1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_3_1_bn"
  type: "BatchNorm"
  bottom: "conv5_3_1"
  top: "conv5_3_1_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_3_1_relu"
  type: "ReLU"
  bottom: "conv5_3_1_bn"
  top: "conv5_3_1_bn"
}
layer {
  name: "conv5_3_2"
  type: "Convolution"
  bottom: "conv5_3_1_bn"
  top: "conv5_3_2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
     lr_mult: 2
     decay_mult: 0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5_3_2_bn"
  type: "BatchNorm"
  bottom: "conv5_3_2"
  top: "conv5_3_2_bn"
  batch_norm_param {
  }
}
layer {
  name: "conv5_3_sum"
  type: "Eltwise"
  bottom: "conv5_2_sum"
  bottom: "conv5_3_2_bn"
  top: "conv5_3_sum"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "conv5_3_sum_relu"
  type: "ReLU"
  bottom: "conv5_3_sum"
  top: "conv5_3_sum"
}

#========= RPN ============

layer {
  name: "rpn_conv/3x3"
  type: "Convolution"
  bottom: "conv5_3_sum"
  top: "rpn/output"
  param { lr_mult: 1.0 }
  param { lr_mult: 2.0 }
  convolution_param {
    num_output: 512
    kernel_size: 3 pad: 1 stride: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "rpn_relu/3x3"
  type: "ReLU"
  bottom: "rpn/output"
  top: "rpn/output"
}

layer {
  name: "rpn_cls_score"
  type: "Convolution"
  bottom: "rpn/output"
  top: "rpn_cls_score"
  param { lr_mult: 1.0 }
  param { lr_mult: 2.0 }
  convolution_param {
    num_output: 18   # 2(bg/fg) * 9(anchors)
    kernel_size: 1 pad: 0 stride: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}

layer {
  name: "rpn_bbox_pred"
  type: "Convolution"
  bottom: "rpn/output"
  top: "rpn_bbox_pred"
  param { lr_mult: 1.0 }
  param { lr_mult: 2.0 }
  convolution_param {
    num_output: 36   # 4 * 9(anchors)
    kernel_size: 1 pad: 0 stride: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}

layer {
   bottom: "rpn_cls_score"
   top: "rpn_cls_score_reshape"
   name: "rpn_cls_score_reshape"
   type: "Reshape"
   reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}

layer {
  name: 'rpn-data'
  type: 'Python'
  bottom: 'rpn_cls_score'
  bottom: 'gt_boxes'
  bottom: 'im_info'
  bottom: 'data'
  top: 'rpn_labels'
  top: 'rpn_bbox_targets'
  top: 'rpn_bbox_inside_weights'
  top: 'rpn_bbox_outside_weights'
  python_param {
    module: 'rpn.anchor_target_layer'
    layer: 'AnchorTargetLayer'
    param_str: "'feat_stride': 16"
  }
}

layer {
  name: "rpn_loss_cls"
  type: "SoftmaxWithLoss"
  bottom: "rpn_cls_score_reshape"
  bottom: "rpn_labels"
  propagate_down: 1
  propagate_down: 0
  top: "rpn_cls_loss"
  loss_weight: 1
  loss_param {
    ignore_label: -1
    normalize: true
  }
}

layer {
  name: "rpn_loss_bbox"
  type: "SmoothL1Loss"
  bottom: "rpn_bbox_pred"
  bottom: "rpn_bbox_targets"
  bottom: 'rpn_bbox_inside_weights'
  bottom: 'rpn_bbox_outside_weights'
  top: "rpn_loss_bbox"
  loss_weight: 1
  smooth_l1_loss_param { sigma: 3.0 }
}

#========= RoI Proposal ============

layer {
  name: "rpn_cls_prob"
  type: "Softmax"
  bottom: "rpn_cls_score_reshape"
  top: "rpn_cls_prob"
}

layer {
  name: 'rpn_cls_prob_reshape'
  type: 'Reshape'
  bottom: 'rpn_cls_prob'
  top: 'rpn_cls_prob_reshape'
  reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}

layer {
  name: 'proposal'
  type: 'Python'
  bottom: 'rpn_cls_prob_reshape'
  bottom: 'rpn_bbox_pred'
  bottom: 'im_info'
  top: 'rpn_rois'
#  top: 'rpn_scores'
  python_param {
    module: 'rpn.proposal_layer'
    layer: 'ProposalLayer'
    param_str: "'feat_stride': 16"
  }
}

#layer {
#  name: 'debug-data'
#  type: 'Python'
#  bottom: 'data'
#  bottom: 'rpn_rois'
#  bottom: 'rpn_scores'
#  python_param {
#    module: 'rpn.debug_layer'
#    layer: 'RPNDebugLayer'
#  }
#}

layer {
  name: 'roi-data'
  type: 'Python'
  bottom: 'rpn_rois'
  bottom: 'gt_boxes'
  top: 'rois'
  top: 'labels'
  top: 'bbox_targets'
  top: 'bbox_inside_weights'
  top: 'bbox_outside_weights'
  python_param {
    module: 'rpn.proposal_target_layer'
    layer: 'ProposalTargetLayer'
    param_str: "'num_classes': 2"
  }
}

#========= RCNN ============

layer {
  name: "roi_pool5"
  type: "ROIPooling"
  bottom: "conv5_3_sum"
  bottom: "rois"
  top: "pool5"
  roi_pooling_param {
    pooled_w: 7
    pooled_h: 7
    spatial_scale: 0.0625 # 1/16
  }
}
layer {
  name: "cls_score"
  type: "InnerProduct"
  bottom: "pool5"
  top: "cls_score"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "bbox_pred"
  type: "InnerProduct"
  bottom: "pool5"
  top: "bbox_pred"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 8
    weight_filler {
      type: "gaussian"
      std: 0.001
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "loss_cls"
  type: "SoftmaxWithLoss"
  bottom: "cls_score"
  bottom: "labels"
  propagate_down: 1
  propagate_down: 0
  top: "loss_cls"
  loss_weight: 1
}
layer {
  name: "loss_bbox"
  type: "SmoothL1Loss"
  bottom: "bbox_pred"
  bottom: "bbox_targets"
  bottom: "bbox_inside_weights"
  bottom: "bbox_outside_weights"
  top: "loss_bbox"
  loss_weight: 1
}
@victorhcm

Hi @twtygqyy, what training error and validation error are you obtaining for your ResNet?

@twtygqyy
Author

twtygqyy commented Feb 4, 2016

Hi @victorhcm, I'm still working on it, and I found it is not easy to train a Faster R-CNN model from scratch with ResNet. Fortunately, the author released the pre-trained models yesterday; you can download them from https://github.com/KaimingHe/deep-residual-networks to fine-tune a Faster R-CNN model.

@victorhcm

Thank you, @twtygqyy!

@kshalini

kshalini commented Feb 8, 2016

@twtygqyy
Actually, two questions. Is it straightforward to use BVLCNet instead of ZF or VGG and train Faster R-CNN, i.e. fine-tune it with the earlier model weights?

The second question is: have you achieved any success in fine-tuning ResNet? Can you please share your experience?

@twtygqyy
Author

twtygqyy commented Feb 9, 2016

@kshalini
For the first question, it is straightforward to change the network and use a pre-trained model for fine-tuning. The only thing you need to modify is to insert the RPN layers before the fully connected layers.

For the second question, in order to use a ResNet model for fine-tuning it is necessary to update the current Caffe fork in py-faster-rcnn, since it is an old version without the BN and Scale layers that ResNet requires.
I updated the Caffe version and I'm currently training Faster R-CNN with a ResNet model. However, I found that the training speed is very slow; maybe I made some mistakes. I will see whether I can get a good model soon.
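
For reference, here is a minimal sketch of the BatchNorm + Scale pair that the released ResNet prototxts attach to each convolution (layer names follow Kaiming He's deploy prototxt; setting use_global_stats: true is the usual choice when fine-tuning from the released statistics):

layer {
  bottom: "conv1"
  top: "conv1"
  name: "bn_conv1"
  type: "BatchNorm"
  batch_norm_param {
    use_global_stats: true  # reuse the pre-trained running mean/variance instead of batch statistics
  }
}
layer {
  bottom: "conv1"
  top: "conv1"
  name: "scale_conv1"
  type: "Scale"
  scale_param {
    bias_term: true  # learned per-channel scale and shift, which Caffe's BatchNorm layer does not include
  }
}

Without the Scale layers (and a Caffe version that knows both layer types), the pre-trained scale/shift parameters of the ResNet models are simply dropped when the weights are loaded.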

@rbgirshick
Owner

I've just updated the caffe submodule to one rebased onto Caffe master as of this morning (commit 33f2445).

@twtygqyy
Author

@rbgirshick Thanks!

@hariag

hariag commented Feb 10, 2016

@rbgirshick Thanks!

@SilverWaveGL

Was anyone successful in training ResNet Faster R-CNN models? If so, could you upload the prototxt and solver files?
I'm unable to fine-tune for tasks where I had success using the ZF model.

@kshalini

A naive question here: is there any comparative study on using plain ResNet vs. py-faster-rcnn + ResNet?

On the VOC datasets, ResNet-51 already seems to give >80% accuracy. In case we are dealing with a smaller number of object categories (say < 100), what would be the better recommendation: just go with ResNet (34, 51, etc.), or go with ResNet + py-faster-rcnn?

@victorhcm

Hi @kshalini, do you need the localization of the objects? If that is the case, I think you should go with resNET + pyFasterRCNN, as resNET alone just provides the object class scores.

@kshalini

@victorhcm
Yes, localization is desirable. Maybe not always, but in some cases, yes. I get it now, thanks!

In order to train py-faster-rcnn with ResNet, we also need the (ResNet) train_val.prototxt, which I am not able to locate anywhere yet; only the deploy.prototxt seems to be available. Does someone have one? (Maybe @twtygqyy?)

@siddharthm83

@kshalini: Take a look at models/VGG/faster_rcnn_alt_opt/. The solvers and train/val equivalents are there. For example, in stage1_fast_rcnn_solver30k40k.pt the train prototxt is
train_net: "models/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt"
For py-faster-rcnn with ResNet, I think you have to change the model definitions and follow a similar format.
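
For anyone adapting this, here is a minimal end2end-style solver sketch (the net path is a hypothetical placeholder; the hyper-parameters below mirror the VGG16 end2end solver shipped with py-faster-rcnn, so adjust them for your own setup):

train_net: "models/ResNet-50/faster_rcnn_end2end/train.prototxt"  # hypothetical path to your ResNet train prototxt
base_lr: 0.001
lr_policy: "step"
gamma: 0.1
stepsize: 50000
momentum: 0.9
weight_decay: 0.0005
display: 20
# py-faster-rcnn handles snapshotting from Python, so standard Caffe snapshotting is disabled
snapshot: 0
snapshot_prefix: "resnet50_faster_rcnn"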

@twtygqyy
Author

@kshalini Actually MSRA used ResNet with the RPN from the Faster R-CNN paper to do object detection for the MS COCO object detection challenge (80 categories). I don't quite understand your question about choosing between ResNet and ResNet Faster R-CNN for a smaller number of object categories.

@twtygqyy
Author

@kshalini There is no region proposal network in the ResNet train_val prototxt; you have to insert the RPN layers for object detection instead of using ResNet for classification only.
For how to insert the RPN layers, you can take the prototxt I showed at the beginning of this issue as an example and modify the ResNet network according to https://github.com/KaimingHe/deep-residual-networks to fine-tune a Faster R-CNN model.

@kshalini

@twtygqyy @siddharthm83 @victorhcm

Thanks! Actually, I had managed to train py-faster-rcnn by fine-tuning VGGNet using the end2end method about a month ago, and the results were quite decent (>85% classification for my dataset with about 50 categories).

I now want to try this using ResNet for classification. I am just struggling to locate a train_val.prototxt for ResNet. The Kaiming He GitHub link just has the deploy.prototxt, and I don't yet know how to modify that into a train_val (I think some edits need to go in at the top and bottom).

That's where I am looking for some help.

@twtygqyy
Author

@kshalini
Please check the following link for a three-class train_val example
https://github.com/twtygqyy/deep-residual-networks/tree/master/prototxt/ResNet-50-3classes-train_val

@kshalini

@twtygqyy awesome!!! Thanks a lot, I will try this out shortly.

As a first step I will train it as a simple Caffe model, and then as step two in combination with Faster R-CNN, just so that I know if I am getting anything wrong along the way.

Are there any additional instructions to keep in mind while training (like flips or other data augmentations such as skew, scale, etc.)?

@siddharthm83

@kshalini: I don't completely understand your question. I thought you wanted to do detection and hence were asking your question here (faster-rcnn). Can you explain what your use case is?
If you only want classification, training ResNet from scratch on ImageNet would possibly need a ton of GPU memory and multiple GPUs. If you just need to train on a smaller subset for classification, I would recommend fine-tuning. See e.g. http://caffe.berkeleyvision.org/gathered/examples/finetune_flickr_style.html

@kshalini

@siddharthm83 My use case is to classify types of cars.

Initially I tried basic fine-tuning (no R-CNN) and got up to a certain level of accuracy (~70%). Later I tried Faster R-CNN (fine-tuned VGG) and got around ~80% classification accuracy.

Now I am exploring whether I can use ResNet for the same task, so I want to try this out in steps: first without R-CNN and then with Faster R-CNN. I am comparing the accuracy and speed of these approaches.

Though the primary objective is classification, the reason I am also interested in localization is to analyze the failure cases (false positives, etc.) to understand where it is going wrong.

Please advise if I am missing something. Thanks.

@siddharthm83

@kshalini, good problem to solve. Check Hinton's paper on dark knowledge, where he talks about building specialist nets for classes that can easily be confused (too similar):
http://arxiv.org/pdf/1503.02531v1.pdf

@arushk1

arushk1 commented Feb 15, 2016

@kshalini How did you finetune the pyfasterrcnn with VGG16?

@twtygqyy
Author

@arushk1
You can modify 'num_classes' and 'num_output' in the prototxt files from models/VGG16/faster_rcnn_end2end and use the --weights data/imagenet_models/VGG16.v2.caffemodel option for fine-tuning.
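
To make the edits concrete, here is a sketch of the three places that change, assuming a hypothetical dataset with 20 object classes plus background (so 21 in total); everything else in the shipped prototxt stays as-is:

# input-data and roi-data Python layers
param_str: "'num_classes': 21"  # object classes + 1 for background

layer {
  name: "cls_score"
  type: "InnerProduct"
  inner_product_param {
    num_output: 21  # one score per class, background included
  }
  # remaining fields unchanged
}
layer {
  name: "bbox_pred"
  type: "InnerProduct"
  inner_product_param {
    num_output: 84  # 4 box regression targets per class: 4 * 21
  }
  # remaining fields unchanged
}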

@kshalini

@arushk1, yes, as @twtygqyy says, I did exactly the same. Just make sure you get your path settings (to your data, etc.) right. It should start showing signs of converging after some time. Good luck!

@kaishijeng

@twtygqyy

Are you able to replace VGG16 with ResNet in py-faster-rcnn and use Kaiming's pretrained models to fine-tune py-faster-rcnn successfully?

@twtygqyy
Author

@kaishijeng Yes, I did, just by inserting the RPN layers after conv5, and I added one additional conv layer for size normalization. I'm not sure whether I did it the right way, but it seems to work for me.

@kaishijeng

@twtygqyy

Which pretrained ResNet model (50, 101 or 152) do you use? Would you mind sharing your trained model and train.prototxt? I'd like to compare its performance with VGG16.

@twtygqyy
Author

@kaishijeng
I fine-tuned ResNet-50 on my own dataset with only 2 classes (1 for background) as a test. Due to memory limits, I deleted several blocks from each conv stage and only trained for 20,000 iterations to check whether it could work at all, so the performance cannot be compared with a full ResNet + Faster R-CNN.

@SilverWaveGL

@twtygqyy Why did you insert the RPN layers after conv5 and not before conv5? See the quote from the paper below.

We compute the full-image shared conv feature maps using ... (conv1x, conv2x, conv3x, and conv4x ... These layers are shared by a region proposal network ... and a Fast R-CNN detection network. RoI pooling is performed before conv5_1. ... all layers of conv5x and up are adopted for each region.
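
For context, here is a rough sketch of the arrangement the quote describes, with the RPN and RoI pooling attached to the conv4-level feature map (overall stride 16) and the conv5_x blocks applied per region afterwards. The bottom names below refer to the prototxt at the top of this issue, and the roi_pool4 name is made up; the released ResNet-50/101 models use different layer names:

layer {
  name: "rpn_conv/3x3"
  type: "Convolution"
  bottom: "conv4_6_sum"  # last conv4 block output, downsampled 16x from the input
  top: "rpn/output"
  convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1 }
}
# ... rpn_cls_score, rpn_bbox_pred, rpn-data and proposal layers exactly as in the prototxt above ...
layer {
  name: "roi_pool4"  # hypothetical name: pools conv4 features instead of conv5
  type: "ROIPooling"
  bottom: "conv4_6_sum"
  bottom: "rois"
  top: "pool4"
  roi_pooling_param {
    pooled_w: 14
    pooled_h: 14
    spatial_scale: 0.0625  # 1/16, matching the stride of conv4
  }
}
# The conv5_x blocks (conv5_1_1 has stride 2) then run on each pooled 14x14 region,
# reducing it to 7x7 before the cls_score / bbox_pred heads.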

@twtygqyy
Author

@SilverWaveGL Thanks for pointing that out; it should be done as in the quote you mentioned to reproduce the result in the paper. In my case, just for a quick test, I removed several repeated blocks in the early conv stages since I do not have enough memory on my machine for full ResNet Faster R-CNN training.

@liuchang8am

liuchang8am commented Jun 2, 2016

I'm training Faster R-CNN with ResNet-50 layers on MS COCO, using the train.prototxt provided by @siddharthm83. However, I'm getting loss_bbox = 0 and loss_cls = 0. Any insight would be appreciated.


I0531 13:21:11.898075 32392 solver.cpp:229] Iteration 1860, loss = 0.474211

I0531 13:21:11.898149 32392 solver.cpp:245] Train net output #0: loss_bbox = 0 (* 1 = 0 loss)

I0531 13:21:11.898160 32392 solver.cpp:245] Train net output #1: loss_cls = 0 (* 1 = 0 loss)

I0531 13:21:11.898170 32392 solver.cpp:245] Train net output #2: rpn_cls_loss = 0.501028 (* 1 = 0.501028 loss)

I0531 13:21:11.898180 32392 solver.cpp:245] Train net output #3: rpn_loss_bbox = 0.286518 (* 1 = 0.286518 loss)

@siddharthm83

@liuchang8am, I am not sure what you are doing wrong; it is hard to tell from the few lines you have pasted. Is your classifier loss 0 from the first iteration, or does it gradually converge to 0?
I have not trained it on MS COCO, so I am not sure.

@liuchang8am

liuchang8am commented Jun 13, 2016

@siddharthm83 The loss_bbox has been 0 since the first iteration. I tested the trained model, and it can't detect any objects in the pre-defined categories. The full experiment log is attached; hopefully it shows my error somewhere.
log.txt

@agarwal-shubham

@siddharthm83 I also followed a similar approach when merging ResNet and py-faster-rcnn; however, can you please explain the reason why you removed the (fc - relu - dropout) layers and added a conv layer between the RPN/RoI and R-CNN modules?

@afantideng

afantideng commented Aug 8, 2016

@siddharthm83 I cannot figure out why the output size of conv5_3 in VGG16 is 7 * 7. For example, if the size of the input image is 600 * 1000, shouldn't it be 39 * 64?

@kristellmarisse

In order to get a pretrained ResNet model for the PASCAL VOC dataset (since py-faster-rcnn needs a pretrained model), should I train on images cropped based on the annotations, or should I train using whole images?

@twtygqyy
Author

@kristellmarisse You should use the ResNet model trained on ImageNet and train on whole images without cropping, so that negative samples can be generated.

@kshalini

@abhirevan, when you got ResNet-50 working with Faster R-CNN, which dataset did you train on? I want to train on VOC to begin with. I can find ImageNet models trained with ResNet-50. Should I first fine-tune the ImageNet model for VOC (using plain ResNet-50) and then use that model to train the R-CNN? Or can I start the R-CNN training with the ImageNet model itself?

@zimenglan-sysu-512

@SilverWaveGL Do you set the lr_mult and decay_mult to zero in the training stage or in the testing stage? Would you mind explaining it? Thanks.

@mengzhangjian

mengzhangjian commented Sep 19, 2016

@siddharthm83, can you help me solve this problem? Thank you.
#345

@jade2014

@janakipj you mentioned: "For testing, I use the net.forward (standard python code) and try to interpret the output of the last layer using the key "fc3" which is the last layer in your trainval. And I get all equal values". Did you find out why the values are all equal? I met the same problem.

@RubinaRashid

I am working on a ResNet model for fixation prediction. I deployed the ResNet model on one image, but after the input layer the data is not being fed to the model.
Can anyone guide me?

@chuckbasstan123

@twtygqyy
Just one quick question about "param_str: "'feat_stride': 16"" and "spatial_scale: 0.0625 # 1/16".
They are related to the total stride accumulated by the final layer before the RPN; I counted 2**5 = 32 by searching for "stride: 2" in the training file.
Can anyone provide some clues about setting up "feat_stride" and "spatial_scale"? They should be modified from network to network, right?
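
For what it's worth, here is a quick accounting of the stride-2 layers in the prototxt at the top of this issue; feat_stride and spatial_scale have to match the total downsampling of whatever feature map the RPN and ROIPooling layers read:

# conv1     stride 2  -> /2
# pool1     stride 2  -> /4
# conv3_1_1 stride 2  -> /8
# conv4_1_1 stride 2  -> /16  (conv4_x outputs such as conv4_6_sum are at stride 16)
# conv5_1_1 stride 2  -> /32  (conv5_x outputs such as conv5_3_sum are at stride 32)
#
# So when the RPN and roi_pool layers sit on conv5_3_sum, consistency would require
#   param_str: "'feat_stride': 32"   and   spatial_scale: 0.03125  # 1/32
# whereas feat_stride 16 / spatial_scale 0.0625 correspond to a conv4-level feature map
# (or to VGG16's conv5_3, which is also at stride 16).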

@Eniac-Xie

Hi everyone, I have released an implementation of ResNet-101 based Faster R-CNN, which also uses OHEM (Online Hard Example Mining) during training. You can find the code and model weights here.

@onkarganjewar

onkarganjewar commented Feb 22, 2017

@abhirevan @kshalini @twtygqyy @siddharthm83 @rbgirshick

I'm trying to train the ResNet-50 model on the PASCAL VOC 2007 trainval dataset. I'm using this command to start the training:

./tools/train_net.py --gpu 1 --weights data/imagenet_models/ResNet-50-model.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_end2end.yml --solver models/ResNet-50/faster_rcnn_end2end/solver.prototxt

I'm using the solver/train prototxt files from @twtygqyy's repo.

However, I'm getting this error:

Normalizing targets done
WARNING: Logging before InitGoogleLogging() is written to STDERR
I0222 13:59:58.538053 23076 solver.cpp:54] Initializing solver from parameters:
test_iter: 100 test_interval: 1000 base_lr: 0.0001 display: 100 max_iter: 200000 lr_policy: "multistep" gamma: 0.1 momentum: 0.9 weight_decay: 0.0001 stepsize: 20000 snapshot: 10000
snapshot_prefix: "resnet50_train" solver_mode: GPU
net: "models/ResNet-50/faster_rcnn_end2end/ResNet-50-train_val.prototxt"
test_initialization: false
I0222 13:59:58.538121 23076 solver.cpp:96] Creating training net from net file: models/ResNet-50/faster_rcnn_end2end/ResNet-50-train_val.prototxt
[libprotobuf ERROR google/protobuf/text_format.cc:245] Error parsing text-format caffe.NetParameter: 74:26: Message type "caffe.LayerParameter" has no field named "batch_norm_param".
F0222 13:59:58.538242 23076 upgrade_proto.cpp:928] Check failed: ReadProtoFromTextFile(param_file, param) Failed to parse NetParameter file: models/ResNet-50/faster_rcnn_end2end/ResNet-50-train_val.prototxt
*** Check failure stack trace: *** Aborted (core dumped)

I'm on the latest commit of faster-rcnn branch of caffe-fast-rcnn

Pardon my lack of knowledge, but would you guys mind helping me resolve this error, please? Appreciate it. Thanks.

@hgaiser

hgaiser commented Feb 23, 2017

That's a weird error: it says that the LayerParameter type has no field named batch_norm_param, but even upstream Caffe has that field.

Does VGG16 work properly then?

@onkarganjewar

onkarganjewar commented Feb 23, 2017

@hgaiser

Does VGG16 work properly then?

No, I suppose not. I'm able to train and test the VGG_CNN_M_1024 model successfully. However, loss_cls and loss_bbox seem to remain 0 until the last iteration. I don't know whether that is relevant in this case.

./tools/train_net.py --gpu 1 --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_end2end.yml --solver models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/solver.prototxt

Anyways, this is the output snippet from my VGG_CNN_M_1024 training:

Train net output #0: loss_bbox = 0 (* 1 = 0 loss)
I0223 14:28:28.146657 3834 solver.cpp:258] Train net output #1: loss_cls = 0 (* 1 = 0 loss)
I0223 14:28:28.146667 3834 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.121718 (* 1 = 0.121718 loss)

I0223 14:28:28.146678 3834 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.11191 (* 1 = 0.11191 loss)

I0223 14:28:28.146688 3834 solver.cpp:571] Iteration 100, lr = 0.001
I0223 14:28:30.319339 3834 solver.cpp:242] Iteration 120, loss = 0.286128

I0223 14:28:30.319385 3834 solver.cpp:258] Train net output #0: loss_bbox = 0 (* 1 = 0 loss)

I0223 14:28:30.319396 3834 solver.cpp:258] Train net output #1: loss_cls = 0 (* 1 = 0 loss)

##########################################################################

On the other hand, I'm getting an out-of-memory error in the case of VGG16.

./tools/train_net.py --gpu 0 --weights data/imagenet_models/VGG16.v2.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_end2end.yml --solver models/pascal_voc/VGG16/faster_rcnn_end2end/solver.prototxt

Iteration 0, loss = 3.53162
I0223 14:26:47.963392 3791 solver.cpp:258] Train net output #0: loss_bbox = 0 (* 1 = 0 loss)

I0223 14:26:47.963405 3791 solver.cpp:258] Train net output #1: loss_cls = 1.98189 (* 1 = 1.98189 loss)

I0223 14:26:47.963416 3791 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.79846 (* 1 = 0.79846 loss)

I0223 14:26:47.963426 3791 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.690695 (* 1 = 0.690695 loss)

I0223 14:26:47.963438 3791 solver.cpp:571] Iteration 0, lr = 0.001
F0223 14:26:47.970721 3791 syncedmem.cpp:58] Check failed: error == cudaSuccess (2 vs. 0) out of memory
*** Check failure stack trace: ***
Aborted (core dumped)

FYI, I'm using caffe-fast-rcnn from @rbgirshick repo and I think it does not have a batch_norm_param field.

Thanks for reaching out, though. Let me know if you need any other information. Thank you very much.

@DeepestNet

@kshalini, how did you find the accuracy value for py-faster-rcnn trained by fine-tuning VGGNet using the end2end method? What additional code did you use to get the accuracy value?

@646677064

Hi @onkarganjewar, why did you remove the BN layers from the original ResNet? I'm confused.

@tianzhi0549

tianzhi0549 commented Jul 17, 2017

Hi everyone, I have open-sourced my implementation of Faster R-CNN with ResNet for ImageNet detection here. Hope it is useful for someone. Thanks!

@sulth

sulth commented Oct 29, 2017

@tianzhi0549
I have used the ResNet resnet101_faster_rcnn_bn_scale_merged_end2end_iter_70000.caffemodel pretrained weights to make a pretrained model at the 0th epoch and fine-tuned on it.
resnetTrain.txt
screen shot 2017-10-29 at 11 48 33 am

On testing, the mAP is very low. In the majority of cases it only detects the class with bigger bounding boxes correctly, and it also has issues with overlapping cases. Can you please help me track down the issue?

Thanks in advance.


@whmin

whmin commented Oct 31, 2017

@twtygqyy Could you please give me your ResNet files for Faster R-CNN? I saw that someone found your train.prototxt and test.prototxt, but I cannot find them. I tried to modify my own .prototxt file but failed. Thank you very much!!!

@nnop

nnop commented Dec 1, 2017

Have you found the reason why training is very slow? I met the same problem. @twtygqyy

@xiaoming-qxm

@sulth Something is wrong with the method of calculating the AP; you need to implement your own.

@xiaoming-qxm

@siddharthm83 Can we keep the pooled_w & pooled_h params in the ROIPooling layer equal to 7 and just set spatial_scale=1/8 instead of spatial_scale=1/16?

@R1234A

R1234A commented Feb 26, 2019

Hi, I am trying to train ResNet-18. I have used the following train.prototxt:

name: "ResNet-18"

layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 21"
}
}

layer {
bottom: "data"
top: "conv1"
name: "conv1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 7
pad: 3
stride: 2
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "bn_conv1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "scale_conv1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "conv1_relu"
type: "ReLU"
}

layer {
bottom: "conv1"
top: "pool1"
name: "pool1"
type: "Pooling"
pooling_param {
kernel_size: 3
stride: 2
pool: MAX
}
}

layer {
bottom: "pool1"
top: "res2a_branch1"
name: "res2a_branch1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 1
pad: 0
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch1"
top: "res2a_branch1"
name: "bn2a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch1"
top: "res2a_branch1"
name: "scale2a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "pool1"
top: "res2a_branch2a"
name: "res2a_branch2a"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "bn2a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "scale2a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "res2a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2b"
name: "res2a_branch2b"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch2b"
top: "res2a_branch2b"
name: "bn2a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch2b"
top: "res2a_branch2b"
name: "scale2a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a_branch1"
bottom: "res2a_branch2b"
top: "res2a"
name: "res2a"
type: "Eltwise"
}

layer {
bottom: "res2a"
top: "res2a"
name: "res2a_relu"
type: "ReLU"
}

layer {
bottom: "res2a"
top: "res2b_branch2a"
name: "res2b_branch2a"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "bn2b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "scale2b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "res2b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2b"
name: "res2b_branch2b"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2b_branch2b"
top: "res2b_branch2b"
name: "bn2b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2b_branch2b"
top: "res2b_branch2b"
name: "scale2b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a"
bottom: "res2b_branch2b"
top: "res2b"
name: "res2b"
type: "Eltwise"
}

layer {
bottom: "res2b"
top: "res2b"
name: "res2b_relu"
type: "ReLU"
}

layer {
bottom: "res2b"
top: "res3a_branch1"
name: "res3a_branch1"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res3a_branch1"
top: "res3a_branch1"
name: "bn3a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch1"
top: "res3a_branch1"
name: "scale3a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2b"
top: "res3a_branch2a"
name: "res3a_branch2a"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "bn3a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "scale3a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "res3a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2b"
name: "res3a_branch2b"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3a_branch2b"
top: "res3a_branch2b"
name: "bn3a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch2b"
top: "res3a_branch2b"
name: "scale3a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a_branch1"
bottom: "res3a_branch2b"
top: "res3a"
name: "res3a"
type: "Eltwise"
}

layer {
bottom: "res3a"
top: "res3a"
name: "res3a_relu"
type: "ReLU"
}

layer {
bottom: "res3a"
top: "res3b_branch2a"
name: "res3b_branch2a"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "bn3b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "scale3b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "res3b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2b"
name: "res3b_branch2b"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3b_branch2b"
top: "res3b_branch2b"
name: "bn3b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3b_branch2b"
top: "res3b_branch2b"
name: "scale3b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a"
bottom: "res3b_branch2b"
top: "res3b"
name: "res3b"
type: "Eltwise"
}

layer {
bottom: "res3b"
top: "res3b"
name: "res3b_relu"
type: "ReLU"
}

layer {
bottom: "res3b"
top: "res4a_branch1"
name: "res4a_branch1"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res4a_branch1"
top: "res4a_branch1"
name: "bn4a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch1"
top: "res4a_branch1"
name: "scale4a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3b"
top: "res4a_branch2a"
name: "res4a_branch2a"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "bn4a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "scale4a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "res4a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2b"
name: "res4a_branch2b"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4a_branch2b"
top: "res4a_branch2b"
name: "bn4a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch2b"
top: "res4a_branch2b"
name: "scale4a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a_branch1"
bottom: "res4a_branch2b"
top: "res4a"
name: "res4a"
type: "Eltwise"
}

layer {
bottom: "res4a"
top: "res4a"
name: "res4a_relu"
type: "ReLU"
}

layer {
bottom: "res4a"
top: "res4b_branch2a"
name: "res4b_branch2a"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "bn4b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "scale4b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "res4b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2b"
name: "res4b_branch2b"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4b_branch2b"
top: "res4b_branch2b"
name: "bn4b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4b_branch2b"
top: "res4b_branch2b"
name: "scale4b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a"
bottom: "res4b_branch2b"
top: "res4b"
name: "res4b"
type: "Eltwise"
}

layer {
bottom: "res4b"
top: "res4b"
name: "res4b_relu"
type: "ReLU"
}

# Add RPN network

layer {
name: "rpn_conv/3x3"
type: "Convolution"
bottom: "res4b"
top: "rpn/output"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "rpn_relu/3x3"
type: "ReLU"
bottom: "rpn/output"
top: "rpn/output"
}

layer {
name: "rpn_cls_score"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 18 # 2(bg/fg) * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}

layer {
name: "rpn_bbox_pred"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 36 # 4 * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
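
As an aside, the 18 and 36 output channels above follow directly from the anchor count; a minimal sketch, assuming the stock py-faster-rcnn anchor set of 3 scales x 3 aspect ratios:

num_anchors = 3 * 3                   # scales x aspect ratios (assumed defaults)
rpn_cls_channels = 2 * num_anchors    # bg/fg score per anchor
rpn_bbox_channels = 4 * num_anchors   # (dx, dy, dw, dh) per anchor
print(rpn_cls_channels, rpn_bbox_channels)  # 18 36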

layer {
bottom: "rpn_cls_score"
top: "rpn_cls_score_reshape"
name: "rpn_cls_score_reshape"
type: "Reshape"
reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}

layer {
name: 'rpn-data'
type: 'Python'
bottom: 'rpn_cls_score'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'data'
top: 'rpn_labels'
top: 'rpn_bbox_targets'
top: 'rpn_bbox_inside_weights'
top: 'rpn_bbox_outside_weights'
python_param {
module: 'rpn.anchor_target_layer'
layer: 'AnchorTargetLayer'
param_str: "'feat_stride': 16"
}
}
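
The 'feat_stride': 16 handed to the anchor target layer should equal the total downsampling in front of res4b; a quick sanity check, assuming the usual stride-2 conv1 and pool1 plus the stride-2 res3a and res4a blocks above:

strides = [2, 2, 2, 2]   # conv1, pool1, res3a_branch*, res4a_branch*
feat_stride = 1
for s in strides:
    feat_stride *= s
print(feat_stride, 1.0 / feat_stride)  # 16 0.0625, the spatial_scale used by roi_pool5 below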

layer {
name: "rpn_loss_cls"
type: "SoftmaxWithLoss"
bottom: "rpn_cls_score_reshape"
bottom: "rpn_labels"
propagate_down: 1
propagate_down: 0
top: "rpn_cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}

layer {
name: "rpn_loss_bbox"
type: "SmoothL1Loss"
bottom: "rpn_bbox_pred"
bottom: "rpn_bbox_targets"
bottom: 'rpn_bbox_inside_weights'
bottom: 'rpn_bbox_outside_weights'
top: "rpn_loss_bbox"
loss_weight: 1
smooth_l1_loss_param { sigma: 3.0 }
}
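
For reference, sigma: 3.0 only moves the point where the loss switches from quadratic to linear; a small sketch of the per-element term, assuming the standard py-faster-rcnn SmoothL1Loss definition:

def smooth_l1(x, sigma=3.0):
    sigma2 = sigma * sigma
    if abs(x) < 1.0 / sigma2:
        return 0.5 * sigma2 * x * x
    return abs(x) - 0.5 / sigma2

print(smooth_l1(0.05), smooth_l1(1.0))  # quadratic near zero, linear for large residuals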

#========= RoI Proposal ============

layer {
name: "rpn_cls_prob"
type: "Softmax"
bottom: "rpn_cls_score_reshape"
top: "rpn_cls_prob"
}

layer {
name: 'rpn_cls_prob_reshape'
type: 'Reshape'
bottom: 'rpn_cls_prob'
top: 'rpn_cls_prob_reshape'
reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}

layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rpn_rois'
top: 'rpn_scores'
python_param {
module: 'rpn.proposal_layer'
layer: 'ProposalLayer'
param_str: "'feat_stride': 16"
}
}

#layer {
# name: 'debug-data'
# type: 'Python'
# bottom: 'data'
# bottom: 'rpn_rois'
# bottom: 'rpn_scores'
# python_param {
# module: 'rpn.debug_layer'
# layer: 'RPNDebugLayer'
# }
#}

layer {
name: 'roi-data'
type: 'Python'
bottom: 'rpn_rois'
bottom: 'gt_boxes'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'rpn.proposal_target_layer'
layer: 'ProposalTargetLayer'
param_str: "'num_classes': 21"
}
}

# Stop the ResNet trunk at conv4_x and add RoI pooling

layer {
name: "roi_pool5"
type: "ROIPooling"
bottom: "res4b"
bottom: "rois"
top: "roipool5"
roi_pooling_param {
pooled_w: 14
pooled_h: 14
spatial_scale: 0.0625 # 1/16
}
}

# Resume the ResNet conv5_x layers after RoI pooling

layer {
bottom: "roipool5"
top: "res5a_branch1"
name: "res5a_branch1"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res5a_branch1"
top: "res5a_branch1"
name: "bn5a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch1"
top: "res5a_branch1"
name: "scale5a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "roipool5"
top: "res5a_branch2a"
name: "res5a_branch2a"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "bn5a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "scale5a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "res5a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2b"
name: "res5a_branch2b"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5a_branch2b"
top: "res5a_branch2b"
name: "bn5a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch2b"
top: "res5a_branch2b"
name: "scale5a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a_branch1"
bottom: "res5a_branch2b"
top: "res5a"
name: "res5a"
type: "Eltwise"
}

layer {
bottom: "res5a"
top: "res5a"
name: "res5a_relu"
type: "ReLU"
}

layer {
bottom: "res5a"
top: "res5b_branch2a"
name: "res5b_branch2a"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "bn5b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "scale5b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "res5b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2b"
name: "res5b_branch2b"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5b_branch2b"
top: "res5b_branch2b"
name: "bn5b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5b_branch2b"
top: "res5b_branch2b"
name: "scale5b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a"
bottom: "res5b_branch2b"
top: "res5b"
name: "res5b"
type: "Eltwise"
}

layer {
bottom: "res5b"
top: "res5b"
name: "res5b_relu"
type: "ReLU"
}

layer {
bottom: "res5b"
top: "pool5"
name: "pool5"
type: "Pooling"
pooling_param {
kernel_size: 7
stride: 1
pool: AVE
}
}
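
The kernel_size: 7 here is what collapses each RoI into a single feature vector; a quick check of the per-RoI spatial sizes, assuming the 14x14 RoI pooling output and the stride-2 res5a branch above:

roi_pool = 14                                   # pooled_w = pooled_h in roi_pool5
after_res5a = (roi_pool + 2 * 1 - 3) // 2 + 1   # 3x3 conv, pad 1, stride 2 -> 7
after_pool5 = after_res5a - 7 + 1               # 7x7 average pool, stride 1 -> 1
print(after_res5a, after_pool5)                 # 7 1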

######### Add faster RCNN cls and bbox layer

layer {
name: "cls_score_uefa"
type: "InnerProduct"
bottom: "pool5"
top: "cls_score_uefa"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 21
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}

layer {
name: "bbox_pred_uefa"
type: "InnerProduct"
bottom: "pool5"
top: "bbox_pred_uefa"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 84
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}
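
These two heads have to stay consistent with the 'num_classes': 21 passed to roi-data (20 PASCAL VOC classes plus background, with class-specific box regression); a quick check:

num_classes = 21
assert (num_classes, 4 * num_classes) == (21, 84)  # cls_score_uefa / bbox_pred_uefa outputs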

layer {
name: "loss_cls"
type: "SoftmaxWithLoss"
bottom: "cls_score_uefa"
bottom: "labels"
propagate_down: 1
propagate_down: 0
top: "loss_cls"
loss_weight: 1
}

layer {
name: "loss_bbox"
type: "SmoothL1Loss"
bottom: "bbox_pred_uefa"
bottom: "bbox_targets"
bottom: "bbox_inside_weights"
bottom: "bbox_outside_weights"
top: "loss_bbox"
loss_weight: 1
}

And the test.prototxt file is:

name: "ResNet-18"

input: "data"
input_shape {
dim: 1
dim: 3
dim: 1280
dim: 720
}

input: "im_info"
input_shape {
dim: 1
dim: 3
}
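
For context, im_info is a 1x3 blob that py-faster-rcnn fills with the image height, width and rescale factor; a hypothetical example matching the shape declared above:

import numpy as np

im_info = np.array([[1280, 720, 1.0]], dtype=np.float32)  # height, width, scale (example values)
print(im_info.shape)  # (1, 3)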

layer {
bottom: "data"
top: "conv1"
name: "conv1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 7
pad: 3
stride: 2
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "bn_conv1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "scale_conv1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "conv1_relu"
type: "ReLU"
}

layer {
bottom: "conv1"
top: "pool1"
name: "pool1"
type: "Pooling"
pooling_param {
kernel_size: 3
stride: 2
pool: MAX
}
}

layer {
bottom: "pool1"
top: "res2a_branch1"
name: "res2a_branch1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 1
pad: 0
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch1"
top: "res2a_branch1"
name: "bn2a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch1"
top: "res2a_branch1"
name: "scale2a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "pool1"
top: "res2a_branch2a"
name: "res2a_branch2a"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "bn2a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "scale2a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "res2a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2b"
name: "res2a_branch2b"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch2b"
top: "res2a_branch2b"
name: "bn2a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch2b"
top: "res2a_branch2b"
name: "scale2a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a_branch1"
bottom: "res2a_branch2b"
top: "res2a"
name: "res2a"
type: "Eltwise"
}

layer {
bottom: "res2a"
top: "res2a"
name: "res2a_relu"
type: "ReLU"
}

layer {
bottom: "res2a"
top: "res2b_branch2a"
name: "res2b_branch2a"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "bn2b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "scale2b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "res2b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2b"
name: "res2b_branch2b"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2b_branch2b"
top: "res2b_branch2b"
name: "bn2b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2b_branch2b"
top: "res2b_branch2b"
name: "scale2b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a"
bottom: "res2b_branch2b"
top: "res2b"
name: "res2b"
type: "Eltwise"
}

layer {
bottom: "res2b"
top: "res2b"
name: "res2b_relu"
type: "ReLU"
}

layer {
bottom: "res2b"
top: "res3a_branch1"
name: "res3a_branch1"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res3a_branch1"
top: "res3a_branch1"
name: "bn3a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch1"
top: "res3a_branch1"
name: "scale3a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2b"
top: "res3a_branch2a"
name: "res3a_branch2a"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "bn3a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "scale3a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "res3a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2b"
name: "res3a_branch2b"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3a_branch2b"
top: "res3a_branch2b"
name: "bn3a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch2b"
top: "res3a_branch2b"
name: "scale3a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a_branch1"
bottom: "res3a_branch2b"
top: "res3a"
name: "res3a"
type: "Eltwise"
}

layer {
bottom: "res3a"
top: "res3a"
name: "res3a_relu"
type: "ReLU"
}

layer {
bottom: "res3a"
top: "res3b_branch2a"
name: "res3b_branch2a"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "bn3b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "scale3b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "res3b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2b"
name: "res3b_branch2b"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3b_branch2b"
top: "res3b_branch2b"
name: "bn3b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3b_branch2b"
top: "res3b_branch2b"
name: "scale3b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a"
bottom: "res3b_branch2b"
top: "res3b"
name: "res3b"
type: "Eltwise"
}

layer {
bottom: "res3b"
top: "res3b"
name: "res3b_relu"
type: "ReLU"
}

layer {
bottom: "res3b"
top: "res4a_branch1"
name: "res4a_branch1"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res4a_branch1"
top: "res4a_branch1"
name: "bn4a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch1"
top: "res4a_branch1"
name: "scale4a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3b"
top: "res4a_branch2a"
name: "res4a_branch2a"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "bn4a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "scale4a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "res4a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2b"
name: "res4a_branch2b"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4a_branch2b"
top: "res4a_branch2b"
name: "bn4a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch2b"
top: "res4a_branch2b"
name: "scale4a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a_branch1"
bottom: "res4a_branch2b"
top: "res4a"
name: "res4a"
type: "Eltwise"
}

layer {
bottom: "res4a"
top: "res4a"
name: "res4a_relu"
type: "ReLU"
}

layer {
bottom: "res4a"
top: "res4b_branch2a"
name: "res4b_branch2a"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "bn4b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "scale4b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "res4b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2b"
name: "res4b_branch2b"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4b_branch2b"
top: "res4b_branch2b"
name: "bn4b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4b_branch2b"
top: "res4b_branch2b"
name: "scale4b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a"
bottom: "res4b_branch2b"
top: "res4b"
name: "res4b"
type: "Eltwise"
}

layer {
bottom: "res4b"
top: "res4b"
name: "res4b_relu"
type: "ReLU"
}

# Add RPN network

layer {
name: "rpn_conv/3x3"
type: "Convolution"
bottom: "res4b"
top: "rpn/output"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "rpn_relu/3x3"
type: "ReLU"
bottom: "rpn/output"
top: "rpn/output"
}

layer {
name: "rpn_cls_score"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 18 # 2(bg/fg) * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}

layer {
name: "rpn_bbox_pred"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 36 # 4 * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}

layer {
bottom: "rpn_cls_score"
top: "rpn_cls_score_reshape"
name: "rpn_cls_score_reshape"
type: "Reshape"
reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}

layer {
name: 'rpn-data'
type: 'Python'
bottom: 'rpn_cls_score'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'data'
top: 'rpn_labels'
top: 'rpn_bbox_targets'
top: 'rpn_bbox_inside_weights'
top: 'rpn_bbox_outside_weights'
python_param {
module: 'rpn.anchor_target_layer'
layer: 'AnchorTargetLayer'
param_str: "'feat_stride': 16"
}
}

layer {
name: "rpn_loss_cls"
type: "SoftmaxWithLoss"
bottom: "rpn_cls_score_reshape"
bottom: "rpn_labels"
propagate_down: 1
propagate_down: 0
top: "rpn_cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}

layer {
name: "rpn_loss_bbox"
type: "SmoothL1Loss"
bottom: "rpn_bbox_pred"
bottom: "rpn_bbox_targets"
bottom: 'rpn_bbox_inside_weights'
bottom: 'rpn_bbox_outside_weights'
top: "rpn_loss_bbox"
loss_weight: 1
smooth_l1_loss_param { sigma: 3.0 }
}

#========= RoI Proposal ============

layer {
name: "rpn_cls_prob"
type: "Softmax"
bottom: "rpn_cls_score_reshape"
top: "rpn_cls_prob"
}

layer {
name: 'rpn_cls_prob_reshape'
type: 'Reshape'
bottom: 'rpn_cls_prob'
top: 'rpn_cls_prob_reshape'
reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}

layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rpn_rois'
top: 'rpn_scores'
python_param {
module: 'rpn.proposal_layer'
layer: 'ProposalLayer'
param_str: "'feat_stride': 16"
}
}

# Stop the ResNet trunk at conv4_x and add RoI pooling

layer {
name: "roi_pool5"
type: "ROIPooling"
bottom: "res4b"
bottom: "rois"
top: "roipool5"
roi_pooling_param {
pooled_w: 14
pooled_h: 14
spatial_scale: 0.0625 # 1/16
}
}

# Resume the ResNet conv5_x layers after RoI pooling

layer {
bottom: "roipool5"
top: "res5a_branch1"
name: "res5a_branch1"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res5a_branch1"
top: "res5a_branch1"
name: "bn5a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch1"
top: "res5a_branch1"
name: "scale5a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "roipool5"
top: "res5a_branch2a"
name: "res5a_branch2a"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "bn5a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "scale5a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "res5a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2b"
name: "res5a_branch2b"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5a_branch2b"
top: "res5a_branch2b"
name: "bn5a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch2b"
top: "res5a_branch2b"
name: "scale5a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a_branch1"
bottom: "res5a_branch2b"
top: "res5a"
name: "res5a"
type: "Eltwise"
}

layer {
bottom: "res5a"
top: "res5a"
name: "res5a_relu"
type: "ReLU"
}

layer {
bottom: "res5a"
top: "res5b_branch2a"
name: "res5b_branch2a"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "bn5b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "scale5b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "res5b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2b"
name: "res5b_branch2b"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5b_branch2b"
top: "res5b_branch2b"
name: "bn5b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5b_branch2b"
top: "res5b_branch2b"
name: "scale5b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a"
bottom: "res5b_branch2b"
top: "res5b"
name: "res5b"
type: "Eltwise"
}

layer {
bottom: "res5b"
top: "res5b"
name: "res5b_relu"
type: "ReLU"
}

layer {
bottom: "res5b"
top: "pool5"
name: "pool5"
type: "Pooling"
pooling_param {
kernel_size: 7
stride: 1
pool: AVE
}
}

######### Add faster RCNN cls and bbox layer

layer {
name: "cls_score_uefa"
type: "InnerProduct"
bottom: "pool5"
top: "cls_score_uefa"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 21
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}

layer {
name: "bbox_pred_uefa"
type: "InnerProduct"
bottom: "pool5"
top: "bbox_pred_uefa"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 84
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}

layer {
name: "cls_prob"
type: "Softmax"
bottom: "cls_score_uefa"
top: "cls_prob"
}

When running test.prototxt I am facing the following issue:
F0226 05:04:43.527575 429 insert_splits.cpp:29] Unknown bottom blob 'gt_boxes' (layer 'rpn-data', bottom index 1)
*** Check failure stack trace: ***
How can I solve this?
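
The failing check is reproducible from the file itself: the net declares only 'data' and 'im_info' as inputs, yet it still contains training-only layers such as 'rpn-data' (whose bottoms include 'gt_boxes') and the two RPN loss layers, and roi_pool5 reads a 'rois' blob that nothing in this file produces. A minimal sketch, assuming a standard pycaffe install and the file saved locally as test.prototxt, that lists every such dangling bottom:

from caffe.proto import caffe_pb2
from google.protobuf import text_format

net = caffe_pb2.NetParameter()
with open('test.prototxt') as f:
    text_format.Merge(f.read(), net)

available = set(net.input)  # just 'data' and 'im_info' in this file
for layer in net.layer:
    for b in layer.bottom:
        if b not in available:
            print("layer '%s' needs missing blob '%s'" % (layer.name, b))
    available.update(layer.top)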
