Training only the 'person' class from the VOC2007 dataset, the final model can't detect anything #150

Closed
wait1988 opened this issue Apr 20, 2016 · 11 comments

@wait1988

I trained using only the 'person' class from the VOC2007 dataset, and I also included the difficult samples to keep the training process working. The final model, however, can't detect anything. Does anyone know what's going on here?
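
For context on the difficult-sample tweak mentioned above: py-faster-rcnn's VOC loader drops objects flagged as difficult by default (the use_diff option in lib/datasets/pascal_voc.py), so a person-only split can leave some images with zero ground-truth boxes. A minimal, simplified sketch of that filtering logic (not the repo's actual code, which parses the VOC XML annotations):

# Simplified sketch of the default difficult-object filtering in
# py-faster-rcnn's pascal_voc annotation loading (use_diff defaults to False).
def keep_objects(objects, use_diff=False):
    # objects: list of dicts with at least a 'difficult' flag (0 or 1)
    if not use_diff:
        objects = [obj for obj in objects if int(obj['difficult']) == 0]
    return objects

# With only the 'person' class, an image whose persons are all marked
# difficult has no ground-truth boxes left unless use_diff is enabled.
print(keep_objects([{'name': 'person', 'difficult': 1}]))        # -> []
print(keep_objects([{'name': 'person', 'difficult': 1}], True))  # -> kept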

@deboc

deboc commented Apr 20, 2016

Hi wait1988,
How was the loss during training?
Can we see your train.prototxt?

@wait1988
Author

train.prototxt:
name: "ZF"
layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 2"
}
}

#========= conv1-conv5 ============

layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 96
kernel_size: 7
pad: 3
stride: 2
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "norm1"
type: "LRN"
bottom: "conv1"
top: "norm1"
lrn_param {
local_size: 3
alpha: 0.00005
beta: 0.75
norm_region: WITHIN_CHANNEL
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "norm1"
top: "pool1"
pooling_param {
kernel_size: 3
stride: 2
pad: 1
pool: MAX
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 256
kernel_size: 5
pad: 2
stride: 2
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "norm2"
type: "LRN"
bottom: "conv2"
top: "norm2"
lrn_param {
local_size: 3
alpha: 0.00005
beta: 0.75
norm_region: WITHIN_CHANNEL
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "norm2"
top: "pool2"
pooling_param {
kernel_size: 3
stride: 2
pad: 1
pool: MAX
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 384
kernel_size: 3
pad: 1
stride: 1
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 384
kernel_size: 3
pad: 1
stride: 1
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}

#========= RPN ============

layer {
name: "rpn_conv/3x3"
type: "Convolution"
bottom: "conv5"
top: "rpn/output"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 256
kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "rpn_relu/3x3"
type: "ReLU"
bottom: "rpn/output"
top: "rpn/output"
}
layer {
name: "rpn_cls_score"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 18 # 2(bg/fg) * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "rpn_bbox_pred"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 36 # 4 * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
bottom: "rpn_cls_score"
top: "rpn_cls_score_reshape"
name: "rpn_cls_score_reshape"
type: "Reshape"
reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}
layer {
name: 'rpn-data'
type: 'Python'
bottom: 'rpn_cls_score'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'data'
top: 'rpn_labels'
top: 'rpn_bbox_targets'
top: 'rpn_bbox_inside_weights'
top: 'rpn_bbox_outside_weights'
python_param {
module: 'rpn.anchor_target_layer'
layer: 'AnchorTargetLayer'
param_str: "'feat_stride': 16"
}
}
layer {
name: "rpn_loss_cls"
type: "SoftmaxWithLoss"
bottom: "rpn_cls_score_reshape"
bottom: "rpn_labels"
propagate_down: 1
propagate_down: 0
top: "rpn_cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}
layer {
name: "rpn_loss_bbox"
type: "SmoothL1Loss"
bottom: "rpn_bbox_pred"
bottom: "rpn_bbox_targets"
bottom: 'rpn_bbox_inside_weights'
bottom: 'rpn_bbox_outside_weights'
top: "rpn_loss_bbox"
loss_weight: 1
smooth_l1_loss_param { sigma: 3.0 }
}

#========= RoI Proposal ============

layer {
name: "rpn_cls_prob"
type: "Softmax"
bottom: "rpn_cls_score_reshape"
top: "rpn_cls_prob"
}
layer {
name: 'rpn_cls_prob_reshape'
type: 'Reshape'
bottom: 'rpn_cls_prob'
top: 'rpn_cls_prob_reshape'
reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}
layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rpn_rois'
python_param {
module: 'rpn.proposal_layer'
layer: 'ProposalLayer'
param_str: "'feat_stride': 16"
}
}
layer {
name: 'roi-data'
type: 'Python'
bottom: 'rpn_rois'
bottom: 'gt_boxes'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'rpn.proposal_target_layer'
layer: 'ProposalTargetLayer'
param_str: "'num_classes': 2"
}
}

#========= RCNN ============

layer {
name: "roi_pool_conv5"
type: "ROIPooling"
bottom: "conv5"
bottom: "rois"
top: "roi_pool_conv5"
roi_pooling_param {
pooled_w: 6
pooled_h: 6
spatial_scale: 0.0625 # 1/16
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "roi_pool_conv5"
top: "fc6"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
inner_product_param {
num_output: 4096
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
scale_train: false
}
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
inner_product_param {
num_output: 4096
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
scale_train: false
}
}
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
inner_product_param {
num_output: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
inner_product_param {
num_output: 8
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss_cls"
type: "SoftmaxWithLoss"
bottom: "cls_score"
bottom: "labels"
propagate_down: 1
propagate_down: 0
top: "cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}
layer {
name: "loss_bbox"
type: "SmoothL1Loss"
bottom: "bbox_pred"
bottom: "bbox_targets"
bottom: 'bbox_inside_weights'
bottom: 'bbox_outside_weights'
top: "bbox_loss"
loss_weight: 1
}

A piece of the training log:
I0420 14:35:15.158990 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.0030837 (* 1 = 0.0030837 loss)
I0420 14:35:15.158994 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.00924739 (* 1 = 0.00924739 loss)
I0420 14:35:15.158998 16768 solver.cpp:571] Iteration 69880, lr = 0.0001
I0420 14:35:20.064649 16768 solver.cpp:242] Iteration 69900, loss = 0.328746
I0420 14:35:20.064685 16768 solver.cpp:258] Train net output #0: bbox_loss = 0.0186957 (* 1 = 0.0186957 loss)
I0420 14:35:20.064692 16768 solver.cpp:258] Train net output #1: cls_loss = 0.0638745 (* 1 = 0.0638745 loss)
I0420 14:35:20.064695 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.0173537 (* 1 = 0.0173537 loss)
I0420 14:35:20.064699 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.0119759 (* 1 = 0.0119759 loss)
I0420 14:35:20.064703 16768 solver.cpp:571] Iteration 69900, lr = 0.0001
I0420 14:35:25.063719 16768 solver.cpp:242] Iteration 69920, loss = 0.245719
I0420 14:35:25.063753 16768 solver.cpp:258] Train net output #0: bbox_loss = 0.0571181 (* 1 = 0.0571181 loss)
I0420 14:35:25.063760 16768 solver.cpp:258] Train net output #1: cls_loss = 0.0622113 (* 1 = 0.0622113 loss)
I0420 14:35:25.063765 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.00133641 (* 1 = 0.00133641 loss)
I0420 14:35:25.063768 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.00168988 (* 1 = 0.00168988 loss)
I0420 14:35:25.063772 16768 solver.cpp:571] Iteration 69920, lr = 0.0001
I0420 14:35:29.993849 16768 solver.cpp:242] Iteration 69940, loss = 0.190783
I0420 14:35:29.993882 16768 solver.cpp:258] Train net output #0: bbox_loss = 0.0851794 (* 1 = 0.0851794 loss)
I0420 14:35:29.993888 16768 solver.cpp:258] Train net output #1: cls_loss = 0.0448895 (* 1 = 0.0448895 loss)
I0420 14:35:29.993893 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.00884468 (* 1 = 0.00884468 loss)
I0420 14:35:29.993897 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.0815758 (* 1 = 0.0815758 loss)
I0420 14:35:29.993901 16768 solver.cpp:571] Iteration 69940, lr = 0.0001
I0420 14:35:34.859596 16768 solver.cpp:242] Iteration 69960, loss = 0.241285
I0420 14:35:34.859629 16768 solver.cpp:258] Train net output #0: bbox_loss = 0.0548535 (* 1 = 0.0548535 loss)
I0420 14:35:34.859635 16768 solver.cpp:258] Train net output #1: cls_loss = 0.114608 (* 1 = 0.114608 loss)
I0420 14:35:34.859640 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.0819508 (* 1 = 0.0819508 loss)
I0420 14:35:34.859644 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.00594436 (* 1 = 0.00594436 loss)
I0420 14:35:34.859648 16768 solver.cpp:571] Iteration 69960, lr = 0.0001
I0420 14:35:39.879679 16768 solver.cpp:242] Iteration 69980, loss = 0.326018
I0420 14:35:39.879714 16768 solver.cpp:258] Train net output #0: bbox_loss = 0.12343 (* 1 = 0.12343 loss)
I0420 14:35:39.879720 16768 solver.cpp:258] Train net output #1: cls_loss = 0.110242 (* 1 = 0.110242 loss)
I0420 14:35:39.879724 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.0487792 (* 1 = 0.0487792 loss)
I0420 14:35:39.879729 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.12166 (* 1 = 0.12166 loss)
I0420 14:35:39.879732 16768 solver.cpp:571] Iteration 69980, lr = 0.0001
speed: 0.246s / iter

@deboc

deboc commented Apr 20, 2016

Have you built your own parser in lib/datasets to assign class 0 to background and class 1 to person?
To debug, I would first try using weights from a pre-trained model.

@wait1988
Author

Yes, I modified pascal_voc.py to change the original 21 classes to 2 classes, that is, background and person. All the other configurations are kept the same as for training the original 21 classes.
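
(The class-list edit described above usually amounts to something like the sketch below; the names and layout are illustrative, since the actual diff is not shown in this thread.)

# Hypothetical sketch of the single-class setup described above. In
# lib/datasets/pascal_voc.py the class tuple keeps '__background__' at
# index 0; it is shown here as a standalone constant for illustration.
CLASSES = ('__background__',  # always class 0
           'person')          # the only foreground class, class 1

# The prototxt dimensions must agree with this list:
NUM_CLASSES = len(CLASSES)           # 'num_classes': 2 in the Python layers
CLS_SCORE_OUTPUTS = NUM_CLASSES      # cls_score num_output: 2
BBOX_PRED_OUTPUTS = 4 * NUM_CLASSES  # bbox_pred num_output: 8
print('{} {} {}'.format(NUM_CLASSES, CLS_SCORE_OUTPUTS, BBOX_PRED_OUTPUTS))  # 2 2 8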

@deboc

deboc commented Apr 20, 2016

Did you train the same model on the 21 classes before? Did it work well?

@wait1988
Author

Yes, I did. It works well, so the failure in the one-class case is weird.

@deboc

deboc commented Apr 20, 2016

Maybe you have the classic cache issue?
$ rm data/cache/train_gt_roidb.pkl

@wait1988
Author

Hmm, I also deleted the cache folder. I really don't know what's going on. By the way, how do you prepare the training samples? I just replaced the original trainval.txt with the person sample list.

@wait1988
Author

Solved. Be careful with the test.prototxt.

@limorbagizada

How was it solved? Which changes have you made in test.prototxt?
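
(The thread does not record which change fixed it. A common culprit in this situation, offered here as an assumption rather than the confirmed fix, is a test.prototxt that still declares the 21-class dimensions: cls_score num_output 21 and bbox_pred num_output 84. A quick way to inspect the deploy net, assuming a standard pycaffe install; the prototxt path below is a placeholder for your own model directory:)

# Sanity-check that the deploy net matches the 2-class training setup.
from caffe.proto import caffe_pb2
from google.protobuf import text_format

net = caffe_pb2.NetParameter()
with open('models/pascal_voc/ZF/faster_rcnn_end2end/test.prototxt') as f:
    text_format.Merge(f.read(), net)

for layer in net.layer:
    if layer.name == 'cls_score':
        print('cls_score num_output = {}'.format(layer.inner_product_param.num_output))  # expect 2
    if layer.name == 'bbox_pred':
        print('bbox_pred num_output = {}'.format(layer.inner_product_param.num_output))  # expect 8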

@AlexofNTHU

@wait1988
Have you successfully solved this problem? What performance did you achieve?
The official version can detect pedestrians with 76.7 AP (PASCAL VOC07 test) when the training data is PASCAL VOC07+12 trainval. I am quite satisfied with the detection results, but have you tried comparing the performance of a single-class pedestrian detector against a multi-class detector on the 'person' class?
Is there any difference in terms of AP?
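
For anyone wanting to run that comparison, py-faster-rcnn ships a per-class evaluator in lib/datasets/voc_eval.py, so the 'person' AP of either detector can be computed directly. A rough sketch of calling it (the detection-file and annotation paths are placeholders for your own layout):

# Rough sketch: compute the PASCAL VOC AP for the 'person' class using
# py-faster-rcnn's voc_eval. Paths below are placeholders.
from datasets.voc_eval import voc_eval  # requires $FRCN_ROOT/lib on PYTHONPATH

rec, prec, ap = voc_eval(
    detpath='results/comp4_det_test_{:s}.txt',          # one detection file per class
    annopath='VOCdevkit/VOC2007/Annotations/{:s}.xml',   # VOC XML annotations
    imagesetfile='VOCdevkit/VOC2007/ImageSets/Main/test.txt',
    classname='person',
    cachedir='annotations_cache',
    ovthresh=0.5,
    use_07_metric=True)  # 11-point VOC07 metric, as in the 76.7 AP figure
print('person AP: {:.3f}'.format(ap))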
