# two-stage-det-unihead-resnet50-coco-1x.yaml
# Class count shared through the &num_classes anchor; bbox_head reads it via *num_classes.
num_classes: &num_classes 81
# Runtime options: the task this config trains is named `det`.
runtime:
  task_names: det
# Flip transform; referenced as *flip by the training transformer list.
flip: &flip
  type: flip
  kwargs:
    flip_p: 0.5
# Training-time resize; referenced as *train_resize by the training transformer list.
resize: &train_resize
  type: keep_ar_resize
  kwargs:
    scales: [800]
    max_size: 1333
    separate_wh: true
# Test-time resize; referenced as *test_resize by the test transformer list.
# Uses the same settings as the training resize.
test_resize: &test_resize
  type: keep_ar_resize
  kwargs:
    scales: [800]
    max_size: 1333
    separate_wh: true
# Tensor conversion step; referenced as *to_tensor by both transformer lists.
to_tensor: &to_tensor
  type: to_tensor
# Input normalization; referenced as *normalize by both transformer lists.
normalize: &normalize
  type: normalize
  kwargs:
    mean: [0.485, 0.456, 0.406]  # ImageNet pretrained statistics
    std: [0.229, 0.224, 0.225]
# Data pipeline: train/test datasets, batch sampler and dataloader.
# NOTE(review): nesting restored from a copy that had lost its indentation;
# confirm against the upstream file.
dataset:  # Required.
  train:
    dataset:
      type: coco
      kwargs:
        source: train  # dataset id
        meta_file: data/coco/annotations/instances_train2017.json
        image_reader:
          type: fs_opencv
          kwargs:
            image_dir: data/coco/train2017
            color_mode: RGB
        transformer: [*flip, *train_resize, *to_tensor, *normalize]
  test:
    dataset:
      type: coco
      kwargs:
        source: val
        meta_file: data/coco/annotations/instances_val2017.json
        image_reader:
          type: fs_opencv
          kwargs:
            image_dir: data/coco/val2017
            color_mode: RGB
        transformer: [*test_resize, *to_tensor, *normalize]
        evaluator:
          type: COCO  # choices = {'COCO', 'VOC', 'MR'}
          kwargs:
            gt_file: data/coco/annotations/instances_val2017.json
            iou_types: [bbox]
  batch_sampler:
    type: aspect_ratio_group
    kwargs:
      sampler:
        type: dist
        kwargs: {}
      batch_size: 4
      aspect_grouping: [1]
  dataloader:
    type: base
    kwargs:
      num_workers: 4
      alignment: 32
# Training schedule, optimizer and learning-rate scheduler.
# NOTE(review): nesting restored from a copy that had lost its indentation;
# confirm against the upstream file.
trainer:  # Required.
  max_epoch: 13  # total epochs for the training
  test_freq: 1
  save_freq: 13
  # optimizer = AdamW(params, lr=6.25e-06, betas=(0.9, 0.999), weight_decay=0.05)
  optimizer:
    type: AdamW
    kwargs:
      lr: 6.25e-06
      betas: [0.9, 0.999]
      weight_decay: 0.05
  # lr_scheduler = MultiStepLR(optimizer, milestones=[9, 12], gamma=0.1)
  lr_scheduler:
    # Set to 0 to disable warmup. During warmup, target_lr = init_lr * total_batch_size.
    warmup_iter: 500
    type: MultiStepLR
    kwargs:
      milestones: [9, 12]  # epochs to decay lr
      gamma: 0.1  # decay rate
# Checkpoint, pretrained-weight and result locations.
saver:  # Required.
  save_dir: checkpoints  # dir to save checkpoints
  pretrain_model: pretrain/imagenet/resnet50-19c8e357.pth
  results_dir: results_dir  # dir to save detection results, i.e., bboxes, masks, keypoints
  # Find the last checkpoint in save_dir and resume from it automatically.
  # This option has the highest priority
  # (auto_resume > opts > resume_model > pretrain_model).
  auto_resume: true
# Training hooks: automatic checkpointing and train/val logging.
hooks:
- type: auto_checkpoint
- type: train_val_logger
  kwargs:
    freq: 50  # log printing frequency
    skip_first_k: 5  # ignore the time cost of the first 5 rounds
    logdir: ./default/log  # tensorboard log path
    summary_writer: tensorboard
# Network definition: an ordered list of modules chained through `prev`.
net:
# backbone = resnet50(frozen_layers, out_layers, out_strides)
- name: backbone
  type: resnet50
  kwargs:
    frozen_layers: [0, 1]  # layer0...1 is fixed
    out_layers: [1, 2, 3, 4]  # layer1...4, commonly named Conv2...5
    out_strides: [4, 8, 16, 32]  # tell the strides of output features
    normalize:
      type: freeze_bn
    initializer:
      method: msra
# FPN neck fed by the backbone.
- name: neck
  prev: backbone
  type: FPN
  kwargs:
    outplanes: 256
    start_level: 2
    # If num_level > len(backbone.out_layers), additional convs will be stacked.
    num_level: 5
    # Strides of output features, a.k.a. anchor strides for roi_head.
    out_strides: [4, 8, 16, 32, 64]
    # Method to downsample: for FPN it's pool, for RetinaNet it's conv.
    downsample: pool
    upsample: nearest  # method to interp, nearest or bilinear
    initializer:
      method: xavier
# RPN head fed by the neck.
- name: roi_head
  prev: neck
  type: NaiveRPN
  kwargs:
    feat_planes: 256  # channels of intermediate conv
    # Number of classes including background: for RPN it's 2, for RetinaNet it's 81.
    num_classes: 2
    num_anchors: 3
    class_activation: sigmoid
    num_level: 5
    initializer:
      method: normal
      std: 0.01
# RPN post-processing: losses, anchors, target assignment and proposal selection.
# NOTE(review): nesting restored from a copy that had lost its indentation;
# confirm against the upstream file.
- name: rpn_post_process
  prev: roi_head
  type: rpn_post
  kwargs:
    # Number of classes including background: for RPN it's 2, for RetinaNet it's 81.
    num_classes: 2
    cfg:
      cls_loss:
        type: sigmoid_cross_entropy
      loc_loss:
        type: l1_loss
      anchor_generator:
        type: hand_craft
        kwargs:
          # Anchor strides are provided as feature strides by the feature extractor.
          anchor_ratios: [0.5, 1, 2]
          anchor_scales: [8]  # scale of anchors relative to feature map
      roi_supervisor:
        type: rpn
        kwargs:
          allowed_border: -1
          matcher:
            type: max_iou
            kwargs:
              positive_iou_thresh: 0.7
              negative_iou_thresh: 0.3
              ignore_iou_thresh: 0.5
              allow_low_quality_match: true
              # This option is not supported yet, but support is planned.
              low_quality_thresh: 0.3
          sampler:
            type: naive_a100  # support A100 training
            kwargs:
              batch_size: 256
              positive_percent: 0.5
      train:
        roi_predictor:
          type: base
          kwargs:
            pre_nms_score_thresh: 0.0
            pre_nms_top_n: 2000
            post_nms_top_n: 1000
            roi_min_size: 0
            nms:
              type: naive
              nms_iou_thresh: 0.7
            merger:
              type: rpn
              kwargs:
                top_n: 1000
      test:
        roi_predictor:
          type: base
          kwargs:
            pre_nms_score_thresh: 0.0
            pre_nms_top_n: 1000
            post_nms_top_n: 1000
            roi_min_size: 0
            nms:
              type: naive
              nms_iou_thresh: 0.7
            merger:
              type: rpn
              kwargs:
                top_n: 1000
# Second-stage UniHead box head fed by the neck.
# NOTE(review): nesting restored from a copy that had lost its indentation;
# confirm against the upstream file.
- name: bbox_head
  prev: neck
  type: UniBboxFC
  kwargs:
    feat_planes: 1024
    num_classes: *num_classes
    num_point: &num_point 16  # reused below as bbox_predictor's num_loc_point
    offset_bias_val: 3.0
    cls_block_num: 2
    loc_block_num: 3
    dropout: 0.1
    initializer:
      method: msra
    cfg:
      cls_loss:
        type: softmax_cross_entropy
        kwargs:
          class_dim: -1  # last dim is the class dim
      centerness_loss:
        type: sigmoid_cross_entropy
        kwargs:
          loss_weight: 1.0
      loc_loss_iou:
        type: iou_loss
        kwargs:
          loss_type: giou
          loss_weight: 1.0
      fpn:
        # Indices of FPN features used for this stage; these levels are
        # supposed to be continuous.
        fpn_levels: [0, 1, 2, 3]
        # Target level of a RoI is floor(log2((w*h)**0.5/base_scale)).
        base_scale: 56
      roipooling:
        # choices=['roialignpool', 'psroipool', 'roipool', 'psroimaskpool'];
        # note that 'psroipool' is for the RFCN head.
        method: 'roialignpool'
        pool_size: 7
        # Number of sampling points in each bin; 0 means densely sampled.
        sampling_ratio: 0
      bbox_normalize: &bbox_norm  # not used in UniHead
        means: [0, 0, 0, 0]
        stds: [0.1, 0.1, 0.2, 0.2]
      # Whether bbox regression shares one location across all classes.
      share_location: &share_location false
      bbox_supervisor:
        type: uni_bbox_det
        kwargs:
          matcher:
            type: max_iou
            kwargs:
              ignore_iou_thresh: 0.5  # required if ignore_regions are provided
              positive_iou_thresh: 0.5
              negative_iou_thresh: 0.5
              # Positive if an anchor has the highest IoU with any gt.
              allow_low_quality_match: false
              low_quality_thresh: 0.5
          sampler:
            type: naive_a100
            kwargs:
              batch_size: 512
              positive_percent: 0.25
      bbox_predictor:
        type: uni_bbox_det
        kwargs:
          bbox_normalize: *bbox_norm
          share_location: *share_location
          num_loc_point: *num_point
          nms:
            type: naive
            nms_iou_thresh: 0.5
          bbox_score_thresh: 0.0
          top_n: 100