# petr_resnet50_16x2_coco.yml
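# PETR: end-to-end multi-person pose estimation with transformers, trained on COCO
# keypoints (17 joints, person class only). The "16x2" in the file name presumably
# follows the <num GPUs>x<images per GPU> naming convention, i.e. 16 GPUs with the
# TrainReader batch_size of 2 set below.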
use_gpu: true
log_iter: 50
save_dir: output
snapshot_epoch: 1
weights: output/petr_resnet50_16x2_coco/model_final
epoch: 100
num_joints: &num_joints 17
pixel_std: &pixel_std 200
metric: COCO
num_classes: 1
trainsize: &trainsize 512
flip_perm: &flip_perm [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
find_unused_parameters: False
#####model
architecture: PETR
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PETR_pretrained.pdparams
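# Model: the last three ResNet-50 stages are projected to 256 channels by the
# ChannelMapper neck and fed to PETRHead, whose two-stage deformable transformer
# decodes 300 pose queries (a full 17-keypoint pose per query), with an auxiliary
# heatmap encoder and a refinement decoder. Matching uses a Hungarian assigner over
# classification, keypoint-L1, and OKS costs.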
PETR:
  backbone:
    name: ResNet
    depth: 50
    variant: b
    norm_type: bn
    freeze_norm: True
    freeze_at: 0
    return_idx: [1,2,3]
    num_stages: 4
    lr_mult_list: [0.1, 0.1, 0.1, 0.1]
  neck:
    name: ChannelMapper
    in_channels: [512, 1024, 2048]
    kernel_size: 1
    out_channels: 256
    norm_type: "gn"
    norm_groups: 32
    act: None
    num_outs: 4
  bbox_head:
    name: PETRHead
    num_query: 300
    num_classes: 1 # only person
    in_channels: 2048
    sync_cls_avg_factor: true
    with_kpt_refine: true
    transformer:
      name: PETRTransformer
      as_two_stage: true
      encoder:
        name: TransformerEncoder
        encoder_layer:
          name: TransformerEncoderLayer
          d_model: 256
          attn:
            name: MSDeformableAttention
            embed_dim: 256
            num_heads: 8
            num_levels: 4
            num_points: 4
          dim_feedforward: 1024
          dropout: 0.1
        num_layers: 6
      decoder:
        name: PETR_TransformerDecoder
        num_layers: 3
        return_intermediate: true
        decoder_layer:
          name: PETR_TransformerDecoderLayer
          d_model: 256
          dim_feedforward: 1024
          dropout: 0.1
          self_attn:
            name: MultiHeadAttention
            embed_dim: 256
            num_heads: 8
            dropout: 0.1
          cross_attn:
            name: MultiScaleDeformablePoseAttention
            embed_dims: 256
            num_heads: 8
            num_levels: 4
            num_points: 17
      hm_encoder:
        name: TransformerEncoder
        encoder_layer:
          name: TransformerEncoderLayer
          d_model: 256
          attn:
            name: MSDeformableAttention
            embed_dim: 256
            num_heads: 8
            num_levels: 1
            num_points: 4
          dim_feedforward: 1024
          dropout: 0.1
        num_layers: 1
      refine_decoder:
        name: PETR_DeformableDetrTransformerDecoder
        num_layers: 2
        return_intermediate: true
        decoder_layer:
          name: PETR_TransformerDecoderLayer
          d_model: 256
          dim_feedforward: 1024
          dropout: 0.1
          self_attn:
            name: MultiHeadAttention
            embed_dim: 256
            num_heads: 8
            dropout: 0.1
          cross_attn:
            name: MSDeformableAttention
            embed_dim: 256
            num_levels: 4
    positional_encoding:
      name: PositionEmbedding
      num_pos_feats: 128
      normalize: true
      offset: -0.5
    loss_cls:
      name: Weighted_FocalLoss
      use_sigmoid: true
      gamma: 2.0
      alpha: 0.25
      loss_weight: 2.0
      reduction: "mean"
    loss_kpt:
      name: L1Loss
      loss_weight: 70.0
    loss_kpt_rpn:
      name: L1Loss
      loss_weight: 70.0
    loss_oks:
      name: OKSLoss
      loss_weight: 2.0
    loss_hm:
      name: CenterFocalLoss
      loss_weight: 4.0
    loss_kpt_refine:
      name: L1Loss
      loss_weight: 80.0
    loss_oks_refine:
      name: OKSLoss
      loss_weight: 3.0
    assigner:
      name: PoseHungarianAssigner
      cls_cost:
        name: FocalLossCost
        weight: 2.0
      kpt_cost:
        name: KptL1Cost
        weight: 70.0
      oks_cost:
        name: OksCost
        weight: 7.0
#####optimizer
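# Optimizer: AdamW with base LR 2e-4, L2 weight decay 1e-4, and gradient clipping by
# global norm 0.1. The LR is decayed once by a factor of 0.1 at epoch 80 of the
# 100-epoch schedule; the LinearWarmup block is kept commented out.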
LearningRate:
  base_lr: 0.0002
  schedulers:
    - !PiecewiseDecay
      milestones: [80]
      gamma: 0.1
      use_warmup: false
    # - !LinearWarmup
    #   start_factor: 0.001
    #   steps: 1000

OptimizerBuilder:
  clip_grad_by_norm: 0.1
  optimizer:
    type: AdamW
  regularizer:
    factor: 0.0001
    type: L2
#####data
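# Data: bottom-up COCO keypoint datasets under dataset/coco; image_dir and anno_path
# are relative to dataset_dir, and num_joints reuses the top-level anchor (17).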
TrainDataset:
  !KeypointBottomUpCocoDataset
    image_dir: train2017
    anno_path: annotations/person_keypoints_train2017.json
    dataset_dir: dataset/coco
    num_joints: *num_joints
    return_mask: false

EvalDataset:
  !KeypointBottomUpCocoDataset
    image_dir: val2017
    anno_path: annotations/person_keypoints_val2017.json
    dataset_dir: dataset/coco
    num_joints: *num_joints
    test_mode: true
    return_mask: false

TestDataset:
  !ImageFolder
    anno_path: dataset/coco/keypoint_imagelist.txt
worker_num: 2
global_mean: &global_mean [0.485, 0.456, 0.406]
global_std: &global_std [0.229, 0.224, 0.225]
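# Readers: training applies photometric distortion, random flip with the COCO
# left/right keypoint permutation, random affine, and DETR-style multi-scale
# resize/crop, then normalizes with the ImageNet mean/std anchors above. Eval and
# test use fixed-size resizing with the same normalization.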
TrainReader:
  sample_transforms:
    - Decode: {}
    - PhotoMetricDistortion:
        brightness_delta: 32
        contrast_range: [0.5, 1.5]
        saturation_range: [0.5, 1.5]
        hue_delta: 18
    - KeyPointFlip:
        flip_prob: 0.5
        flip_permutation: *flip_perm
    - RandomAffine:
        max_degree: 30
        scale: [1.0, 1.0]
        max_shift: 0.
        trainsize: -1
    - RandomSelect: { transforms1: [ RandomShortSideRangeResize: { scales: [[400, 1400], [1400, 1400]] } ],
                      transforms2: [
                          RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
                          RandomSizeCrop: { min_size: 384, max_size: 600 },
                          RandomShortSideRangeResize: { scales: [[400, 1400], [1400, 1400]] } ] }
  batch_transforms:
    - NormalizeImage: {mean: *global_mean, std: *global_std, is_scale: True}
    - PadGT: {pad_img: True, minimum_gtnum: 1}
    - Permute: {}
  batch_size: 2
  shuffle: true
  drop_last: true
  use_shared_memory: true
  collate_batch: true

EvalReader:
  sample_transforms:
    - PETR_Resize: {img_scale: [[800, 1333]], keep_ratio: True}
    # - MultiscaleTestResize: {origin_target_size: [[800, 1333]], use_flip: false}
    - NormalizeImage:
        mean: *global_mean
        std: *global_std
        is_scale: true
    - Permute: {}
  batch_size: 1

TestReader:
  sample_transforms:
    - Decode: {}
    - EvalAffine: {size: 800}
    - NormalizeImage:
        mean: *global_mean
        std: *global_std
        is_scale: true
    - Permute: {}
  batch_size: 1
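# Usage (assuming the standard PaddleDetection entry points, with this file at
# configs/keypoint/petr/petr_resnet50_16x2_coco.yml in the repo checkout):
#   train: python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/keypoint/petr/petr_resnet50_16x2_coco.yml --eval
#   eval:  python tools/eval.py -c configs/keypoint/petr/petr_resnet50_16x2_coco.yml -o weights=output/petr_resnet50_16x2_coco/model_final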