-
Notifications
You must be signed in to change notification settings - Fork 17
/
tevit_swin-l_mstrain.py
73 lines (67 loc) · 2.27 KB
/
tevit_swin-l_mstrain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
_base_ = './tevit_r50.py'
pretrained = 'https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_large_patch4_window7_224_22kto1k-5f0996db.pth' # noqa
num_proposals = 300
model = dict(
backbone=dict(
_delete_=True,
type='SwinTransformer',
embed_dims=192,
depths=[2, 2, 18, 2],
num_heads=[6, 12, 24, 48],
window_size=7,
mlp_ratio=4,
qkv_bias=True,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.2,
patch_norm=True,
out_indices=(0, 1, 2, 3),
with_cp=True,
init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
neck=dict(in_channels=[192, 384, 768, 1536]),
rpn_head=dict(
type='EmbeddingRPNHead',
num_proposals=num_proposals,
proposal_feature_channel=256))
optimizer = dict(
paramwise_cfg=dict(
custom_keys={
'absolute_pos_embed': dict(decay_mult=0.),
'relative_position_bias_table': dict(decay_mult=0.),
'norm': dict(decay_mult=0.)
}))
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
min_values = (320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=True,
with_id=True,
bitmask=True),
dict(
type='Resize',
img_scale=[(1333, value) for value in min_values],
multiscale_mode='value',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_ids']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', img_scale=(1333, 480), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.0),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img']),
]
data = dict(train=dict(pipeline=train_pipeline), test=dict(pipeline=test_pipeline))