-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathspconv_clip_caption_openscene.yaml
155 lines (132 loc) · 5.52 KB
/
spconv_clip_caption_openscene.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Label vocabulary: 200 category names, written ten per row.
# Continuation rows are indented so the flow sequence stays inside the
# CLASS_NAMES entry for strict YAML parsers (column-0 continuation rows are
# only tolerated by lenient ones).
CLASS_NAMES: ['wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf', 'desk', 'office chair',
  'bed', 'pillow', 'sink', 'picture', 'window', 'toilet', 'bookshelf', 'monitor', 'curtain', 'book',
  'armchair', 'coffee table', 'box', 'refrigerator', 'lamp', 'kitchen cabinet', 'towel', 'clothes', 'tv', 'nightstand',
  'counter', 'dresser', 'stool', 'cushion', 'plant', 'ceiling', 'bathtub', 'end table', 'dining table', 'keyboard',
  'bag', 'backpack', 'toilet paper', 'printer', 'tv stand', 'whiteboard', 'blanket', 'shower curtain', 'trash can', 'closet',
  'stairs', 'microwave', 'stove', 'shoe', 'computer tower', 'bottle', 'bin', 'ottoman', 'bench', 'board',
  'washing machine', 'mirror', 'copier', 'basket', 'sofa chair', 'file cabinet', 'fan', 'laptop', 'shower', 'paper',
  'person', 'paper towel dispenser', 'oven', 'blinds', 'rack', 'plate', 'blackboard', 'piano', 'suitcase', 'rail',
  'radiator', 'recycling bin', 'container', 'wardrobe', 'soap dispenser', 'telephone', 'bucket', 'clock', 'stand', 'light',
  'laundry basket', 'pipe', 'clothes dryer', 'guitar', 'toilet paper holder', 'seat', 'speaker', 'column', 'bicycle', 'ladder',
  'bathroom stall', 'shower wall', 'cup', 'jacket', 'storage bin', 'coffee maker', 'dishwasher', 'paper towel roll', 'machine', 'mat',
  'windowsill', 'bar', 'toaster', 'bulletin board', 'ironing board', 'fireplace', 'soap dish', 'kitchen counter', 'doorframe', 'toilet paper dispenser',
  'mini fridge', 'fire extinguisher', 'ball', 'hat', 'shower curtain rod', 'water cooler', 'paper cutter', 'tray', 'shower door', 'pillar',
  'ledge', 'toaster oven', 'mouse', 'toilet seat cover dispenser', 'furniture', 'cart', 'storage container', 'scale', 'tissue box', 'light switch',
  'crate', 'power outlet', 'decoration', 'sign', 'projector', 'closet door', 'vacuum cleaner', 'candle', 'plunger', 'stuffed animal',
  'headphones', 'dish rack', 'broom', 'guitar case', 'range hood', 'dustpan', 'hair dryer', 'water bottle', 'handicap bar', 'purse',
  'vent', 'shower floor', 'water pitcher', 'mailbox', 'bowl', 'paper bag', 'alarm clock', 'music stand', 'projector screen', 'divider',
  'laundry detergent', 'bathroom counter', 'object', 'bathroom vanity', 'closet wall', 'laundry hamper', 'bathroom stall door', 'ceiling light', 'trash bin', 'dumbbell',
  'stair rail', 'tube', 'bathroom cabinet', 'cd case', 'closet rod', 'coffee kettle', 'structure', 'shower head', 'keyboard piano', 'case of water bottles',
  'coat rack', 'storage organizer', 'folded chair', 'fire alarm', 'power strip', 'calendar', 'poster', 'potted plant', 'luggage', 'mattress']
# Dataset settings layered on top of the base dataset config file.
# NOTE(review): nesting below is reconstructed from key order; without it
# DATA_CONFIG parses as null and ENABLED / CAPTION_PATH / GATHER_CAPTION etc.
# become duplicate keys in a single mapping. Confirm against the consumer.
DATA_CONFIG:
  _BASE_CONFIG_: cfgs/dataset_configs/scannet200_dataset.yaml
  # Presumably zero-based positions in CLASS_NAMES (0 / 2 / 35 would be
  # 'wall' / 'floor' / 'ceiling') — TODO confirm.
  ignore_class_idx: [0, 2, 35, 98, 136, 147, 153, 159, 166, 167, 183, 186, 191, 198]
  # Same values as ignore_class_idx minus 0, 2 and 35.
  trainonly_class_idx: [98, 136, 147, 153, 159, 166, 167, 183, 186, 191, 198]

  # NOTE(review): the KD label keys are assumed to belong to DATA_CONFIG
  # because they precede CAPTION_INFO — verify.
  LOAD_KD_LABEL_TRAIN: True
  KD_LABEL_DIR: ../data/scannetv2/scannet_multiview_lseg
  KD_LABEL_NORM: True

  CAPTION_INFO:
    # One sub-section per entry of KEY follows.
    KEY: [SCENE, VIEW, ENTITY]

    SCENE:
      ENABLED: False
      CAPTION_PATH: text_embed/caption.json
      GATHER_CAPTION: False

    # The only caption source with ENABLED: True in this file.
    VIEW:
      ENABLED: True
      CAPTION_PATH: text_embed/caption_kosmos_and_sw_125k_iou0.2-0.0.json
      IMAGE_CORR_PATH: scannet_caption_idx_kosmos_and_sw_125k_iou0.2-0.0.pkl
      SELECT: ratio
      NUM: 1
      RATIO: 0.2
      SAMPLE: 1
      GATHER_CAPTION: False

    ENTITY:
      ENABLED: False
      CAPTION_PATH: text_embed/caption_detic_crop_matching_idx.json
      IMAGE_CORR_PATH: scannetv2_detic_crop_matching_idx.pickle
      SELECT: ratio
      NUM: 1
      RATIO: 0.5
      GATHER_CAPTION: False

    # NOTE(review): assumed to apply to CAPTION_INFO as a whole rather than
    # to ENTITY only — verify where the loader reads these two keys.
    CAPTION_CORR_PATH_IN_ONE_FILE: True
    FILTER_WITH_N_CAPTIONS: -1
# Network definition: one sub-mapping per component, each selected by NAME.
# NOTE(review): nesting below is reconstructed from key order; without it
# MODEL parses as null and NAME / FEAT_NORM / LOGIT_SCALE / LOSS_WEIGHT etc.
# become duplicate keys where only the last occurrence survives. Confirm
# against the model builder.
MODEL:
  NAME: SparseUNetTextSeg
  REMAP_FROM_3DLANG: False
  REMAP_FROM_NOADAPTER: False

  VFE:
    NAME: IndoorVFE
    USE_XYZ: True

  BACKBONE_3D:
    NAME: SparseUNetIndoor
    IN_CHANNEL: 6
    MID_CHANNEL: 32
    BLOCK_RESIDUAL: True
    BLOCK_REPS: 2
    NUM_BLOCKS: 7
    CUSTOM_SP1X1: True

  ADAPTER:
    NAME: VLAdapter
    EVAL_ONLY: False
    NUM_ADAPTER_LAYERS: 2
    TEXT_DIM: -1
    LAST_NORM: False
    FEAT_NORM: False

  TASK_HEAD:
    NAME: TextSegHead
    EVAL_ONLY: True
    FEAT_NORM: False
    IN_FEAT_NAME: adapter_feats
    IN_CHANNEL: 512
    TEXT_EMBED:
      NAME: CLIP
      NORM: True
      PATH: text_embed/scannet_clip-ViT-B32_lseg.pth
    LOGIT_SCALE:
      value: 1.0
      learnable: False
    # NOTE(review): assumed to be a TASK_HEAD key, not part of LOGIT_SCALE.
    LOSS_WEIGHT: 0.0

  CAPTION_HEAD:
    NAME: CaptionHead
    IN_FEAT_NAME: adapter_feats
    POOLING_TYPE: avg
    FEAT_NORM: True
    LOGIT_SCALE:
      value: 100.0
      learnable: True
    # NOTE(review): the keys below are assumed to be CAPTION_HEAD keys, not
    # part of LOGIT_SCALE.
    CUDA_ENABLED: True
    POOL_OBJ: score
    LOSS_FUNC: NLL_NoReduce
    # One weight per caption type named in DATA_CONFIG's CAPTION_INFO KEY.
    LOSS_WEIGHT:
      SCENE: 0.1
      VIEW: 1.0
      ENTITY: 0.1

  KD_HEAD:
    NAME: KDHeadTemplate
    IN_FEAT_NAME: adapter_feats
    FEAT_NORM: True
    LOSS_CONFIG:
      LOSS_WEIGHT: 1.0
# Text encoder selection. Child keys are indented so they belong to
# TEXT_ENCODER instead of colliding with other sections' NAME keys.
# NOTE(review): assumed to be a top-level section rather than part of MODEL.
TEXT_ENCODER:
  NAME: CLIP
  BACKBONE: ViT-B/32  # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14']
  TEMPLATE: lseg
  EXTRACT_EMBED: False
# Training schedule and optimizer hyper-parameters. Child keys are indented
# so OPTIMIZATION is a mapping rather than a null followed by loose keys.
OPTIMIZATION:
  BATCH_SIZE_PER_GPU: 4
  NUM_EPOCHS: 128
  LR: 0.004  # 4e-3
  # SCHEDULER and OPTIMIZER carry the same value in this config.
  SCHEDULER: adam_onecycle
  OPTIMIZER: adam_onecycle
  WEIGHT_DECAY: 0.0001
  MOMENTUM: 0.9
  STEP_EPOCH: 50
  MULTIPLIER: 0.1
  CLIP_GRAD: False
  PCT_START: 0.4
  DIV_FACTOR: 2
  MOMS: [0.95, 0.85]
# Miscellaneous run-time switches. Child keys are indented so OTHERS is a
# mapping rather than a null followed by loose top-level keys.
OTHERS:
  PRINT_FREQ: 20
  SYNC_BN: False
  USE_AMP: True