# kmax_r50.yaml
# kMaX-DeepLab, ResNet-50 backbone, Cityscapes panoptic segmentation.
# NOTE(review): the nesting below was reconstructed from the detectron2-style
# config layout (MODEL / SOLVER / INPUT / TEST / DATALOADER sections); confirm
# it against the project's config defaults before relying on it.
MODEL:
  # Backbone part.
  BACKBONE:
    FREEZE_AT: 0
    # We customize the momentum and eps in SyncBN to align with the TF
    # implementation.
    NAME: "custom_bn_build_resnet_backbone"
  WEIGHTS: "../R-50.pkl"
  PIXEL_MEAN: [127.5, 127.5, 127.5]
  PIXEL_STD: [127.5, 127.5, 127.5]
  RESNETS:
    DEPTH: 50
    STEM_TYPE: "basic"  # not used
    STEM_OUT_CHANNELS: 64
    STRIDE_IN_1X1: False
    OUT_FEATURES: ["stem", "res2", "res3", "res4", "res5"]
    NORM: "SyncBN"
    RES5_MULTI_GRID: [1, 1, 1]  # not used

  # kMaX part.
  META_ARCHITECTURE: "kMaXDeepLab"
  SEM_SEG_HEAD:
    NAME: "kMaXDeepLabHead"
    IGNORE_VALUE: 255
    NUM_CLASSES: 19
    LOSS_WEIGHT: 1.0

  KMAX_DEEPLAB:
    SAVE_VIS_NUM: 0
    SHARE_FINAL_MATCHING: True
    DEEP_SUPERVISION: True
    # Written with a decimal point so YAML 1.1 loaders resolve it as a float
    # instead of a string.
    NO_OBJECT_WEIGHT: 1.0e-5
    CLASS_WEIGHT: 3.0
    DICE_WEIGHT: 3.0
    MASK_WEIGHT: 0.3
    INSDIS_WEIGHT: 1.0
    AUX_SEMANTIC_WEIGHT: 1.0
    USE_AUX_SEMANTIC_DECODER: False
    PIXEL_INSDIS_TEMPERATURE: 0.6
    PIXEL_INSDIS_SAMPLE_K: 4096
    AUX_SEMANTIC_TEMPERATURE: 1.0
    AUX_SEMANTIC_SAMPLE_K: 0

    PIXEL_DEC:
      NAME: "kMaXPixelDecoder"
      IN_FEATURES: ["stem", "res2", "res3", "res4", "res5"]
      DEC_LAYERS: [1, 5, 1, 1, 1]
      LAYER_TYPES: ["axial", "axial", "bottleneck", "bottleneck", "bottleneck"]
      DEC_CHANNELS: [512, 256, 128, 64, 32]

    TRANS_DEC:
      NAME: "kMaXTransformerDecoder"
      DEC_LAYERS: [2, 2, 2]
      NUM_OBJECT_QUERIES: 256
      # [512 * 4, 256 * 4, 128 * 4], the last dim is for final prediction.
      IN_CHANNELS: [2048, 1024, 512, 128]
      DROP_PATH_PROB: 0.2

    TEST:
      SEMANTIC_ON: False
      INSTANCE_ON: False
      PANOPTIC_ON: True
      PIXEL_CONFIDENCE_THRESHOLD: 0.4
      CLASS_THRESHOLD_THING: 0.5
      CLASS_THRESHOLD_STUFF: 0.3
      REORDER_CLASS_WEIGHT: 1.0
      REORDER_MASK_WEIGHT: 0.0
      OVERLAP_THRESHOLD: 0.9

DATASETS:
  TRAIN: ("cityscapes_fine_panoptic_train",)
  TEST: ("cityscapes_fine_panoptic_val",)

SOLVER:
  IMS_PER_BATCH: 32
  BASE_LR: 0.0003
  LR_SCHEDULER_NAME: "TF2WarmupPolyLR"
  MAX_ITER: 60000
  WARMUP_ITERS: 5000
  WEIGHT_DECAY: 0.05
  OPTIMIZER: "ADAMW"
  BACKBONE_MULTIPLIER: 0.1
  CLIP_GRADIENTS:
    ENABLED: False
  AMP:
    ENABLED: True

INPUT:
  IMAGE_SIZE: [1025, 2049]
  MIN_SCALE: 0.5
  MAX_SCALE: 2.0
  FORMAT: "RGB"
  DATASET_MAPPER_NAME: "cityscapes_panoptic_lsj"
  MIN_SIZE_TEST: 1025
  MAX_SIZE_TEST: 2049

TEST:
  EVAL_PERIOD: 5000

DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 4

VERSION: 2