diff --git a/.dev_scripts/train_rtts.sh b/.dev_scripts/train_rtts.sh
new file mode 100755
index 0000000..531f083
--- /dev/null
+++ b/.dev_scripts/train_rtts.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Start the seven RTTS detector baselines through tools/slurm_train.sh.
+# Usage: train_rtts.sh PARTITION, where PARTITION is optional and defaults to llmit2.
+# GPUS, GPUS_PER_NODE, CPUS_PER_TASK and SRUN_ARGS can be overridden from the environment.
+PARTITION=${1:-llmit2}
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+
+# Each job redirects its stdout to its own log file in this directory.
+log_dir="work_dirs/slurm_logs"
+mkdir -p "$log_dir"
+
+# Every job below is started in the background with the same resource settings.
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh "$PARTITION" rtts-atss_r50_1x configs/detection/rtts_dataset/atss_r50_fpn_1x_rtts-coco.py work_dirs/rtts/atss_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/atss_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_atss_r50_fpn_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh "$PARTITION" rtts-cascade_r50_1x configs/detection/rtts_dataset/cascade-rcnn_r50_fpn_1x_rtts-coco.py work_dirs/rtts/cascade-rcnn_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/cascade-rcnn_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_cascade-rcnn_r50_fpn_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh "$PARTITION" rtts-faster_r50_1x configs/detection/rtts_dataset/faster-rcnn_r50_fpn_1x_rtts-coco.py work_dirs/rtts/faster-rcnn_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/faster-rcnn_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_faster-rcnn_r50_fpn_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh "$PARTITION" rtts-fcos_r50_1x configs/detection/rtts_dataset/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco.py work_dirs/rtts/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco/eval_result > "$log_dir/rtts_fcos_r50-caffe_fpn_gn-head_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh "$PARTITION" rtts-paa_r50_1x configs/detection/rtts_dataset/paa_r50_fpn_1x_rtts-coco.py work_dirs/rtts/paa_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/paa_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_paa_r50_fpn_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh "$PARTITION" rtts-retinanet_r50_1x configs/detection/rtts_dataset/retinanet_r50_fpn_1x_rtts-coco.py work_dirs/rtts/retinanet_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/retinanet_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_retinanet_r50_fpn_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh "$PARTITION" rtts-tood_r50_1x configs/detection/rtts_dataset/tood_r50_fpn_1x_rtts-coco.py work_dirs/rtts/tood_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/tood_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_tood_r50_fpn_1x_rtts-coco.log" &
diff --git a/configs/detection/rtts_dataset/README.md b/configs/detection/rtts_dataset/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/configs/detection/rtts_dataset/atss_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/atss_r50_fpn_1x_rtts-coco.py
new file mode 100644
index 0000000..a274ead
--- /dev/null
+++
b/configs/detection/rtts_dataset/atss_r50_fpn_1x_rtts-coco.py @@ -0,0 +1,86 @@ +_base_ = [ + '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] + +# model settings +model = dict( + type='ATSS', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=32), + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='ATSSHead', + num_classes=5, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +# optimizer +optim_wrapper = dict( + optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) + +# add WandbVisBackend +# vis_backends = [ +# dict(type='LocalVisBackend'), +# dict(type='WandbVisBackend', +# init_kwargs=dict( +# 
project='rtts_detection', +# name='atss_r50_fpn_1x_rtts', +# entity='lqit', +# ) +# ) +# ] +# visualizer = dict( +# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') diff --git a/configs/detection/rtts_dataset/cascade-rcnn_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/cascade-rcnn_r50_fpn_1x_rtts-coco.py new file mode 100644 index 0000000..b575435 --- /dev/null +++ b/configs/detection/rtts_dataset/cascade-rcnn_r50_fpn_1x_rtts-coco.py @@ -0,0 +1,204 @@ +_base_ = [ + '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] + +# model settings +model = dict( + type='CascadeRCNN', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=32), + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='CascadeRoIHead', + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='Shared2FCBBoxHead', + 
in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=5, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=5, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=5, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ]), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=[ + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + 
add_gt_as_proposals=True), + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False) + ]), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100))) + +# add WandbVisBackend +# vis_backends = [ +# dict(type='LocalVisBackend'), +# dict(type='WandbVisBackend', +# init_kwargs=dict( +# project='rtts_detection', +# name='cascade-rcnn_r50_fpn_1x_rtts', +# entity='lqit', +# ) +# ) +# ] +# visualizer = dict( +# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') diff --git a/configs/detection/rtts_dataset/faster-rcnn_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/faster-rcnn_r50_fpn_1x_rtts-coco.py new file mode 100644 index 0000000..02f15e5 --- /dev/null +++ b/configs/detection/rtts_dataset/faster-rcnn_r50_fpn_1x_rtts-coco.py @@ -0,0 +1,133 @@ +_base_ = [ + '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] + +# model settings +model = dict( + type='FasterRCNN', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=32), + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + 
norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=5, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), 
+ sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100) + # soft-nms is also supported for rcnn testing + # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) + )) + +# add WandbVisBackend +# vis_backends = [ +# dict(type='LocalVisBackend'), +# dict(type='WandbVisBackend', +# init_kwargs=dict( +# project='rtts_detection', +# name='faster-rcnn_r50_fpn_1x_rtts', +# entity='lqit', +# ) +# ) +# ] +# visualizer = dict( +# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') diff --git a/configs/detection/rtts_dataset/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco.py b/configs/detection/rtts_dataset/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco.py new file mode 100644 index 0000000..b57a75a --- /dev/null +++ b/configs/detection/rtts_dataset/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco.py @@ -0,0 +1,91 @@ +_base_ = [ + '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] + +# model settings +model = dict( + type='FCOS', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[102.9801, 115.9465, 122.7717], + std=[1.0, 1.0, 1.0], + bgr_to_rgb=False, + pad_size_divisor=32), + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron/resnet50_caffe')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', # use P5 + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='FCOSHead', + 
num_classes=5, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', loss_weight=1.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # testing settings + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=0, + end=12, + by_epoch=True, + milestones=[8, 11], + gamma=0.1) +] + +# optimizer +optim_wrapper = dict( + optimizer=dict(lr=0.01), + paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.), + clip_grad=dict(max_norm=35, norm_type=2)) # loss may NaN without clip_grad + +# add WandbVisBackend +# vis_backends = [ +# dict(type='LocalVisBackend'), +# dict(type='WandbVisBackend', +# init_kwargs=dict( +# project='rtts_detection', +# name='fcos_r50-caffe_fpn_gn-head_1x_rtts', +# entity='lqit', +# ) +# ) +# ] +# visualizer = dict( +# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') diff --git a/configs/detection/rtts_dataset/paa_r50_fpn_1x_rtts.py b/configs/detection/rtts_dataset/paa_r50_fpn_1x_rtts-coco.py similarity index 100% rename from configs/detection/rtts_dataset/paa_r50_fpn_1x_rtts.py rename to configs/detection/rtts_dataset/paa_r50_fpn_1x_rtts-coco.py diff --git a/configs/detection/rtts_dataset/retinanet_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/retinanet_r50_fpn_1x_rtts-coco.py new file mode 100644 index 0000000..5d6c8ba --- /dev/null +++ b/configs/detection/rtts_dataset/retinanet_r50_fpn_1x_rtts-coco.py @@ -0,0 +1,105 @@ +_base_ = [ + '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] + +# model settings +model 
= dict( + type='RetinaNet', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=32), + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_input', + num_outs=5), + bbox_head=dict( + type='RetinaHead', + num_classes=5, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + # model training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + sampler=dict( + type='PseudoSampler'), # Focal loss should use PseudoSampler + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) + +# optimizer +optim_wrapper = dict( + optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001), + clip_grad=dict(max_norm=35, norm_type=2)) # loss may NaN without clip_grad + +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=0, + end=12, + by_epoch=True, + milestones=[8, 11], + gamma=0.1) +] + 
+# add WandbVisBackend +# vis_backends = [ +# dict(type='LocalVisBackend'), +# dict(type='WandbVisBackend', +# init_kwargs=dict( +# project='rtts_detection', +# name='retinanet_r50_fpn_1x_rtts', +# entity='lqit', +# ) +# ) +# ] +# visualizer = dict( +# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') diff --git a/configs/detection/rtts_dataset/tood_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/tood_r50_fpn_1x_rtts-coco.py new file mode 100644 index 0000000..7f41d2a --- /dev/null +++ b/configs/detection/rtts_dataset/tood_r50_fpn_1x_rtts-coco.py @@ -0,0 +1,95 @@ +_base_ = [ + '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] + +# model settings +model = dict( + type='TOOD', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=32), + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='TOODHead', + num_classes=5, + in_channels=256, + stacked_convs=6, + feat_channels=256, + anchor_type='anchor_free', + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + initial_loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + activated=True, # use probability instead of logit as input + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_cls=dict( + type='QualityFocalLoss', + use_sigmoid=True, 
+ activated=True, # use probability instead of logit as input + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0)), + train_cfg=dict( + initial_epoch=4, + initial_assigner=dict(type='ATSSAssigner', topk=9), + assigner=dict(type='TaskAlignedAssigner', topk=13), + alpha=1, + beta=6, + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +# optimizer +optim_wrapper = dict( + optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) + +# add WandbVisBackend +# vis_backends = [ +# dict(type='LocalVisBackend'), +# dict(type='WandbVisBackend', +# init_kwargs=dict( +# project='rtts_detection', +# name='tood_r50_fpn_1x_rtts', +# entity='lqit', +# ) +# ) +# ] +# visualizer = dict( +# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')