support h2rboxv2p

yangxue0827 · Jun 13, 2024 · 2c68020 · 2c68020
1 parent 3d8d493
commit 2c68020
Show file tree

Hide file tree

Showing 47 changed files with 3,008 additions and 52 deletions.
diff --git a/README.md b/README.md
@@ -54,7 +54,7 @@ If you find this work helpful for your research, please consider giving this rep
 ```bibtex
 @article{li2024scene,
   title={Scene Graph Generation in Large-Size VHR Satellite Imagery: A Large-Scale Dataset and A Context-Aware Approach},
-  author={L1, Yansheng and Wang, Linlin and Wang, Tingzhu and Yang, Xue and Wang, Qi and Sun, Xian and Wang, Wenbin and Luo, Junwei and Deng, Youming and Li, Haifeng and Dang, Bo and Zhang, Yongjun and Yan Junchi},
+  author={Li, Yansheng and Wang, Linlin and Wang, Tingzhu and Yang, Xue and Wang, Qi and Sun, Xian and Wang, Wenbin and Luo, Junwei and Deng, Youming and Li, Haifeng and Dang, Bo and Zhang, Yongjun and Yi, Yu and Yan, Junchi},
   journal={arXiv preprint arXiv:},
   year={2024}
 }

diff --git a/configs/ars_detr/csl_detr_r50_1x_rsg.py b/configs/ars_detr/csl_detr_r50_1x_rsg.py
@@ -6,7 +6,7 @@
     '../_base_/default_runtime.py'
 ]
 model = dict(
-    type='ARSDETR',
+    type='ARSDETRCrop',
     backbone=dict(
         type='ResNet',
         depth=50,

diff --git a/configs/ars_detr/deformable_detr_r50_rsg.py b/configs/ars_detr/deformable_detr_r50_rsg.py
@@ -5,7 +5,7 @@
     '../_base_/default_runtime.py'
 ]
 model = dict(
-    type='RotatedDeformableDETR',
+    type='RotatedDeformableDETRCrop',
     backbone=dict(
         type='ResNet',
         depth=50,

diff --git a/configs/gliding_vertex/gliding_vertex_r50_fpn_1x_rsg_le90.py b/configs/gliding_vertex/gliding_vertex_r50_fpn_1x_rsg_le90.py
@@ -5,7 +5,7 @@
 
 angle_version = 'le90'
 model = dict(
-    type='GlidingVertex',
+    type='GlidingVertexCrop',
     backbone=dict(
         type='ResNet',
         depth=50,

diff --git a/configs/gliding_vertex/gliding_vertex_r50_fpn_3x_rsg_le90.py b/configs/gliding_vertex/gliding_vertex_r50_fpn_3x_rsg_le90.py
@@ -5,7 +5,7 @@
 
 angle_version = 'le90'
 model = dict(
-    type='GlidingVertex',
+    type='GlidingVertexCrop',
     backbone=dict(
         type='ResNet',
         depth=50,

diff --git a/configs/h2rbox/h2rbox_r50_fpn_1x_rsg_le90.py b/configs/h2rbox/h2rbox_r50_fpn_1x_rsg_le90.py
@@ -7,7 +7,7 @@
 
 # model settings
 model = dict(
-    type='H2RBox',
+    type='H2RBoxCrop',
     crop_size=(1024, 1024),
     backbone=dict(
         type='ResNet',

diff --git a/configs/h2rbox/h2rbox_r50_fpn_3x_rsg_le90.py b/configs/h2rbox/h2rbox_r50_fpn_3x_rsg_le90.py
@@ -7,7 +7,7 @@
 
 # model settings
 model = dict(
-    type='H2RBox',
+    type='H2RBoxCrop',
     crop_size=(1024, 1024),
     backbone=dict(
         type='ResNet',

diff --git a/configs/h2rbox_v2p/h2rbox_v2p_r50_fpn_1x_dota_le90.py b/configs/h2rbox_v2p/h2rbox_v2p_r50_fpn_1x_dota_le90.py
@@ -0,0 +1,91 @@
+_base_ = [  # base configs this file builds on: dataset, schedule, runtime
+    '../_base_/datasets/dotav1.py', '../_base_/schedules/schedule_1x.py',
+    '../_base_/default_runtime.py'
+]
+angle_version = 'le90'  # reused by the bbox coder, RRandomFlip and the dataset splits
+
+# model settings: H2RBoxV2PDetector = ResNet backbone + FPN neck + H2RBoxV2PHead
+model = dict(
+    type='H2RBoxV2PDetector',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),  # four outputs, one per FPN `in_channels` entry
+        frozen_stages=1,
+        zero_init_residual=False,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        style='pytorch',
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,  # equals the head's `in_channels`
+        start_level=1,
+        add_extra_convs='on_output',  # use P5
+        num_outs=5,  # same count as the head's five `strides`
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='H2RBoxV2PHead',
+        num_classes=15,  # presumably the DOTA-v1.0 category count; confirm against dotav1.py
+        in_channels=256,
+        stacked_convs=4,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128],
+        center_sampling=True,
+        center_sample_radius=1.5,
+        norm_on_bbox=True,
+        centerness_on_reg=True,
+        square_cls=[1, 9, 11],  # class indices; exact handling is defined in H2RBoxV2PHead (not shown)
+        resize_cls=[1],  # class indices; exact handling is defined in H2RBoxV2PHead (not shown)
+        scale_angle=False,
+        bbox_coder=dict(
+            type='DistanceAnglePointCoder', angle_version=angle_version),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='IoULoss', loss_weight=1.0),
+        loss_centerness=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_ss_symmetry=dict(  # lowest-weighted loss term in this head (0.2 vs 1.0)
+            type='SmoothL1Loss', loss_weight=0.2, beta=0.1)),
+    # training and testing settings
+    train_cfg=None,  # no train-time settings are passed to the detector
+    test_cfg=dict(
+        nms_pre=2000,
+        min_bbox_size=0,
+        score_thr=0.05,
+        nms=dict(iou_thr=0.1),
+        max_per_img=2000))
+
+img_norm_cfg = dict(  # unpacked into the Normalize step below
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(type='RResize', img_scale=(1024, 1024)),
+    dict(
+        type='RRandomFlip',
+        flip_ratio=[0.25, 0.25, 0.25],  # one ratio per entry of `direction`
+        direction=['horizontal', 'vertical', 'diagonal'],
+        version=angle_version),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+data = dict(  # only these keys are set here; everything else comes from the dataset base config
+    train=dict(pipeline=train_pipeline, version=angle_version),
+    val=dict(version=angle_version),  # no pipeline override for val
+    test=dict(version=angle_version))  # no pipeline override for test
+
+optimizer = dict(
+    _delete_=True,  # presumably replaces, rather than merges with, the optimizer from schedule_1x.py (mmcv `_delete_`); confirm
+    type='AdamW',
+    lr=0.00005,
+    betas=(0.9, 0.999),
+    weight_decay=0.05)
diff --git a/configs/h2rbox_v2p/h2rbox_v2p_r50_fpn_1x_rsg_le90.py b/configs/h2rbox_v2p/h2rbox_v2p_r50_fpn_1x_rsg_le90.py
@@ -0,0 +1,125 @@
+_base_ = [  # base configs this file builds on: dataset, schedule, runtime
+    '../_base_/datasets/rsg.py', '../_base_/schedules/schedule_1x.py',
+    '../_base_/default_runtime.py'
+]
+angle_version = 'le90'  # reused by the bbox coder, RRandomFlip and the dataset splits
+
+# model settings: H2RBoxV2PDetectorCrop = ResNet backbone + FPN neck + H2RBoxV2PHead
+model = dict(
+    type='H2RBoxV2PDetectorCrop',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),  # four outputs, one per FPN `in_channels` entry
+        frozen_stages=1,
+        zero_init_residual=False,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        style='pytorch',
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,  # equals the head's `in_channels`
+        start_level=1,
+        add_extra_convs='on_output',  # use P5
+        num_outs=5,  # same count as the head's five `strides`
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='H2RBoxV2PHead',
+        num_classes=48,  # presumably the RSG category count; confirm against rsg.py
+        in_channels=256,
+        stacked_convs=4,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128],
+        center_sampling=True,
+        center_sample_radius=1.5,
+        norm_on_bbox=True,
+        centerness_on_reg=True,
+        square_cls=[4, 44],  # class indices; exact handling is defined in H2RBoxV2PHead (not shown)
+        # resize_cls=[1],  (left unset here, so the head's own default applies)
+        scale_angle=False,
+        bbox_coder=dict(
+            type='DistanceAnglePointCoder', angle_version=angle_version),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='IoULoss', loss_weight=1.0),
+        loss_centerness=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_ss_symmetry=dict(  # lowest-weighted loss term in this head (0.2 vs 1.0)
+            type='SmoothL1Loss', loss_weight=0.2, beta=0.1)),
+    # training and testing settings
+    train_cfg=None,  # no train-time settings are passed to the detector
+    test_cfg=dict(
+        nms_pre=2000,
+        min_bbox_size=0,
+        score_thr=0.05,
+        nms=dict(iou_thr=0.1),
+        max_per_img=2000))
+
+img_norm_cfg = dict(  # unpacked into the Normalize steps of the pipelines in this file
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(type='RResize', img_scale=(1024, 1024)),
+    dict(
+        type='RRandomFlip',
+        flip_ratio=[0.25, 0.25, 0.25],  # one ratio per entry of `direction`
+        direction=['horizontal', 'vertical', 'diagonal'],
+        version=angle_version),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+
+test_pipeline = [  # inference pipeline: no annotations loaded, no resize step, no flipping
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        scale_factor=1.0,  # single scale
+        flip=False,
+        transforms=[
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=64),  # note: 64 here, whereas train_pipeline pads to 32
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+
+data_root = 'data/RSG/'
+data = dict(
+    train=dict(type='RSGWSOODDataset', pipeline=train_pipeline,
+               ann_file=data_root + 'train/annfiles/',
+               img_prefix=data_root + 'train/images/',
+               version=angle_version),
+    val=dict(type='RSGWSOODDataset', pipeline=test_pipeline,
+             ann_file=data_root + 'test/annfiles/',
+             img_prefix=data_root + 'test/images/',
+             version=angle_version),
+    test=dict(type='RSGWSOODDataset', pipeline=test_pipeline,
+              ann_file=data_root + 'test/annfiles/',
+              img_prefix=data_root + 'test/images/',
+              version=angle_version))
+
+data = dict(
+    train=dict(pipeline=train_pipeline, version=angle_version),
+    val=dict(version=angle_version),
+    test=dict(version=angle_version))
+
+optimizer = dict(
+    _delete_=True,  # presumably replaces, rather than merges with, the optimizer from schedule_1x.py (mmcv `_delete_`); confirm
+    type='AdamW',
+    lr=0.0001,
+    betas=(0.9, 0.999),
+    weight_decay=0.05)
+
+checkpoint_config = dict(interval=1, max_keep_ckpts=1)  # presumably: save every epoch, keep only the newest checkpoint; confirm
+evaluation = dict(interval=6, metric='mAP')  # presumably: evaluate every 6 epochs (units depend on the runner); confirm
+