From b146957ca8d37b9262da22f0385166b51cce90d9 Mon Sep 17 00:00:00 2001
From: Xin Li <7219519+xin-li-67@users.noreply.github.com>
Date: Mon, 24 Apr 2023 11:18:05 +0800
Subject: [PATCH] [MMSIG-87] Migrate SoftWingLoss config to 1.x (#2287)
---
.../topdown_regression/README.md | 1 +
.../wflw/resnet_softwingloss_wflw.md | 75 +++++++++++
.../wflw/resnet_softwingloss_wflw.yml | 16 +++
...50_softwingloss_8xb64-210e_wflw-256x256.py | 122 ++++++++++++++++++
4 files changed, 214 insertions(+)
create mode 100644 configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md
create mode 100644 configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml
create mode 100644 configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py
diff --git a/configs/face_2d_keypoint/topdown_regression/README.md b/configs/face_2d_keypoint/topdown_regression/README.md
index fef409acd3..b410a5e59c 100644
--- a/configs/face_2d_keypoint/topdown_regression/README.md
+++ b/configs/face_2d_keypoint/topdown_regression/README.md
@@ -15,3 +15,4 @@ Result on WFLW test set
| Model | Input Size | NME | ckpt | log |
| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
| [ResNet-50](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py) | 256x256 | 4.88 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_20210303.log.json) |
+| [ResNet-50+SoftWingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.44 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss_20211212.log.json) |
diff --git a/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md
new file mode 100644
index 0000000000..f1d9629d0a
--- /dev/null
+++ b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md
@@ -0,0 +1,75 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+SoftWingloss (TIP'2021)
+
+```bibtex
+@article{lin2021structure,
+ title={Structure-Coherent Deep Feature Learning for Robust Face Alignment},
+ author={Lin, Chunze and Zhu, Beier and Wang, Quan and Liao, Renjie and Qian, Chen and Lu, Jiwen and Zhou, Jie},
+ journal={IEEE Transactions on Image Processing},
+ year={2021},
+ publisher={IEEE}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train set.
+
+| Model | Input Size | NME | ckpt | log |
+| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [ResNet-50+SoftWingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.44 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss_20211212.log.json) |
diff --git a/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml
new file mode 100644
index 0000000000..7c65215ccc
--- /dev/null
+++ b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ - SoftWingloss
+ Training Data: WFLW
+ Name: td-reg_res50_softwingloss_8xb64-210e_wflw-256x256
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME: 4.44
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth
diff --git a/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py b/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py
new file mode 100644
index 0000000000..eb4199073d
--- /dev/null
+++ b/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and run validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up plus an epoch-based multi-step schedule
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over iterations 0-500 (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True) # milestones are epochs; presumably LR is scaled by gamma at each - TODO confirm
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512) # presumably 8 GPUs x 64 samples (`8xb64` in the config name) - TODO confirm
+
+# codec settings: this dict is reused below as the head decoder, the target encoder and the input-size source
+codec = dict(type='RegressionLabel', input_size=(256, 256))
+
+# model settings: ResNet-50 backbone -> GlobalAveragePooling neck -> RegressionHead with SoftWingLoss
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,
+ num_joints=98, # presumably the number of WFLW landmarks - TODO confirm
+ loss=dict(type='SoftWingLoss', use_target_weight=True),
+ decoder=codec),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings: shared by the train and val/test datasets below
+dataset_type = 'WFLWDataset'
+data_mode = 'topdown'
+data_root = 'data/wflw/'
+
+# pipelines: training adds RandomFlip, RandomBBoxTransform and GenerateTarget; validation omits them
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# dataloaders: train uses the train annotations with shuffling; val uses the test annotations without shuffling
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader # testing reuses the validation dataloader unchanged
+
+# hooks: save_best='NME' with rule='less' (presumably keeps the checkpoint with the lowest NME - TODO confirm)
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less'))
+
+# evaluators: NME metric with norm_mode='keypoint_distance'
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator # testing reuses the validation evaluator unchanged