diff --git a/configs/face_2d_keypoint/topdown_regression/README.md b/configs/face_2d_keypoint/topdown_regression/README.md index fef409acd3..b410a5e59c 100644 --- a/configs/face_2d_keypoint/topdown_regression/README.md +++ b/configs/face_2d_keypoint/topdown_regression/README.md @@ -15,3 +15,4 @@ Result on WFLW test set | Model | Input Size | NME | ckpt | log | | :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: | | [ResNet-50](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py) | 256x256 | 4.88 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_20210303.log.json) | +| [ResNet-50+SoftWingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.67 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss_20211212.log.json) | diff --git a/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md new file mode 100644 index 0000000000..f1d9629d0a --- /dev/null +++ b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md @@ -0,0 +1,75 @@ + + +
+DeepPose (CVPR'2014) + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +
+ + + +
+ResNet (CVPR'2016) + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +
+ + + +
+SoftWingloss (TIP'2021) + +```bibtex +@article{lin2021structure, + title={Structure-Coherent Deep Feature Learning for Robust Face Alignment}, + author={Lin, Chunze and Zhu, Beier and Wang, Quan and Liao, Renjie and Qian, Chen and Lu, Jiwen and Zhou, Jie}, + journal={IEEE Transactions on Image Processing}, + year={2021}, + publisher={IEEE} +} +``` + +
+ + + +
+WFLW (CVPR'2018) + +```bibtex +@inproceedings{wu2018look, + title={Look at boundary: A boundary-aware face alignment algorithm}, + author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={2129--2138}, + year={2018} +} +``` + +
+ +Results on WFLW dataset + +The model is trained on WFLW train set. + +| Model | Input Size | NME | ckpt | log | +| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [ResNet-50+SoftWingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.44 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss_20211212.log.json) | diff --git a/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml new file mode 100644 index 0000000000..7c65215ccc --- /dev/null +++ b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py + In Collection: ResNet + Metadata: + Architecture: + - DeepPose + - ResNet + - SoftWingloss + Training Data: WFLW + Name: td-reg_res50_softwingloss_8xb64-210e_wflw-256x256 + Results: + - Dataset: WFLW + Metrics: + NME: 4.44 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth diff --git a/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py b/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py new file mode 100644 index 0000000000..eb4199073d --- /dev/null +++ b/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=98, + loss=dict(type='SoftWingLoss', use_target_weight=True), + decoder=codec), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'WFLWDataset' +data_mode = 'topdown' +data_root = 'data/wflw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# dataloaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less')) + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator