From bf4d2a5338ff9e2cc7a34f1f5e4199e3901b0b67 Mon Sep 17 00:00:00 2001 From: xin-li-67 Date: Thu, 20 Apr 2023 18:53:12 +0800 Subject: [PATCH 1/3] integrate init --- .../topdown_regression/README.md | 1 + .../td-reg_res50_8x64e-210e_wflw-256x256.py | 121 ++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 configs/face_2d_keypoint/topdown_regression/README.md create mode 100644 configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py diff --git a/configs/face_2d_keypoint/topdown_regression/README.md b/configs/face_2d_keypoint/topdown_regression/README.md new file mode 100644 index 0000000000..db638c4f10 --- /dev/null +++ b/configs/face_2d_keypoint/topdown_regression/README.md @@ -0,0 +1 @@ +# Top-down regression-based pose estimation diff --git a/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py b/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py new file mode 100644 index 0000000000..998f363b96 --- /dev/null +++ b/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py @@ -0,0 +1,121 @@ +_base_ = [ + '../../../_base_/default_runtime.py', '../../../_base_/datasets/wflw.py' +] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + 
std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=98, + loss=dict(type='SmoothL1Loss', use_target_weight=True), + decoder=codec), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'WFLWDataset' +data_mode = 'topdown' +data_root = 'data/wflw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', scale_factor=[0.25], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# dataloaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks: NME is an error metric (lower is better), so keep the checkpoint with the smallest value +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less')) + +# evaluators +val_evaluator = 
dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator From 868eef4558f6445156dc4e663ff3d578cc9778ea Mon Sep 17 00:00:00 2001 From: xin-li-67 Date: Fri, 21 Apr 2023 13:18:53 +0800 Subject: [PATCH 2/3] 1. remove redundant base file 2. fix the bug in RandomBBoxTransform --- .../wflw/td-reg_res50_8x64e-210e_wflw-256x256.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py b/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py index 998f363b96..ecee86e66c 100644 --- a/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py +++ b/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py @@ -1,6 +1,4 @@ -_base_ = [ - '../../../_base_/default_runtime.py', '../../../_base_/datasets/wflw.py' -] +_base_ = ['../../../_base_/default_runtime.py'] # runtime train_cfg = dict(max_epochs=210, val_interval=1) @@ -67,7 +65,10 @@ dict(type='LoadImage'), dict(type='GetBBoxCenterScale'), dict(type='RandomFlip', direction='horizontal'), - dict(type='RandomBBoxTransform', scale_factor=[0.25], rotate_factor=80), + dict( + type='RandomBBoxTransform', + scale_factor=[0.75, 1.25], + rotate_factor=60), dict(type='TopdownAffine', input_size=codec['input_size']), dict(type='GenerateTarget', encoder=codec), dict(type='PackPoseInputs') From b163af0033fc28e580705dd96016309176ec42dc Mon Sep 17 00:00:00 2001 From: xin-li-67 Date: Fri, 21 Apr 2023 18:27:45 +0800 Subject: [PATCH 3/3] add readme and reset_wflw model intro --- .../topdown_regression/README.md | 16 +++++ .../topdown_regression/wflw/resnet_wflw.md | 58 +++++++++++++++++++ .../topdown_regression/wflw/resnet_wflw.yml | 21 +++++++ 3 files changed, 95 insertions(+) create mode 100644 configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.md create mode 100644 
configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.yml diff --git a/configs/face_2d_keypoint/topdown_regression/README.md b/configs/face_2d_keypoint/topdown_regression/README.md index db638c4f10..e5550ae2a8 100644 --- a/configs/face_2d_keypoint/topdown_regression/README.md +++ b/configs/face_2d_keypoint/topdown_regression/README.md @@ -1 +1,17 @@ # Top-down regression-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. At the second stage, regression-based methods directly regress the keypoint coordinates given the features extracted from the bounding box area, following the paradigm introduced in [DeepPose: Human pose estimation via deep neural networks](http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html). + +
+ +
+ +## Results and Models + +### WFLW Dataset + +Result on WFLW test set + +| Model | Input Size | NME*test* | NME*pose* | NME*illumination* | NME*occlusion* | NME*blur* | NME*makeup* | NME*expression* | ckpt | log | +| :--------- | :--------: | :------------------: | :------------------: | :--------------------------: | :-----------------------: | :------------------: | :--------------------: | :------------------------: | :--------: | :-------: | +| [ResNet-50](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py) | 256x256 | 4.85 | 8.50 | 4.81 | 5.69 | 5.45 | 4.82 | 5.20 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_20210303.log.json) | diff --git a/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.md b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.md new file mode 100644 index 0000000000..7dc1ddb2ce --- /dev/null +++ b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.md @@ -0,0 +1,58 @@ + + +
+DeepPose (CVPR'2014) + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +
+ + + +
+ResNet (CVPR'2016) + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +
+ + + +
+WFLW (CVPR'2018) + +```bibtex +@inproceedings{wu2018look, + title={Look at boundary: A boundary-aware face alignment algorithm}, + author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={2129--2138}, + year={2018} +} +``` + +
+ +Results on WFLW dataset + +The model is trained on WFLW train. + +| Arch | Input Size | NME*test* | NME*pose* | NME*illumination* | NME*occlusion* | NME*blur* | NME*makeup* | NME*expression* | ckpt | log | +| :--------- | :--------: | :------------------: | :------------------: | :--------------------------: | :-----------------------: | :------------------: | :--------------------: | :------------------------: | :--------: | :-------: | +| [deeppose_res50](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py) | 256x256 | 4.85 | 8.50 | 4.81 | 5.69 | 5.45 | 4.82 | 5.20 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_20210303.log.json) | diff --git a/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.yml b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.yml new file mode 100644 index 0000000000..5ee39e9446 --- /dev/null +++ b/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.yml @@ -0,0 +1,21 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8x64e-210e_wflw-256x256.py + In Collection: ResNet + Metadata: + Architecture: + - DeepPose + - ResNet + Training Data: WFLW + Name: td-reg_res50_8x64e-210e_wflw-256x256 + Results: + - Dataset: WFLW + Metrics: + NME blur: 5.45 + NME expression: 5.2 + NME illumination: 4.81 + NME makeup: 4.82 + NME occlusion: 5.69 + NME pose: 8.5 + NME test: 4.85 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth