From 93119ae8d850c9fba56adf07c11212070bdfbd17 Mon Sep 17 00:00:00 2001
From: Tau
Date: Sun, 14 May 2023 23:40:36 +0800
Subject: [PATCH] [Feature] Upload RTMPose Hand (alpha version) (#2353)

---
 configs/face_2d_keypoint/rtmpose/README.md    |   2 +-
 .../rtmpose/lapa/rtmpose_lapa.md              |   2 +-
 .../rtmpose-m_8xb256-210e_hand5-256x256.py    | 381 ++++++++++++++++++
 .../rtmpose/hand5/rtmpose_hand5.md            |  67 +++
 .../rtmpose/hand5/rtmpose_hand5.yml           |  27 ++
 projects/rtmpose/README.md                    |  42 +-
 projects/rtmpose/README_CN.md                 |  44 +-
 .../rtmdet/hand/rtmdet_nano_320-8xb32_hand.py | 171 ++++++++
 8 files changed, 721 insertions(+), 15 deletions(-)
 create mode 100644 configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py
 create mode 100644 configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md
 create mode 100644 configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml
 create mode 100644 projects/rtmpose/rtmdet/hand/rtmdet_nano_320-8xb32_hand.py

diff --git a/configs/face_2d_keypoint/rtmpose/README.md b/configs/face_2d_keypoint/rtmpose/README.md
index 5381e966f6..d0c7f55fb4 100644
--- a/configs/face_2d_keypoint/rtmpose/README.md
+++ b/configs/face_2d_keypoint/rtmpose/README.md
@@ -29,4 +29,4 @@ Results on LaPa dataset
 
 | Model | Input Size | NME | Details and Download |
 | :-------: | :--------: | :--: | :---------------------------------------: |
-| RTMPose-m | 256x256 | 1.29 | [rtmpose_lapa.md](./wflw/rtmpose_lapa.md) |
+| RTMPose-m | 256x256 | 1.29 | [rtmpose_lapa.md](./lapa/rtmpose_lapa.md) |
diff --git a/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md b/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md
index 62a3f25157..9638de7551 100644
--- a/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md
+++ b/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md
@@ -33,7 +33,7 @@
 
-Results on COCO-WholeBody-Face val set
+Results on LaPa val set
 
 | Arch | Input Size | NME | ckpt | log |
 | :------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :------------------------------------------------------------: |
diff --git a/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py
new file mode 100644
index 0000000000..689dc68096
--- /dev/null
+++ b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py
@@ -0,0 +1,381 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# coco-hand onehand10k freihand2d rhd2d halpehand
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 10
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+    paramwise_cfg=dict(
+        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1.0e-5,
+        by_epoch=False,
+        begin=0,
+        end=1000),
+    dict(
+        # use cosine lr from 105 to 210 epoch
+        type='CosineAnnealingLR',
+        eta_min=base_lr * 0.05,
+        begin=max_epochs // 2,
+        end=max_epochs,
+        T_max=max_epochs // 2,
+        by_epoch=True,
+        convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# codec settings
+codec = dict(
+    type='SimCCLabel',
+    input_size=(256, 256),
+    sigma=(5.66, 5.66),
+    simcc_split_ratio=2.0,
+    normalize=False,
+    use_dark=False)
+
+# model settings
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        _scope_='mmdet',
+        type='CSPNeXt',
+        arch='P5',
+        expand_ratio=0.5,
+        deepen_factor=0.67,
+        widen_factor=0.75,
+        out_indices=(4, ),
+        channel_attention=True,
+        norm_cfg=dict(type='SyncBN'),
+        act_cfg=dict(type='SiLU'),
+        init_cfg=dict(
+            type='Pretrained',
+            prefix='backbone.',
+            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+            'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
+        )),
+    head=dict(
+        type='RTMCCHead',
+        in_channels=768,
+        out_channels=21,
+        input_size=codec['input_size'],
+        in_featuremap_size=(8, 8),
+        simcc_split_ratio=codec['simcc_split_ratio'],
+        final_layer_kernel_size=7,
+        gau_cfg=dict(
+            hidden_dims=256,
+            s=128,
+            expansion_factor=2,
+            dropout_rate=0.,
+            drop_path=0.,
+            act_fn='SiLU',
+            use_rel_bias=False,
+            pos_enc=False),
+        loss=dict(
+            type='KLDiscretLoss',
+            use_target_weight=True,
+            beta=10.,
+            label_softmax=True),
+        decoder=codec),
+    test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+
+# pipelines
+train_pipeline = [
+    dict(type='LoadImage', backend_args=backend_args),
+    dict(type='GetBBoxCenterScale'),
+    # dict(type='RandomHalfBody'),
+    dict(
+        type='RandomBBoxTransform', scale_factor=[0.5, 1.5],
+        rotate_factor=180),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='mmdet.YOLOXHSVRandomAug'),
+    dict(
+        type='Albumentation',
+        transforms=[
+            dict(type='Blur', p=0.1),
+            dict(type='MedianBlur', p=0.1),
+            dict(
+                type='CoarseDropout',
+                max_holes=1,
+                max_height=0.4,
+                max_width=0.4,
+                min_holes=1,
+                min_height=0.2,
+                min_width=0.2,
+                p=1.0),
+        ]),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage', backend_args=backend_args),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+    dict(type='LoadImage', backend_args=backend_args),
+    dict(type='GetBBoxCenterScale'),
+    # dict(type='RandomHalfBody'),
+    dict(
+        type='RandomBBoxTransform',
+        shift_factor=0.,
+        scale_factor=[0.75, 1.25],
+        rotate_factor=180),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='mmdet.YOLOXHSVRandomAug'),
+    dict(
+        type='Albumentation',
+        transforms=[
+            dict(type='Blur', p=0.2),
+            dict(type='MedianBlur', p=0.2),
+            dict(
+                type='CoarseDropout',
+                max_holes=1,
+                max_height=0.4,
+                max_width=0.4,
+                min_holes=1,
+                min_height=0.2,
+                min_width=0.2,
+                p=0.5),
+        ]),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+
+# train datasets
+dataset_coco = dict(
+    type=dataset_type,
+    data_root=data_root,
+    data_mode=data_mode,
+    ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
+    data_prefix=dict(img='detection/coco/train2017/'),
+    pipeline=[],
+)
+
+dataset_onehand10k = dict(
+    type='OneHand10KDataset',
+    data_root=data_root,
+    data_mode=data_mode,
+    ann_file='onehand10k/annotations/onehand10k_train.json',
+    data_prefix=dict(img='pose/OneHand10K/'),
+    pipeline=[],
+)
+
+dataset_freihand = dict(
+    type='FreiHandDataset',
+    data_root=data_root,
+    data_mode=data_mode,
+    ann_file='freihand/annotations/freihand_train.json',
+    data_prefix=dict(img='pose/FreiHand/'),
+    pipeline=[],
+)
+
+dataset_rhd = dict(
+    type='Rhd2DDataset',
+    data_root=data_root,
+    data_mode=data_mode,
+    ann_file='rhd/annotations/rhd_train.json',
+    data_prefix=dict(img='pose/RHD/'),
+    pipeline=[
+        dict(
+            type='KeypointConverter',
+            num_keypoints=21,
+            mapping=[
+                (0, 0),
+                (1, 4),
+                (2, 3),
+                (3, 2),
+                (4, 1),
+                (5, 8),
+                (6, 7),
+                (7, 6),
+                (8, 5),
+                (9, 12),
+                (10, 11),
+                (11, 10),
+                (12, 9),
+                (13, 16),
+                (14, 15),
+                (15, 14),
+                (16, 13),
+                (17, 20),
+                (18, 19),
+                (19, 18),
+                (20, 17),
+            ])
+    ],
+)
+
+dataset_halpehand = dict(
+    type='HalpeHandDataset',
+    data_root=data_root,
+    data_mode=data_mode,
+    ann_file='halpe/annotations/halpe_train_v1.json',
+    data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015/'),
+    pipeline=[],
+)
+
+# data loaders
+train_dataloader = dict(
+    batch_size=256,
+    num_workers=10,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type='CombinedDataset',
+        metainfo=dict(
+            from_file='configs/_base_/datasets/coco_wholebody_hand.py'),
+        datasets=[
+            dataset_coco, dataset_onehand10k, dataset_freihand, dataset_rhd,
+            dataset_halpehand
+        ],
+        pipeline=train_pipeline,
+        test_mode=False,
+    ))
+
+# test datasets
+val_coco = dict(
+    type=dataset_type,
+    data_root=data_root,
+    data_mode=data_mode,
+    ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
+    data_prefix=dict(img='detection/coco/val2017/'),
+    pipeline=[],
+)
+
+val_onehand10k = dict(
+    type='OneHand10KDataset',
+    data_root=data_root,
+    data_mode=data_mode,
+    ann_file='onehand10k/annotations/onehand10k_test.json',
+    data_prefix=dict(img='pose/OneHand10K/'),
+    pipeline=[],
+)
+
+val_freihand = dict(
+    type='FreiHandDataset',
+    data_root=data_root,
+    data_mode=data_mode,
+    ann_file='freihand/annotations/freihand_test.json',
+    data_prefix=dict(img='pose/FreiHand/'),
+    pipeline=[],
+)
+
+val_rhd = dict(
+    type='Rhd2DDataset',
+    data_root=data_root,
+    data_mode=data_mode,
+    ann_file='rhd/annotations/rhd_test.json',
+    data_prefix=dict(img='pose/RHD/'),
+    pipeline=[
+        dict(
+            type='KeypointConverter',
+            num_keypoints=21,
+            mapping=[
+                (0, 0),
+                (1, 4),
+                (2, 3),
+                (3, 2),
+                (4, 1),
+                (5, 8),
+                (6, 7),
+                (7, 6),
+                (8, 5),
+                (9, 12),
+                (10, 11),
+                (11, 10),
+                (12, 9),
+                (13, 16),
+                (14, 15),
+                (15, 14),
+                (16, 13),
+                (17, 20),
+                (18, 19),
+                (19, 18),
+                (20, 17),
+            ])
+    ],
+)
+
+val_halpehand = dict(
+    type='HalpeHandDataset',
+    data_root=data_root,
+    data_mode=data_mode,
+    ann_file='halpe/annotations/halpe_val_v1.json',
+    data_prefix=dict(img='detection/coco/val2017/'),
+    pipeline=[],
+)
+
+test_dataloader = dict(
+    batch_size=32,
+    num_workers=10,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type='CombinedDataset',
+        metainfo=dict(
+            from_file='configs/_base_/datasets/coco_wholebody_hand.py'),
+        datasets=[
+            val_coco, val_onehand10k, val_freihand, val_rhd, val_halpehand
+        ],
+        pipeline=val_pipeline,
+        test_mode=True,
+    ))
+
+val_dataloader = test_dataloader
+
+# hooks
+default_hooks = dict(
+    checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+    dict(
+        type='EMAHook',
+        ema_type='ExpMomentumEMA',
+        momentum=0.0002,
+        update_buffers=True,
+        priority=49),
+    dict(
+        type='mmdet.PipelineSwitchHook',
+        switch_epoch=max_epochs - stage2_num_epochs,
+        switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = [
+    dict(type='PCKAccuracy', thr=0.2),
+    dict(type='AUC'),
+    dict(type='EPE')
+]
+test_evaluator = val_evaluator
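The RHD pipeline in the config above uses `KeypointConverter` because RHD annotates each finger from tip to base, while the COCO-WholeBody-Hand layout shared by the other four training sets runs base to tip, hence the reversed groups of four in the mapping. A minimal NumPy sketch of that index remapping; the `convert_keypoints` helper and dummy array are illustrative stand-ins, not MMPose API:

```python
import numpy as np

# (source, target) pairs copied from the config above.
MAPPING = [(0, 0), (1, 4), (2, 3), (3, 2), (4, 1), (5, 8), (6, 7), (7, 6),
           (8, 5), (9, 12), (10, 11), (11, 10), (12, 9), (13, 16), (14, 15),
           (15, 14), (16, 13), (17, 20), (18, 19), (19, 18), (20, 17)]


def convert_keypoints(kpts: np.ndarray) -> np.ndarray:
    """Reorder a (21, 2) RHD keypoint array into COCO-WholeBody-Hand order."""
    out = np.zeros_like(kpts)
    for src, dst in MAPPING:
        out[dst] = kpts[src]
    return out


rhd = np.arange(42, dtype=np.float32).reshape(21, 2)  # row i = RHD joint i
coco_hand = convert_keypoints(rhd)
assert np.allclose(coco_hand[4], rhd[1])  # RHD index 1 lands at COCO index 4
```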
diff --git a/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md
new file mode 100644
index 0000000000..361770dad2
--- /dev/null
+++ b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md
@@ -0,0 +1,67 @@
+<!-- [ALGORITHM] -->
+
+<details>
+<summary align="right"><a href="https://arxiv.org/abs/2303.07399">RTMPose (arXiv'2023)</a></summary>
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2303.07399,
+  doi = {10.48550/ARXIV.2303.07399},
+  url = {https://arxiv.org/abs/2303.07399},
+  author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
+  keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
+  title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose},
+  publisher = {arXiv},
+  year = {2023},
+  copyright = {Creative Commons Attribution 4.0 International}
+}
+
+```
+
+</details>
+
+<!-- [BACKBONE] -->
+
+<details>
+<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (arXiv'2022)</a></summary>
+
+```bibtex
+@misc{lyu2022rtmdet,
+  title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+  author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+  year={2022},
+  eprint={2212.07784},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}
+```
+
+</details>
+
+<!-- [DATASET] -->
+
+<details>
+<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary>
+
+```bibtex
+@inproceedings{lin2014microsoft,
+  title={Microsoft coco: Common objects in context},
+  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+  booktitle={European conference on computer vision},
+  pages={740--755},
+  year={2014},
+  organization={Springer}
+}
+```
+
+</details>
+
+- `Hand5` and `*` denote models trained on 5 public datasets:
+  - [COCO-Wholebody-Hand](https://github.com/jin-s13/COCO-WholeBody/)
+  - [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html)
+  - [FreiHand2d](https://lmb.informatik.uni-freiburg.de/projects/freihand/)
+  - [RHD2d](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html)
+  - [Halpe](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html#halpe)
+
+| Config | Input Size | PCK@0.2<br>(COCO-Wholebody-Hand) | PCK@0.2<br>(Hand5) | AUC<br>(Hand5) | EPE<br>(Hand5) | FLOPS(G) | Download |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| [RTMPose-m\*<br>(alpha version)](./rtmpose-m_8xb256-210e_hand5-256x256.py) | 256x256 | 81.5 | 96.4 | 83.9 | 5.06 | 2.581 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth) |
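PCK@0.2, AUC and EPE in the table above follow the usual keypoint conventions: PCK is the fraction of keypoints whose normalized error falls under the threshold, AUC averages PCK over a sweep of thresholds, and EPE is the mean pixel error. A rough NumPy sketch of the three quantities, with the bbox-size normalization chosen for illustration (MMPose's `PCKAccuracy`, `AUC` and `EPE` metrics are the reference implementations):

```python
import numpy as np


def keypoint_metrics(pred, gt, bbox_size, thr=0.2, auc_steps=20):
    """pred, gt: (N, K, 2) arrays; bbox_size: (N,) per-sample normalizer."""
    err = np.linalg.norm(pred - gt, axis=-1)     # (N, K), in pixels
    norm_err = err / bbox_size[:, None]          # (N, K), relative
    pck = (norm_err < thr).mean()                # PCK@thr
    thresholds = np.linspace(0, 1, auc_steps, endpoint=False)
    auc = np.mean([(norm_err < t).mean() for t in thresholds])
    epe = err.mean()                             # end-point error, pixels
    return pck, auc, epe


pred = np.random.rand(4, 21, 2) * 256
gt = pred + np.random.randn(4, 21, 2) * 4
print(keypoint_metrics(pred, gt, bbox_size=np.full(4, 256.0)))
```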
diff --git a/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml
new file mode 100644
index 0000000000..a8dfd42e39
--- /dev/null
+++ b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml
@@ -0,0 +1,27 @@
+Collections:
+- Name: RTMPose
+  Paper:
+    Title: "RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose"
+    URL: https://arxiv.org/abs/2303.07399
+  README: https://github.com/open-mmlab/mmpose/blob/main/projects/rtmpose/README.md
+Models:
+- Config: configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py
+  In Collection: RTMPose
+  Metadata:
+    Architecture: &id001
+    - RTMPose
+    Training Data: &id002
+    - COCO-Wholebody-Hand
+    - OneHand10K
+    - FreiHand2d
+    - RHD2d
+    - Halpe
+  Name: rtmpose-m_8xb256-210e_hand5-256x256
+  Results:
+  - Dataset: Hand5
+    Metrics:
+      PCK@0.2: 0.964
+      AUC: 0.839
+      EPE: 5.06
+    Task: Hand 2D Keypoint
+  Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth
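The metafile above registers the model in the model index, so downstream tooling can discover the checkpoint and metrics with any YAML parser; a small sketch using PyYAML (nothing here is MMPose-specific):

```python
import yaml  # pip install pyyaml

with open('configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml') as f:
    metafile = yaml.safe_load(f)

for model in metafile['Models']:
    print(model['Name'])
    print('  weights:', model['Weights'])
    for result in model['Results']:
        print(f"  {result['Dataset']}: {result['Metrics']}")
```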
diff --git a/projects/rtmpose/README.md b/projects/rtmpose/README.md
index de367448ed..cb4e742b7c 100644
--- a/projects/rtmpose/README.md
+++ b/projects/rtmpose/README.md
@@ -157,7 +157,8 @@ Feel free to join our community group for more help:
 
 ### Body 2d (17 Keypoints)
 
-#### AIC+COCO
+<details close>
+<summary><b>AIC+COCO</b></summary>
 
 | Config | Input Size | AP<br>(COCO) | PCK@0.1<br>(Body8) | AUC<br>(Body8) | EPE<br>(Body8) | Params(M) | FLOPS(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | ncnn-FP16-Latency(ms)<br>(Snapdragon 865) | Download |
 | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
@@ -168,7 +169,10 @@ Feel free to join our community group for more help:
 | [RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 77.0 | 94.32 | 69.85 | 14.64 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth) |
 | [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 77.3 | 94.54 | 70.14 | 14.30 | 27.79 | 9.35 | - | 6.05 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth) |
 
-#### Body8
+</details>
+
+<details close>
+<summary><b>Body8</b></summary>
 
 - `*` denotes model trained on 7 public datasets:
   - [AI Challenger](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#aic)
@@ -189,6 +193,8 @@ Feel free to join our community group for more help:
 | [RTMPose-m\*](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 76.6 | 94.64 | 70.38 | 13.98 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth) |
 | [RTMPose-l\*](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 78.3 | 95.36 | 71.58 | 13.08 | 27.79 | 9.35 | - | 6.05 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-384x288-3f5a1437_20230504.pth) |
 
+</details>
+
 #### Model Pruning
 
 **Notes**
@@ -225,15 +231,34 @@ For more details, please refer to [GroupFisher Pruning for RTMPose](./rtmpose/pr
 | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
 | [RTMPose-m (alpha version)](./rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.70 | - | - | - | Coming soon |
 
-### Hand 2d
+### Hand 2d (21 Keypoints)
+
+| Detection Config | Input Size | Model AP<br>(OneHand10K) | Flops<br>(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | Download |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| [RTMDet-nano<br>(alpha version)](./rtmdet/hand/rtmdet_nano_320-8xb32_hand.py) | 320x320 | 76.0 | 0.31 | - | - | [Det Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmdet_nano_8xb32-300e_hand-267f9c8f.pth) |
 
-Coming soon
+<details close>
+<summary><b>Hand5</b></summary>
+
+- `Hand5` and `*` denote models trained on 5 public datasets:
+  - [COCO-Wholebody-Hand](https://github.com/jin-s13/COCO-WholeBody/)
+  - [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html)
+  - [FreiHand2d](https://lmb.informatik.uni-freiburg.de/projects/freihand/)
+  - [RHD2d](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html)
+  - [Halpe](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html#halpe)
+
+| Config | Input Size | PCK@0.2<br>(COCO-Wholebody-Hand) | PCK@0.2<br>(Hand5) | AUC<br>(Hand5) | EPE<br>(Hand5) | FLOPS(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | Download |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| [RTMPose-m\*<br>(alpha version)](./rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 81.5 | 96.4 | 83.9 | 5.06 | 2.581 | - | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth) |
+
+</details>
+
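The RTMPose-m entry above reports SimCC-style outputs: with `input_size=(256, 256)` and `simcc_split_ratio=2.0` from the config earlier in this patch, each keypoint's x and y are classified over 512 bins. A toy decode with random arrays standing in for the head's logits (MMPose's `SimCCLabel` codec is the reference implementation):

```python
import numpy as np

split_ratio = 2.0
simcc_x = np.random.rand(21, 512)  # stand-ins for the head's x logits
simcc_y = np.random.rand(21, 512)  # stand-ins for the head's y logits

# Argmax over the bins, then map back to input-crop pixel coordinates.
x = simcc_x.argmax(axis=1) / split_ratio
y = simcc_y.argmax(axis=1) / split_ratio
keypoints = np.stack([x, y], axis=1)  # (21, 2) inside the 256x256 crop
print(keypoints.shape)
```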
### Pretrained Models We provide the UDP pretraining configs of the CSPNeXt backbone. Find more details in the [pretrain_cspnext_udp folder](./rtmpose/pretrain_cspnext_udp/). -#### AIC+COCO +
+AIC+COCO | Model | Input Size | Params(M) | Flops(G) | AP
(GT) | AR
(GT) | Download | | :----------: | :--------: | :-------: | :------: | :-------------: | :-------------: | :-----------------------------------------------------------------------------------------------------------------------------: | @@ -242,7 +267,10 @@ We provide the UDP pretraining configs of the CSPNeXt backbone. Find more detail | CSPNeXt-m | 256x192 | 17.53 | 3.05 | 74.8 | 77.7 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth) | | CSPNeXt-l | 256x192 | 32.44 | 5.32 | 77.2 | 79.9 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth) | -#### Body8 +
+ +
+Body8 - `*` denotes model trained on 7 public datasets: - [AI Challenger](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#aic) @@ -263,6 +291,8 @@ We provide the UDP pretraining configs of the CSPNeXt backbone. Find more detail | CSPNeXt-m\* | 384x288 | 17.53 | 6.86 | 75.8 | 97.60 | 70.18 | 14.04 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-body7_210e-384x288-b9bc2b57_20230504.pth) | | CSPNeXt-l\* | 384x288 | 32.44 | 11.96 | 77.2 | 97.89 | 71.23 | 13.05 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-body7_210e-384x288-b15bc30d_20230504.pth) | +
+ #### ImageNet We also provide the ImageNet classification pre-trained weights of the CSPNeXt backbone. Find more details in [RTMDet](https://github.com/open-mmlab/mmdetection/blob/latest/configs/rtmdet/README.md#classification). diff --git a/projects/rtmpose/README_CN.md b/projects/rtmpose/README_CN.md index 10b3a4484b..72d8c08b7b 100644 --- a/projects/rtmpose/README_CN.md +++ b/projects/rtmpose/README_CN.md @@ -148,7 +148,8 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性 ### 人体 2d 关键点 (17 Keypoints) -#### AIC+COCO +
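The README above pairs a hand detector (RTMDet-nano) with a hand pose model (RTMPose-m Hand5). A minimal top-down wiring of the two released checkpoints, assuming an MMPose 1.x / MMDetection 3.x environment; the 0.5 score threshold and demo image are illustrative choices:

```python
from mmdet.apis import inference_detector, init_detector
from mmpose.apis import inference_topdown, init_model

det_cfg = 'projects/rtmpose/rtmdet/hand/rtmdet_nano_320-8xb32_hand.py'
det_ckpt = ('https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/'
            'rtmdet_nano_8xb32-300e_hand-267f9c8f.pth')
pose_cfg = ('configs/hand_2d_keypoint/rtmpose/hand5/'
            'rtmpose-m_8xb256-210e_hand5-256x256.py')
pose_ckpt = ('https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/'
             'rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth')

detector = init_detector(det_cfg, det_ckpt, device='cpu')
pose_model = init_model(pose_cfg, pose_ckpt, device='cpu')

img = 'demo.jpg'  # any image containing hands
det = inference_detector(detector, img).pred_instances
bboxes = det.bboxes[det.scores > 0.5].cpu().numpy()  # confident xyxy boxes
pose_results = inference_topdown(pose_model, img, bboxes)
print(pose_results[0].pred_instances.keypoints.shape)  # expect (1, 21, 2)
```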
diff --git a/projects/rtmpose/README_CN.md b/projects/rtmpose/README_CN.md
index 10b3a4484b..72d8c08b7b 100644
--- a/projects/rtmpose/README_CN.md
+++ b/projects/rtmpose/README_CN.md
@@ -148,7 +148,8 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性
 
 ### 人体 2d 关键点 (17 Keypoints)
 
-#### AIC+COCO
+<details close>
+<summary><b>AIC+COCO</b></summary>
 
 | Config | Input Size | AP<br>(COCO) | PCK@0.1<br>(Body8) | AUC<br>(Body8) | EPE<br>(Body8) | Params(M) | FLOPS(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | ncnn-FP16-Latency(ms)<br>(Snapdragon 865) | Download |
 | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
@@ -159,7 +160,10 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性
 | [RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 77.0 | 94.32 | 69.85 | 14.64 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth) |
 | [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 77.3 | 94.54 | 70.14 | 14.30 | 27.79 | 9.35 | - | 6.05 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth) |
 
-#### Body8
+</details>
+
+<details close>
+<summary><b>Body8</b></summary>
 
 - `*` 代表模型在 7 个开源数据集上训练得到:
   - [AI Challenger](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#aic)
@@ -180,6 +184,8 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性
 | [RTMPose-m\*](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 76.6 | 94.64 | 70.38 | 13.98 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth) |
 | [RTMPose-l\*](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 78.3 | 95.36 | 71.58 | 13.08 | 27.79 | 9.35 | - | 6.05 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-384x288-3f5a1437_20230504.pth) |
 
+</details>
+
 #### 模型剪枝
 
 **说明**
@@ -214,17 +220,36 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性
 
 | Config | Input Size | NME<br>(LaPa) | FLOPS(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | Download |
 | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
-| [RTMPose-m (alpha version)](./rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.70 | - | - | - | Coming soon |
+| [RTMPose-m (试用)](./rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.70 | - | - | - | Coming soon |
 
-### 手部 2d 关键点
+### 手部 2d 关键点 (21 Keypoints)
+
+| Detection Config | Input Size | Model AP<br>(OneHand10K) | Flops<br>(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | Download |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| [RTMDet-nano (试用)](./rtmdet/hand/rtmdet_nano_320-8xb32_hand.py) | 320x320 | 76.0 | 0.31 | - | - | [Det Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmdet_nano_8xb32-300e_hand-267f9c8f.pth) |
 
-Coming soon
+<details close>
+<summary><b>Hand5</b></summary>
+
+- `Hand5` and `*` 代表模型在 5 个开源数据集上训练得到:
+  - [COCO-Wholebody-Hand](https://github.com/jin-s13/COCO-WholeBody/)
+  - [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html)
+  - [FreiHand2d](https://lmb.informatik.uni-freiburg.de/projects/freihand/)
+  - [RHD2d](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html)
+  - [Halpe](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html#halpe)
+
+| Config | Input Size | PCK@0.2<br>(COCO-Wholebody-Hand) | PCK@0.2<br>(Hand5) | AUC<br>(Hand5) | EPE<br>(Hand5) | FLOPS(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | Download |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| [RTMPose-m\* (试用)](./rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 81.5 | 96.4 | 83.9 | 5.06 | 2.581 | - | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth) |
+
### 预训练模型 我们提供了 UDP 预训练的 CSPNeXt 模型参数,训练配置请参考 [pretrain_cspnext_udp folder](./rtmpose/pretrain_cspnext_udp/)。 -#### AIC+COCO +
+AIC+COCO | Model | Input Size | Params(M) | Flops(G) | AP
(GT) | AR
(GT) | Download | | :----------: | :--------: | :-------: | :------: | :-------------: | :-------------: | :-----------------------------------------------------------------------------------------------------------------------------: | @@ -233,7 +258,10 @@ Coming soon | CSPNeXt-m | 256x192 | 17.53 | 3.05 | 74.8 | 77.7 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth) | | CSPNeXt-l | 256x192 | 32.44 | 5.32 | 77.2 | 79.9 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth) | -#### Body8 +
+</details>
+
+<details close>
+<summary><b>Body8</b></summary>
 
 - `*` 代表模型在 7 个开源数据集上训练得到:
   - [AI Challenger](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#aic)
@@ -254,6 +282,8 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性
 | CSPNeXt-m\* | 384x288 | 17.53 | 6.86 | 75.8 | 97.60 | 70.18 | 14.04 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-body7_210e-384x288-b9bc2b57_20230504.pth) |
 | CSPNeXt-l\* | 384x288 | 32.44 | 11.96 | 77.2 | 97.89 | 71.23 | 13.05 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-body7_210e-384x288-b15bc30d_20230504.pth) |
 
+</details>
+
 #### ImageNet
 
 我们提供了 ImageNet 分类训练的 CSPNeXt 模型参数,更多细节请参考 [RTMDet](https://github.com/open-mmlab/mmdetection/blob/latest/configs/rtmdet/README.md#classification)。
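The detector config that follows builds its training set by concatenating four of the hand keypoint datasets through their `mmpose.*` dataset classes, using `ignore_keys` to drop pose-specific metadata that `ConcatDataset` would otherwise try to reconcile. A quick sanity check of the merged config, assuming MMEngine and MMDetection are installed so the `mmdet::` base file resolves:

```python
from mmengine.config import Config

cfg = Config.fromfile(
    'projects/rtmpose/rtmdet/hand/rtmdet_nano_320-8xb32_hand.py')

# The RTMDet-L base settings should now carry the nano-sized overrides.
print(cfg.model.backbone.deepen_factor,  # 0.33
      cfg.model.backbone.widen_factor)   # 0.25
print(cfg.model.bbox_head.num_classes)   # 1 (hand)
print([d.type for d in cfg.train_dataloader.dataset.datasets])
```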
diff --git a/projects/rtmpose/rtmdet/hand/rtmdet_nano_320-8xb32_hand.py b/projects/rtmpose/rtmdet/hand/rtmdet_nano_320-8xb32_hand.py
new file mode 100644
index 0000000000..278cc0bfe8
--- /dev/null
+++ b/projects/rtmpose/rtmdet/hand/rtmdet_nano_320-8xb32_hand.py
@@ -0,0 +1,171 @@
+_base_ = 'mmdet::rtmdet/rtmdet_l_8xb32-300e_coco.py'
+
+input_shape = 320
+
+model = dict(
+    backbone=dict(
+        deepen_factor=0.33,
+        widen_factor=0.25,
+        use_depthwise=True,
+    ),
+    neck=dict(
+        in_channels=[64, 128, 256],
+        out_channels=64,
+        num_csp_blocks=1,
+        use_depthwise=True,
+    ),
+    bbox_head=dict(
+        in_channels=64,
+        feat_channels=64,
+        share_conv=False,
+        exp_on_reg=False,
+        use_depthwise=True,
+        num_classes=1),
+    test_cfg=dict(
+        nms_pre=1000,
+        min_bbox_size=0,
+        score_thr=0.05,
+        nms=dict(type='nms', iou_threshold=0.6),
+        max_per_img=100))
+
+# file_client_args = dict(
+#     backend='petrel',
+#     path_mapping=dict({'data/': 's3://openmmlab/datasets/'}))
+
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(
+        type='CachedMosaic',
+        img_scale=(input_shape, input_shape),
+        pad_val=114.0,
+        max_cached_images=20,
+        random_pop=False),
+    dict(
+        type='RandomResize',
+        scale=(input_shape * 2, input_shape * 2),
+        ratio_range=(0.5, 1.5),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=(input_shape, input_shape)),
+    dict(type='YOLOXHSVRandomAug'),
+    dict(type='RandomFlip', prob=0.5),
+    dict(
+        type='Pad',
+        size=(input_shape, input_shape),
+        pad_val=dict(img=(114, 114, 114))),
+    dict(type='PackDetInputs')
+]
+
+train_pipeline_stage2 = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(
+        type='RandomResize',
+        scale=(input_shape, input_shape),
+        ratio_range=(0.5, 1.5),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=(input_shape, input_shape)),
+    dict(type='YOLOXHSVRandomAug'),
+    dict(type='RandomFlip', prob=0.5),
+    dict(
+        type='Pad',
+        size=(input_shape, input_shape),
+        pad_val=dict(img=(114, 114, 114))),
+    dict(type='PackDetInputs')
+]
+
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(input_shape, input_shape), keep_ratio=True),
+    dict(
+        type='Pad',
+        size=(input_shape, input_shape),
+        pad_val=dict(img=(114, 114, 114))),
+    dict(
+        type='PackDetInputs',
+        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                   'scale_factor'))
+]
+
+data_mode = 'topdown'
+data_root = 'data/'
+
+train_dataset = dict(
+    _delete_=True,
+    type='ConcatDataset',
+    datasets=[
+        dict(
+            type='mmpose.OneHand10KDataset',
+            data_root=data_root,
+            data_mode=data_mode,
+            pipeline=train_pipeline,
+            ann_file='onehand10k/annotations/onehand10k_train.json',
+            data_prefix=dict(img='pose/OneHand10K/')),
+        dict(
+            type='mmpose.FreiHandDataset',
+            data_root=data_root,
+            data_mode=data_mode,
+            pipeline=train_pipeline,
+            ann_file='freihand/annotations/freihand_train.json',
+            data_prefix=dict(img='pose/FreiHand/')),
+        dict(
+            type='mmpose.Rhd2DDataset',
+            data_root=data_root,
+            data_mode=data_mode,
+            pipeline=train_pipeline,
+            ann_file='rhd/annotations/rhd_train.json',
+            data_prefix=dict(img='pose/RHD/')),
+        dict(
+            type='mmpose.HalpeHandDataset',
+            data_root=data_root,
+            data_mode=data_mode,
+            pipeline=train_pipeline,
+            ann_file='halpe/annotations/halpe_train_v1.json',
+            data_prefix=dict(
+                img='pose/Halpe/hico_20160224_det/images/train2015/')  # noqa
+        )
+    ],
+    ignore_keys=[
+        'CLASSES', 'dataset_keypoint_weights', 'dataset_name', 'flip_indices',
+        'flip_pairs', 'keypoint_colors', 'keypoint_id2name',
+        'keypoint_name2id', 'lower_body_ids', 'num_keypoints',
+        'num_skeleton_links', 'sigmas', 'skeleton_link_colors',
+        'skeleton_links', 'upper_body_ids'
+    ],
+)
+
+test_dataset = dict(
+    _delete_=True,
+    type='mmpose.OneHand10KDataset',
+    data_root=data_root,
+    data_mode=data_mode,
+    pipeline=test_pipeline,
+    ann_file='onehand10k/annotations/onehand10k_test.json',
+    data_prefix=dict(img='pose/OneHand10K/'),
+)
+
+train_dataloader = dict(dataset=train_dataset)
+val_dataloader = dict(dataset=test_dataset)
+test_dataloader = val_dataloader
+
+custom_hooks = [
+    dict(
+        type='EMAHook',
+        ema_type='ExpMomentumEMA',
+        momentum=0.0002,
+        update_buffers=True,
+        priority=49),
+    dict(
+        type='PipelineSwitchHook',
+        switch_epoch=280,
+        switch_pipeline=train_pipeline_stage2)
+]
+
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'onehand10k/annotations/onehand10k_test.json',
+    metric='bbox',
+    format_only=False)
+test_evaluator = val_evaluator
+
+train_cfg = dict(val_interval=1)
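Both new configs run through the standard MMEngine `Runner`, so a local smoke test only needs the config path. A minimal launch sketch, assuming an MMPose 1.x checkout with these files in place; the `work_dir` and reduced batch size are arbitrary choices for illustration:

```python
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile('configs/hand_2d_keypoint/rtmpose/hand5/'
                      'rtmpose-m_8xb256-210e_hand5-256x256.py')
cfg.work_dir = 'work_dirs/rtmpose-m_hand5'

# Shrink the batch for a single-GPU test; auto_scale_lr (declared in the
# config with base_batch_size=256) rescales the learning rate to match.
cfg.train_dataloader.batch_size = 32
cfg.auto_scale_lr.enable = True

runner = Runner.from_cfg(cfg)
runner.train()
```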