diff --git a/configs/face_2d_keypoint/rtmpose/README.md b/configs/face_2d_keypoint/rtmpose/README.md
index 5381e966f6..d0c7f55fb4 100644
--- a/configs/face_2d_keypoint/rtmpose/README.md
+++ b/configs/face_2d_keypoint/rtmpose/README.md
@@ -29,4 +29,4 @@ Results on LaPa dataset
| Model | Input Size | NME | Details and Download |
| :-------: | :--------: | :--: | :---------------------------------------: |
-| RTMPose-m | 256x256 | 1.29 | [rtmpose_lapa.md](./wflw/rtmpose_lapa.md) |
+| RTMPose-m | 256x256 | 1.29 | [rtmpose_lapa.md](./lapa/rtmpose_lapa.md) |
diff --git a/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md b/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md
index 62a3f25157..9638de7551 100644
--- a/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md
+++ b/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md
@@ -33,7 +33,7 @@
-Results on COCO-WholeBody-Face val set
+Results on LaPa val set
| Arch | Input Size | NME | ckpt | log |
| :------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :------------------------------------------------------------: |
diff --git a/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py
new file mode 100644
index 0000000000..689dc68096
--- /dev/null
+++ b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py
@@ -0,0 +1,381 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# coco-hand onehand10k freihand2d rhd2d halpehand
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 10
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 105 to 210 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
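+# Note: 8 GPUs x 256 samples per GPU gives an effective batch size of 2048;
+# when auto scaling is enabled (e.g. via `--auto-scale-lr`), the LR above is
+# scaled linearly by effective_batch_size / base_batch_size.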
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(256, 256),
+ sigma=(5.66, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
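+# SimCC reformulates localization as per-axis classification: with a 256-px
+# input and simcc_split_ratio=2.0, each coordinate is discretized into
+# 256 * 2.0 = 512 bins, smoothed by a Gaussian label of sigma 5.66 bins.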
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=21,
+ input_size=codec['input_size'],
+ in_featuremap_size=(8, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
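+# CSPNeXt downsamples by a factor of 32, so the 256x256 input yields the 8x8
+# in_featuremap_size above; the head outputs SimCC logits of shape
+# (N, 21, 512) per axis, one channel per hand keypoint.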
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.5, 1.5],
+ rotate_factor=180),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=180),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.2),
+ dict(type='MedianBlur', p=0.2),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
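+# Stage 2 uses deliberately milder augmentation than stage 1 (no bbox shift,
+# a narrower 0.75-1.25 scale range, CoarseDropout at p=0.5 instead of 1.0);
+# the PipelineSwitchHook below swaps it in for the last 10 epochs.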
+
+# train datasets
+dataset_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+)
+
+dataset_onehand10k = dict(
+ type='OneHand10KDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='onehand10k/annotations/onehand10k_train.json',
+ data_prefix=dict(img='pose/OneHand10K/'),
+ pipeline=[],
+)
+
+dataset_freihand = dict(
+ type='FreiHandDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='freihand/annotations/freihand_train.json',
+ data_prefix=dict(img='pose/FreiHand/'),
+ pipeline=[],
+)
+
+dataset_rhd = dict(
+ type='Rhd2DDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='rhd/annotations/rhd_train.json',
+ data_prefix=dict(img='pose/RHD/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=21,
+ mapping=[
+ (0, 0),
+ (1, 4),
+ (2, 3),
+ (3, 2),
+ (4, 1),
+ (5, 8),
+ (6, 7),
+ (7, 6),
+ (8, 5),
+ (9, 12),
+ (10, 11),
+ (11, 10),
+ (12, 9),
+ (13, 16),
+ (14, 15),
+ (15, 14),
+ (16, 13),
+ (17, 20),
+ (18, 19),
+ (19, 18),
+ (20, 17),
+ ])
+ ],
+)
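+# KeypointConverter maps (source_index, target_index): RHD annotates each
+# finger from tip to base, while the unified COCO-WholeBody-Hand order runs
+# base to tip, so the indices within every finger are reversed here.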
+
+dataset_halpehand = dict(
+ type='HalpeHandDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_train_v1.json',
+ data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015/'),
+ pipeline=[],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(
+ from_file='configs/_base_/datasets/coco_wholebody_hand.py'),
+ datasets=[
+ dataset_coco, dataset_onehand10k, dataset_freihand, dataset_rhd,
+ dataset_halpehand
+ ],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
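+# Each sub-dataset's own `pipeline` (e.g. the RHD KeypointConverter above)
+# runs first to unify annotations to the 21-keypoint scheme declared in
+# coco_wholebody_hand.py; the shared train_pipeline then applies augmentation
+# and SimCC target encoding.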
+
+# test datasets
+val_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[],
+)
+
+val_onehand10k = dict(
+ type='OneHand10KDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='onehand10k/annotations/onehand10k_test.json',
+ data_prefix=dict(img='pose/OneHand10K/'),
+ pipeline=[],
+)
+
+val_freihand = dict(
+ type='FreiHandDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='freihand/annotations/freihand_test.json',
+ data_prefix=dict(img='pose/FreiHand/'),
+ pipeline=[],
+)
+
+val_rhd = dict(
+ type='Rhd2DDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='rhd/annotations/rhd_test.json',
+ data_prefix=dict(img='pose/RHD/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=21,
+ mapping=[
+ (0, 0),
+ (1, 4),
+ (2, 3),
+ (3, 2),
+ (4, 1),
+ (5, 8),
+ (6, 7),
+ (7, 6),
+ (8, 5),
+ (9, 12),
+ (10, 11),
+ (11, 10),
+ (12, 9),
+ (13, 16),
+ (14, 15),
+ (15, 14),
+ (16, 13),
+ (17, 20),
+ (18, 19),
+ (19, 18),
+ (20, 17),
+ ])
+ ],
+)
+
+val_halpehand = dict(
+ type='HalpeHandDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_val_v1.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[],
+)
+
+test_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(
+ from_file='configs/_base_/datasets/coco_wholebody_hand.py'),
+ datasets=[
+ val_coco, val_onehand10k, val_freihand, val_rhd, val_halpehand
+ ],
+ pipeline=val_pipeline,
+ test_mode=True,
+ ))
+
+val_dataloader = test_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
+test_evaluator = val_evaluator
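For intuition, the three metrics above reduce to (normalized) keypoint distances. Below is a minimal NumPy sketch of PCK@0.2, AUC and EPE under the usual bbox normalization; it is a hypothetical helper for illustration, not MMPose's `PCKAccuracy`/`AUC`/`EPE` implementation, whose normalization and threshold sweep may differ.

```python
import numpy as np

def hand_metrics(pred, gt, bbox_size, thr=0.2):
    """pred, gt: (N, K, 2) keypoints; bbox_size: (N,) normalization lengths."""
    dist = np.linalg.norm(pred - gt, axis=-1)          # (N, K) pixel errors
    norm = dist / bbox_size[:, None]                   # bbox-normalized errors
    pck = (norm <= thr).mean()                         # PCK@thr, here thr=0.2
    thrs = np.linspace(0.0, 0.5, 20)                   # threshold sweep
    auc = np.mean([(norm <= t).mean() for t in thrs])  # area under PCK curve
    epe = dist.mean()                                  # end-point error (px)
    return pck, auc, epe
```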
diff --git a/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md
new file mode 100644
index 0000000000..361770dad2
--- /dev/null
+++ b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md
@@ -0,0 +1,67 @@
+<!-- [ALGORITHM] -->
+
+<details>
+<summary align="right"><a href="https://arxiv.org/abs/2303.07399">RTMPose (arXiv'2023)</a></summary>
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2303.07399,
+ doi = {10.48550/ARXIV.2303.07399},
+ url = {https://arxiv.org/abs/2303.07399},
+ author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
+ keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
+ title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose},
+ publisher = {arXiv},
+ year = {2023},
+ copyright = {Creative Commons Attribution 4.0 International}
+}
+
+```
+
+</details>
+
+<details>
+<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (arXiv'2022)</a></summary>
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+</details>
+
+<!-- [DATASET] -->
+
+<details>
+<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary>
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+</details>
+
+- `Hand5` and `*` denote models trained on 5 public datasets:
+ - [COCO-Wholebody-Hand](https://github.com/jin-s13/COCO-WholeBody/)
+ - [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html)
+ - [FreiHand2d](https://lmb.informatik.uni-freiburg.de/projects/freihand/)
+ - [RHD2d](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html)
+ - [Halpe](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html#halpe)
+
+| Config | Input Size | PCK@0.2<br>(COCO-Wholebody-Hand) | PCK@0.2<br>(Hand5) | AUC<br>(Hand5) | EPE<br>(Hand5) | FLOPS(G) | Download |
+| :---------------------------------------: | :--------: | :-----------------------------------: | :---------------------: | :-----------------: | :-----------------: | :------: | :-----------------------------------------: |
+| [RTMPose-m\*<br>(alpha version)](./rtmpose-m_8xb256-210e_hand5-256x256.py) | 256x256 | 81.5 | 96.4 | 83.9 | 5.06 | 2.581 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth) |
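To try the checkpoint from the table above, one option is MMPose's high-level inferencer; a minimal sketch (the demo image path is a placeholder):

```python
from mmpose.apis import MMPoseInferencer

inferencer = MMPoseInferencer(
    pose2d='configs/hand_2d_keypoint/rtmpose/hand5/'
    'rtmpose-m_8xb256-210e_hand5-256x256.py',
    pose2d_weights='https://download.openmmlab.com/mmpose/v1/projects/'
    'rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth')

# Yields one result dict per image, with 21 predicted keypoints per hand.
result = next(inferencer('demo_hand.jpg'))
```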
diff --git a/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml
new file mode 100644
index 0000000000..a8dfd42e39
--- /dev/null
+++ b/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml
@@ -0,0 +1,27 @@
+Collections:
+- Name: RTMPose
+ Paper:
+ Title: "RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose"
+ URL: https://arxiv.org/abs/2303.07399
+ README: https://github.com/open-mmlab/mmpose/blob/main/projects/rtmpose/README.md
+Models:
+- Config: configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: &id001
+ - RTMPose
+ Training Data: &id002
+ - COCO-Wholebody-Hand
+ - OneHand10K
+ - FreiHand2d
+ - RHD2d
+ - Halpe
+ Name: rtmpose-m_8xb256-210e_hand5-256x256
+ Results:
+ - Dataset: Hand5
+ Metrics:
+ PCK@0.2: 0.964
+ AUC: 0.839
+ EPE: 5.06
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth
diff --git a/projects/rtmpose/README.md b/projects/rtmpose/README.md
index de367448ed..cb4e742b7c 100644
--- a/projects/rtmpose/README.md
+++ b/projects/rtmpose/README.md
@@ -157,7 +157,8 @@ Feel free to join our community group for more help:
### Body 2d (17 Keypoints)
-#### AIC+COCO
+<details open>
+<summary><b>AIC+COCO</b></summary>
| Config | Input Size | AP<br>(COCO) | PCK@0.1<br>(Body8) | AUC<br>(Body8) | EPE<br>(Body8) | Params(M) | FLOPS(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | ncnn-FP16-Latency(ms)<br>(Snapdragon 865) | Download |
| :---------------------------------------------------------------------------: | :--------: | :---------------: | :---------------------: | :-----------------: | :-----------------: | :-------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------: |
@@ -168,7 +169,10 @@ Feel free to join our community group for more help:
| [RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 77.0 | 94.32 | 69.85 | 14.64 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth) |
| [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 77.3 | 94.54 | 70.14 | 14.30 | 27.79 | 9.35 | - | 6.05 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth) |
-#### Body8
+</details>
+
+<details open>
+<summary><b>Body8</b></summary>
- `*` denotes model trained on 7 public datasets:
- [AI Challenger](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#aic)
@@ -189,6 +193,8 @@ Feel free to join our community group for more help:
| [RTMPose-m\*](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 76.6 | 94.64 | 70.38 | 13.98 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth) |
| [RTMPose-l\*](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 78.3 | 95.36 | 71.58 | 13.08 | 27.79 | 9.35 | - | 6.05 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-384x288-3f5a1437_20230504.pth) |
+
+</details>
#### Model Pruning
**Notes**
@@ -225,15 +231,34 @@ For more details, please refer to [GroupFisher Pruning for RTMPose](./rtmpose/pr
| :----------------------------------------------------------------------------: | :--------: | :----------------: | :------: | :--------------------------------: | :---------------------------------------: | :---------: |
| [RTMPose-m (alpha version)](./rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.70 | - | - | - | Coming soon |
-### Hand 2d
+### Hand 2d (21 Keypoints)
+
+| Detection Config | Input Size | Model AP<br>(OneHand10K) | Flops<br>(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | Download |
+| :------------------------------------: | :--------: | :---------------------------: | :---------------: | :--------------------------------: | :---------------------------------------: | :-----------------------------: |
+| [RTMDet-nano<br>(alpha version)](./rtmdet/hand/rtmdet_nano_320-8xb32_hand.py) | 320x320 | 76.0 | 0.31 | - | - | [Det Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmdet_nano_8xb32-300e_hand-267f9c8f.pth) |
-Coming soon
+<details open>
+<summary><b>Hand5</b></summary>
+
+- `Hand5` and `*` denote models trained on 5 public datasets:
+ - [COCO-Wholebody-Hand](https://github.com/jin-s13/COCO-WholeBody/)
+ - [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html)
+ - [FreiHand2d](https://lmb.informatik.uni-freiburg.de/projects/freihand/)
+ - [RHD2d](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html)
+ - [Halpe](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html#halpe)
+
+| Config | Input Size | PCK@0.2<br>(COCO-Wholebody-Hand) | PCK@0.2<br>(Hand5) | AUC<br>(Hand5) | EPE<br>(Hand5) | FLOPS(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | Download |
+| :-------------------------------------------------------------------------------------------------------------------: | :--------: | :-----------------------------------: | :---------------------: | :-----------------: | :-----------------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------: |
+| [RTMPose-m\*<br>(alpha version)](./rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 81.5 | 96.4 | 83.9 | 5.06 | 2.581 | - | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth) |
+
+</details>
### Pretrained Models
We provide the UDP pretraining configs of the CSPNeXt backbone. Find more details in the [pretrain_cspnext_udp folder](./rtmpose/pretrain_cspnext_udp/).
-#### AIC+COCO
+<details open>
+<summary><b>AIC+COCO</b></summary>
| Model | Input Size | Params(M) | Flops(G) | AP<br>(GT) | AR<br>(GT) | Download |
@@ -242,7 +267,10 @@ We provide the UDP pretraining configs of the CSPNeXt backbone. Find more detail
| CSPNeXt-m | 256x192 | 17.53 | 3.05 | 74.8 | 77.7 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth) |
| CSPNeXt-l | 256x192 | 32.44 | 5.32 | 77.2 | 79.9 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth) |
-#### Body8
+</details>
+
+<details open>
+<summary><b>Body8</b></summary>
- `*` denotes model trained on 7 public datasets:
- [AI Challenger](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#aic)
@@ -263,6 +291,8 @@ We provide the UDP pretraining configs of the CSPNeXt backbone. Find more detail
| CSPNeXt-m\* | 384x288 | 17.53 | 6.86 | 75.8 | 97.60 | 70.18 | 14.04 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-body7_210e-384x288-b9bc2b57_20230504.pth) |
| CSPNeXt-l\* | 384x288 | 32.44 | 11.96 | 77.2 | 97.89 | 71.23 | 13.05 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-body7_210e-384x288-b15bc30d_20230504.pth) |
+
+</details>
#### ImageNet
We also provide the ImageNet classification pre-trained weights of the CSPNeXt backbone. Find more details in [RTMDet](https://github.com/open-mmlab/mmdetection/blob/latest/configs/rtmdet/README.md#classification).
diff --git a/projects/rtmpose/README_CN.md b/projects/rtmpose/README_CN.md
index 10b3a4484b..72d8c08b7b 100644
--- a/projects/rtmpose/README_CN.md
+++ b/projects/rtmpose/README_CN.md
@@ -148,7 +148,8 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性
### 人体 2d 关键点 (17 Keypoints)
-#### AIC+COCO
+<details open>
+<summary><b>AIC+COCO</b></summary>
| Config | Input Size | AP<br>(COCO) | PCK@0.1<br>(Body8) | AUC<br>(Body8) | EPE<br>(Body8) | Params(M) | FLOPS(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | ncnn-FP16-Latency(ms)<br>(Snapdragon 865) | Download |
| :---------------------------------------------------------------------------: | :--------: | :---------------: | :---------------------: | :-----------------: | :-----------------: | :-------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------: |
@@ -159,7 +160,10 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性
| [RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 77.0 | 94.32 | 69.85 | 14.64 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth) |
| [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 77.3 | 94.54 | 70.14 | 14.30 | 27.79 | 9.35 | - | 6.05 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth) |
-#### Body8
+</details>
+
+<details open>
+<summary><b>Body8</b></summary>
- `*` 代表模型在 7 个开源数据集上训练得到:
- [AI Challenger](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#aic)
@@ -180,6 +184,8 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性
| [RTMPose-m\*](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 76.6 | 94.64 | 70.38 | 13.98 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth) |
| [RTMPose-l\*](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 78.3 | 95.36 | 71.58 | 13.08 | 27.79 | 9.35 | - | 6.05 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-384x288-3f5a1437_20230504.pth) |
+
+</details>
#### 模型剪枝
**说明**
@@ -214,17 +220,36 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性
| Config | Input Size | NME<br>(LaPa) | FLOPS(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | Download |
| :----------------------------------------------------------------------------: | :--------: | :----------------: | :------: | :--------------------------------: | :---------------------------------------: | :---------: |
-| [RTMPose-m (alpha version)](./rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.70 | - | - | - | Coming soon |
+| [RTMPose-m (试用)](./rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.70 | - | - | - | Coming soon |
-### 手部 2d 关键点
+### 手部 2d 关键点 (21 Keypoints)
+
+| Detection Config | Input Size | Model AP<br>(OneHand10K) | Flops<br>(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | Download |
+| :------------------------------------: | :--------: | :---------------------------: | :---------------: | :--------------------------------: | :---------------------------------------: | :-----------------------------: |
+| [RTMDet-nano (试用)](./rtmdet/hand/rtmdet_nano_320-8xb32_hand.py) | 320x320 | 76.0 | 0.31 | - | - | [Det Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmdet_nano_8xb32-300e_hand-267f9c8f.pth) |
-Coming soon
+<details open>
+<summary><b>Hand5</b></summary>
+
+- `Hand5` and `*` 代表模型在 5 个开源数据集上训练得到:
+ - [COCO-Wholebody-Hand](https://github.com/jin-s13/COCO-WholeBody/)
+ - [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html)
+ - [FreiHand2d](https://lmb.informatik.uni-freiburg.de/projects/freihand/)
+ - [RHD2d](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html)
+ - [Halpe](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html#halpe)
+
+| Config | Input Size | PCK@0.2<br>(COCO-Wholebody-Hand) | PCK@0.2<br>(Hand5) | AUC<br>(Hand5) | EPE<br>(Hand5) | FLOPS(G) | ORT-Latency(ms)<br>(i7-11700) | TRT-FP16-Latency(ms)<br>(GTX 1660Ti) | Download |
+| :--------------------------------------------------------------------------------------------------: | :--------: | :-----------------------------------: | :---------------------: | :-----------------: | :-----------------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------: |
+| [RTMPose-m\* (试用)](./rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 81.5 | 96.4 | 83.9 | 5.06 | 2.581 | - | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth) |
+
+</details>
### 预训练模型
我们提供了 UDP 预训练的 CSPNeXt 模型参数,训练配置请参考 [pretrain_cspnext_udp folder](./rtmpose/pretrain_cspnext_udp/)。
-#### AIC+COCO
+<details open>
+<summary><b>AIC+COCO</b></summary>
| Model | Input Size | Params(M) | Flops(G) | AP<br>(GT) | AR<br>(GT) | Download |
| :----------: | :--------: | :-------: | :------: | :-------------: | :-------------: | :-----------------------------------------------------------------------------------------------------------------------------: |
@@ -233,7 +258,10 @@ Coming soon
| CSPNeXt-m | 256x192 | 17.53 | 3.05 | 74.8 | 77.7 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth) |
| CSPNeXt-l | 256x192 | 32.44 | 5.32 | 77.2 | 79.9 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth) |
-#### Body8
+</details>
+
+<details open>
+<summary><b>Body8</b></summary>
- `*` 代表模型在 7 个开源数据集上训练得到:
- [AI Challenger](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#aic)
@@ -254,6 +282,8 @@ Coming soon
| CSPNeXt-m\* | 384x288 | 17.53 | 6.86 | 75.8 | 97.60 | 70.18 | 14.04 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-body7_210e-384x288-b9bc2b57_20230504.pth) |
| CSPNeXt-l\* | 384x288 | 32.44 | 11.96 | 77.2 | 97.89 | 71.23 | 13.05 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-body7_210e-384x288-b15bc30d_20230504.pth) |
+
+</details>
#### ImageNet
我们提供了 ImageNet 分类训练的 CSPNeXt 模型参数,更多细节请参考 [RTMDet](https://github.com/open-mmlab/mmdetection/blob/latest/configs/rtmdet/README.md#classification)。
diff --git a/projects/rtmpose/rtmdet/hand/rtmdet_nano_320-8xb32_hand.py b/projects/rtmpose/rtmdet/hand/rtmdet_nano_320-8xb32_hand.py
new file mode 100644
index 0000000000..278cc0bfe8
--- /dev/null
+++ b/projects/rtmpose/rtmdet/hand/rtmdet_nano_320-8xb32_hand.py
@@ -0,0 +1,171 @@
+_base_ = 'mmdet::rtmdet/rtmdet_l_8xb32-300e_coco.py'
+
+input_shape = 320
+
+model = dict(
+ backbone=dict(
+ deepen_factor=0.33,
+ widen_factor=0.25,
+ use_depthwise=True,
+ ),
+ neck=dict(
+ in_channels=[64, 128, 256],
+ out_channels=64,
+ num_csp_blocks=1,
+ use_depthwise=True,
+ ),
+ bbox_head=dict(
+ in_channels=64,
+ feat_channels=64,
+ share_conv=False,
+ exp_on_reg=False,
+ use_depthwise=True,
+ num_classes=1),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
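+# Nano-scale factors shrink the RTMDet-L base: depth x0.33, width x0.25, with
+# depthwise convolutions throughout, giving a ~0.31 GFLOPs single-class
+# ('hand') detector at 320x320 input.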
+
+# file_client_args = dict(
+# backend='petrel',
+# path_mapping=dict({'data/': 's3://openmmlab/datasets/'}))
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='CachedMosaic',
+ img_scale=(input_shape, input_shape),
+ pad_val=114.0,
+ max_cached_images=20,
+ random_pop=False),
+ dict(
+ type='RandomResize',
+ scale=(input_shape * 2, input_shape * 2),
+ ratio_range=(0.5, 1.5),
+ keep_ratio=True),
+ dict(type='RandomCrop', crop_size=(input_shape, input_shape)),
+ dict(type='YOLOXHSVRandomAug'),
+ dict(type='RandomFlip', prob=0.5),
+ dict(
+ type='Pad',
+ size=(input_shape, input_shape),
+ pad_val=dict(img=(114, 114, 114))),
+ dict(type='PackDetInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='RandomResize',
+ scale=(input_shape, input_shape),
+ ratio_range=(0.5, 1.5),
+ keep_ratio=True),
+ dict(type='RandomCrop', crop_size=(input_shape, input_shape)),
+ dict(type='YOLOXHSVRandomAug'),
+ dict(type='RandomFlip', prob=0.5),
+ dict(
+ type='Pad',
+ size=(input_shape, input_shape),
+ pad_val=dict(img=(114, 114, 114))),
+ dict(type='PackDetInputs')
+]
+
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='Resize', scale=(input_shape, input_shape), keep_ratio=True),
+ dict(
+ type='Pad',
+ size=(input_shape, input_shape),
+ pad_val=dict(img=(114, 114, 114))),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+
+data_mode = 'topdown'
+data_root = 'data/'
+
+train_dataset = dict(
+ _delete_=True,
+ type='ConcatDataset',
+ datasets=[
+ dict(
+ type='mmpose.OneHand10KDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ pipeline=train_pipeline,
+ ann_file='onehand10k/annotations/onehand10k_train.json',
+ data_prefix=dict(img='pose/OneHand10K/')),
+ dict(
+ type='mmpose.FreiHandDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ pipeline=train_pipeline,
+ ann_file='freihand/annotations/freihand_train.json',
+ data_prefix=dict(img='pose/FreiHand/')),
+ dict(
+ type='mmpose.Rhd2DDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ pipeline=train_pipeline,
+ ann_file='rhd/annotations/rhd_train.json',
+ data_prefix=dict(img='pose/RHD/')),
+ dict(
+ type='mmpose.HalpeHandDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ pipeline=train_pipeline,
+ ann_file='halpe/annotations/halpe_train_v1.json',
+ data_prefix=dict(
+ img='pose/Halpe/hico_20160224_det/images/train2015/') # noqa
+ )
+ ],
+ ignore_keys=[
+ 'CLASSES', 'dataset_keypoint_weights', 'dataset_name', 'flip_indices',
+ 'flip_pairs', 'keypoint_colors', 'keypoint_id2name',
+ 'keypoint_name2id', 'lower_body_ids', 'num_keypoints',
+ 'num_skeleton_links', 'sigmas', 'skeleton_link_colors',
+ 'skeleton_links', 'upper_body_ids'
+ ],
+)
+
+test_dataset = dict(
+ _delete_=True,
+ type='mmpose.OneHand10KDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ pipeline=test_pipeline,
+ ann_file='onehand10k/annotations/onehand10k_test.json',
+ data_prefix=dict(img='pose/OneHand10K/'),
+)
+
+train_dataloader = dict(dataset=train_dataset)
+val_dataloader = dict(dataset=test_dataset)
+test_dataloader = val_dataloader
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='PipelineSwitchHook',
+ switch_epoch=280,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'onehand10k/annotations/onehand10k_test.json',
+ metric='bbox',
+ format_only=False)
+test_evaluator = val_evaluator
+
+train_cfg = dict(val_interval=1)
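This detector is meant to serve as the first stage in front of the top-down RTMPose hand model. A sketch of the two-stage flow with the standard mmdet/mmpose 1.x APIs follows; both checkpoint paths and the image name are placeholders.

```python
from mmdet.apis import init_detector, inference_detector
from mmpose.apis import init_model, inference_topdown

# Stage 1: detect hand boxes with the RTMDet-nano config above.
det = init_detector(
    'projects/rtmpose/rtmdet/hand/rtmdet_nano_320-8xb32_hand.py',
    'rtmdet_nano_hand.pth', device='cuda:0')
pred = inference_detector(det, 'demo_hand.jpg').pred_instances
bboxes = pred.bboxes[pred.scores > 0.5].cpu().numpy()

# Stage 2: estimate 21 keypoints per detected box with RTMPose-Hand5.
pose = init_model(
    'configs/hand_2d_keypoint/rtmpose/hand5/'
    'rtmpose-m_8xb256-210e_hand5-256x256.py',
    'rtmpose_hand5.pth', device='cuda:0')
samples = inference_topdown(pose, 'demo_hand.jpg', bboxes)
keypoints = samples[0].pred_instances.keypoints  # (1, 21, 2), first hand
```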