From 0d835d8dbcf7af8287f3a3afb546bf5725c3744d Mon Sep 17 00:00:00 2001 From: Qikai Li <87690686+liqikai9@users.noreply.github.com> Date: Mon, 6 Dec 2021 02:38:39 -0600 Subject: [PATCH] [Doc] Add related docs for PoseWarper (#1036) * add related docs for PoseWarper * add related readme docs for posewarper * modify related args in posewarper stage2 config * modify posewarper stage2 config path --- .dev_scripts/github/update_model_index.py | 1 + configs/body/2d_kpt_sview_rgb_vid/README.md | 9 + .../2d_kpt_sview_rgb_vid/posewarper/README.md | 25 +++ .../hrnet_posetrack18_posewarper.md | 86 ++++++++ .../hrnet_posetrack18_posewarper.yml | 48 ++++ ...8_posetrack18_384x288_posewarper_stage1.py | 174 +++++++++++++++ ...8_posetrack18_384x288_posewarper_stage2.py | 205 ++++++++++++++++++ docs/papers/algorithms/posewarper.md | 17 ++ model-index.yml | 1 + 9 files changed, 566 insertions(+) create mode 100644 configs/body/2d_kpt_sview_rgb_vid/README.md create mode 100644 configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md create mode 100644 configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md create mode 100644 configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml create mode 100644 configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py create mode 100644 configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py create mode 100644 docs/papers/algorithms/posewarper.md diff --git a/.dev_scripts/github/update_model_index.py b/.dev_scripts/github/update_model_index.py index 37f9e1b443..1b1498e039 100755 --- a/.dev_scripts/github/update_model_index.py +++ b/.dev_scripts/github/update_model_index.py @@ -121,6 +121,7 @@ def parse_config_path(path): # convert task name to readable version task2readable = { '2d_kpt_sview_rgb_img': '2D Keypoint', + '2d_kpt_sview_rgb_vid': '2D Keypoint', '3d_kpt_sview_rgb_img': '3D Keypoint', '3d_kpt_sview_rgb_vid': '3D Keypoint', '3d_mesh_sview_rgb_img': '3D Mesh', diff --git a/configs/body/2d_kpt_sview_rgb_vid/README.md b/configs/body/2d_kpt_sview_rgb_vid/README.md new file mode 100644 index 0000000000..614c6d9f89 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_vid/README.md @@ -0,0 +1,9 @@ +# Video-based Single-view 2D Human Body Pose Estimation + +Multi-person 2D human pose estimation in video is defined as the task of detecting the poses (or keypoints) of all people from an input video. + +For this task, we currently support [PoseWarper](/configs/body/2d_kpt_sview_rgb_vid/posewarper). + +## Data preparation + +Please follow [DATA Preparation](/docs/tasks/2d_body_keypoint.md) to prepare data. diff --git a/configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md b/configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md new file mode 100644 index 0000000000..425d116704 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md @@ -0,0 +1,25 @@ +# Learning Temporal Pose Estimation from Sparsely-Labeled Videos + + + +
+PoseWarper (NeurIPS'2019)
+
+```bibtex
+@inproceedings{NIPS2019_gberta,
+title = {Learning Temporal Pose Estimation from Sparsely Labeled Videos},
+author = {Bertasius, Gedas and Feichtenhofer, Christoph and Tran, Du and Shi, Jianbo and Torresani, Lorenzo},
+booktitle = {Advances in Neural Information Processing Systems 32},
+year = {2019},
+}
+```
+
+</details>
+
+PoseWarper proposes a network that leverages training videos with sparse annotations (every k frames) to learn dense temporal pose propagation and estimation. Given a pair of video frames, a labeled Frame A and an unlabeled Frame B, the model is trained to predict the human pose in Frame A using the features of Frame B, employing deformable convolutions to implicitly learn the pose warping between A and B.
+
+The training of PoseWarper is split into two stages.
+
+In the first stage, the model is initialized from a pre-trained checkpoint, and the main backbone is fine-tuned in a single-frame setting.
+
+In the second stage, the model is initialized from the first-stage checkpoint, and the warping offsets are learned in a multi-frame setting while the backbone is frozen.
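+
+A quick way to see how the two stages differ is to load both configs and compare a few fields. A minimal sketch (assuming `mmcv` is installed and the snippet is run from the repository root):
+
+```python
+from mmcv import Config
+
+stage1 = Config.fromfile(
+    'configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/'
+    'hrnet_w48_posetrack18_384x288_posewarper_stage1.py')
+stage2 = Config.fromfile(
+    'configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/'
+    'hrnet_w48_posetrack18_384x288_posewarper_stage2.py')
+
+print(stage1.model.type)                    # 'TopDown': single-frame baseline
+print(stage2.model.type)                    # 'PoseWarper': multi-frame model
+print(stage2.model.backbone.frozen_stages)  # 4: the backbone is frozen in stage two
+print(stage2.load_from)                     # initialized from the stage-1 checkpoint
+```
diff --git a/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
new file mode 100644
index 0000000000..9c9f01abb8
--- /dev/null
+++ b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
@@ -0,0 +1,86 @@
+<!-- [ALGORITHM] -->
+
+<details>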
+PoseWarper (NeurIPS'2019)
+
+```bibtex
+@inproceedings{NIPS2019_gberta,
+title = {Learning Temporal Pose Estimation from Sparsely Labeled Videos},
+author = {Bertasius, Gedas and Feichtenhofer, Christoph and Tran, Du and Shi, Jianbo and Torresani, Lorenzo},
+booktitle = {Advances in Neural Information Processing Systems 32},
+year = {2019},
+}
+```
+
+</details>
+ + + +
+HRNet (CVPR'2019) + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +
+ + + +
+PoseTrack18 (CVPR'2018) + +```bibtex +@inproceedings{andriluka2018posetrack, + title={Posetrack: A benchmark for human pose estimation and tracking}, + author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={5167--5176}, + year={2018} +} +``` + +
+ + + +
+COCO (ECCV'2014) + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +
+
+Note that the training of PoseWarper is split into two stages.
+
+The first stage is trained with the [checkpoint](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288-314c8528_20200708.pth) pre-trained on the COCO dataset, and the main backbone is fine-tuned on PoseTrack18 in a single-frame setting.
+
+The second stage is trained with the [checkpoint](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage1-08b632aa_20211130.pth) from the first stage, and the warping offsets are learned in a multi-frame setting while the backbone is frozen.
+
+Results on PoseTrack2018 val with ground-truth bounding boxes
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py) | 384x288 | 88.2 | 90.3 | 86.1 | 81.6 | 81.8 | 83.8 | 81.5 | 85.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2_20211130.log.json) |
+
+Results on PoseTrack2018 val with precomputed human bounding boxes from the PoseWarper supplementary data files ([download](https://www.dropbox.com/s/ygfy6r8nitoggfq/PoseWarper_supp_files.zip?dl=0))
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--- | :--------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py) | 384x288 | 81.8 | 85.6 | 82.7 | 77.2 | 76.8 | 79.0 | 74.4 | 79.8 | [ckpt](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2_20211130.log.json) |
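+
+To try the released stage-2 checkpoint, a minimal loading sketch (assuming an `mmpose` installation; `init_pose_model` is the standard high-level API, and PoseWarper additionally expects the current frame together with its supporting frames at test time, prepared as in the video dataset config):
+
+```python
+from mmpose.apis import init_pose_model
+
+pose_model = init_pose_model(
+    'configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/'
+    'hrnet_w48_posetrack18_384x288_posewarper_stage2.py',
+    'https://download.openmmlab.com/mmpose/top_down/posewarper/'
+    'hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth',
+    device='cuda:0')
+```
diff --git a/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml
new file mode 100644
index 0000000000..257945423c
--- /dev/null
+++ b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml
@@ -0,0 +1,48 @@
+Collections:
+- Name: PoseWarper
+  Paper:
+    Title: Learning Temporal Pose Estimation from Sparsely Labeled Videos
+    URL: https://arxiv.org/abs/1906.04016
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py
+  In Collection: PoseWarper
+  Metadata:
+    Architecture: &id001
+    - PoseWarper
+    - HRNet
+    Training Data: PoseTrack18
+  Name: posewarper_hrnet_w48_posetrack18_384x288_posewarper_stage2
+  README: configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
+  Results:
+  - Dataset: PoseTrack18
+    Metrics:
+      Ankl: 81.5
+      Elb: 86.1
+      Head: 88.2
+      Hip: 81.8
+      Knee: 83.8
+      Shou: 90.3
+      Total: 85.0
+      Wri: 81.6
+    Task: Body 2D Keypoint
+    Weights: https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth
+- Config: configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py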
+  In Collection: PoseWarper
+  Metadata:
+    Architecture: *id001
+    Training Data: PoseTrack18
+  Name: posewarper_hrnet_w48_posetrack18_384x288_posewarper_stage2
+  README: configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
+  Results:
+  - Dataset: PoseTrack18
+    Metrics:
+      Ankl: 74.4
+      Elb: 82.7
+      Head: 81.8
+      Hip: 76.8
+      Knee: 79.0
+      Shou: 85.6
+      Total: 79.8
+      Wri: 77.2
+    Task: Body 2D Keypoint
+    Weights: https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth
diff --git a/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py
new file mode 100644
index 0000000000..3216dc1a87
--- /dev/null
+++ b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py
@@ -0,0 +1,174 @@
+_base_ = ['../../../../_base_/datasets/posetrack18.py']
+log_level = 'INFO'
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288-314c8528_20200708.pth'  # noqa: E501
+resume_from = None
+dist_params = dict(backend='nccl')
+cudnn_benchmark = True
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='Total AP')
+
+optimizer = dict(
+    type='Adam',
+    lr=0.0001,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(policy='step', step=[5, 7])
+total_epochs = 10
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+
+channel_cfg = dict(
+    num_output_channels=17,
+    dataset_joints=17,
+    dataset_channel=[
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+    ],
+    inference_channel=[
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+    ])
+
+# model settings
+model = dict(
+    type='TopDown',
+    pretrained=None,
+    backbone=dict(
+        type='HRNet',
+        in_channels=3,
+        extra=dict(
+            stage1=dict(
+                num_modules=1,
+                num_branches=1,
+                block='BOTTLENECK',
+                num_blocks=(4, ),
+                num_channels=(64, )),
+            stage2=dict(
+                num_modules=1,
+                num_branches=2,
+                block='BASIC',
+                num_blocks=(4, 4),
+                num_channels=(48, 96)),
+            stage3=dict(
+                num_modules=4,
+                num_branches=3,
+                block='BASIC',
+                num_blocks=(4, 4, 4),
+                num_channels=(48, 96, 192)),
+            stage4=dict(
+                num_modules=3,
+                num_branches=4,
+                block='BASIC',
+                num_blocks=(4, 4, 4, 4),
+                num_channels=(48, 96, 192, 384))),
+    ),
+    keypoint_head=dict(
+        type='TopdownHeatmapSimpleHead',
+        in_channels=48,
+        out_channels=channel_cfg['num_output_channels'],
+        num_deconv_layers=0,
+        extra=dict(final_conv_kernel=1, ),
+        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+    train_cfg=dict(),
+    test_cfg=dict(
+        flip_test=True,
+        post_process='default',
+        shift_heatmap=True,
+        modulate_kernel=11))
+
+data_cfg = dict(
+    image_size=[288, 384],
+    heatmap_size=[72, 96],
+    num_output_channels=channel_cfg['num_output_channels'],
+    num_joints=channel_cfg['dataset_joints'],
+    dataset_channel=channel_cfg['dataset_channel'],
+    inference_channel=channel_cfg['inference_channel'],
+    soft_nms=False,
+    nms_thr=1.0,
+    oks_thr=0.9,
+    vis_thr=0.2,
+    use_gt_bbox=True,
+    det_bbox_thr=0.2,
+    bbox_file='data/posetrack18/annotations/'
+    'posetrack18_val_human_detections.json',
+)
+
+train_pipeline = [
dict(type='LoadImageFromFile'), + dict( + type='TopDownHalfBodyTransform', + num_joints_half_body=8, + prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=45, + scale_factor=0.35), + dict(type='TopDownRandomFlip', flip_prob=0.5), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/posetrack18' +data = dict( + samples_per_gpu=16, + workers_per_gpu=3, + val_dataloader=dict(samples_per_gpu=16), + test_dataloader=dict(samples_per_gpu=16), + train=dict( + type='TopDownPoseTrack18Dataset', + ann_file=f'{data_root}/annotations/posetrack18_train.json', + img_prefix=f'{data_root}/', + data_cfg=data_cfg, + pipeline=train_pipeline, + dataset_info={{_base_.dataset_info}}), + val=dict( + type='TopDownPoseTrack18Dataset', + ann_file=f'{data_root}/annotations/posetrack18_val.json', + img_prefix=f'{data_root}/', + data_cfg=data_cfg, + pipeline=val_pipeline, + dataset_info={{_base_.dataset_info}}), + test=dict( + type='TopDownPoseTrack18Dataset', + ann_file=f'{data_root}/annotations/posetrack18_val.json', + img_prefix=f'{data_root}/', + data_cfg=data_cfg, + pipeline=test_pipeline, + dataset_info={{_base_.dataset_info}}), +) diff --git a/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py new file mode 100644 index 0000000000..c19ccc7530 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py @@ -0,0 +1,205 @@ +_base_ = ['../../../../_base_/datasets/posetrack18.py'] +log_level = 'INFO' +load_from = 'https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage1-08b632aa_20211130.pth' # noqa: E501 +resume_from = None +dist_params = dict(backend='nccl') +cudnn_benchmark = True +workflow = [('train', 1)] +checkpoint_config = dict(interval=1) +evaluation = dict(interval=1, metric='mAP', save_best='Total AP') + +optimizer = dict( + type='Adam', + lr=0.0001, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[10, 15]) +total_epochs = 20 +log_config = dict( + interval=100, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=17, + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +# model settings +model = dict( + type='PoseWarper', + pretrained=None, + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + 
num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + frozen_stages=4, + ), + concat_tensors=True, + neck=dict( + type='PoseWarperNeck', + in_channels=48, + out_channels=channel_cfg['num_output_channels'], + inner_channels=128, + deform_groups=channel_cfg['num_output_channels'], + dilations=(3, 6, 12, 18, 24), + trans_conv_kernel=1, + res_blocks_cfg=dict(block='BASIC', num_blocks=20), + offsets_kernel=3, + deform_conv_kernel=3, + freeze_trans_layer=True, + im2col_step=80), + keypoint_head=dict( + type='TopdownHeatmapSimpleHead', + in_channels=channel_cfg['num_output_channels'], + out_channels=channel_cfg['num_output_channels'], + num_deconv_layers=0, + extra=dict(final_conv_kernel=0, ), + loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), + train_cfg=dict(), + test_cfg=dict( + flip_test=False, + post_process='default', + shift_heatmap=True, + modulate_kernel=11)) + +data_cfg = dict( + image_size=[288, 384], + heatmap_size=[72, 96], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + use_nms=True, + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + use_gt_bbox=False, + det_bbox_thr=0.2, + bbox_file='data/posetrack18/posetrack18_precomputed_boxes/' + 'val_boxes.json', + # frame_indices_train=[-1, 0], + frame_index_rand=True, + frame_index_range=[-2, 2], + num_adj_frames=1, + frame_indices_test=[-2, -1, 0, 1, 2], + # the first weight is the current frame, + # then on ascending order of frame indices + frame_weight_train=(0.0, 1.0), + frame_weight_test=(0.3, 0.1, 0.25, 0.25, 0.1), +) + +# take care of orders of the transforms +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='TopDownHalfBodyTransform', + num_joints_half_body=8, + prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=45, + scale_factor=0.35), + dict(type='TopDownRandomFlip', flip_prob=0.5), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs', 'frame_weight' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=[ + 'img', + ], + meta_keys=[ + 'image_file', + 'center', + 'scale', + 'rotation', + 'bbox_score', + 'flip_pairs', + 'frame_weight', + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/posetrack18' +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=4), + test_dataloader=dict(samples_per_gpu=4), + train=dict( + type='TopDownPoseTrack18VideoDataset', + ann_file=f'{data_root}/annotations/posetrack18_train.json', + img_prefix=f'{data_root}/', + data_cfg=data_cfg, + pipeline=train_pipeline, + 
dataset_info={{_base_.dataset_info}}),
+    # validation and testing both use the video dataset on the PoseTrack18 val split
+    val=dict(
+        type='TopDownPoseTrack18VideoDataset',
+        ann_file=f'{data_root}/annotations/posetrack18_val.json',
+        img_prefix=f'{data_root}/',
+        data_cfg=data_cfg,
+        pipeline=val_pipeline,
+        dataset_info={{_base_.dataset_info}}),
+    test=dict(
+        type='TopDownPoseTrack18VideoDataset',
+        ann_file=f'{data_root}/annotations/posetrack18_val.json',
+        img_prefix=f'{data_root}/',
+        data_cfg=data_cfg,
+        pipeline=test_pipeline,
+        dataset_info={{_base_.dataset_info}}),
+)
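+
+# For reference, a sketch of how the supporting frames are fused at test
+# time (hypothetical names; per the comment in `data_cfg`, the first entry
+# of `frame_weight_test` weights the current frame and the remaining
+# entries weight the supporting frames in ascending index order):
+#
+#   import torch
+#   weights = torch.tensor(frame_weight_test).view(-1, 1, 1, 1)
+#   fused = (weights * torch.stack(per_frame_heatmaps)).sum(dim=0)
diff --git a/docs/papers/algorithms/posewarper.md b/docs/papers/algorithms/posewarper.md
new file mode 100644
index 0000000000..0ec8441f16
--- /dev/null
+++ b/docs/papers/algorithms/posewarper.md
@@ -0,0 +1,17 @@
+# Learning Temporal Pose Estimation from Sparsely-Labeled Videos
+
+<!-- [ALGORITHM] -->
+
+<details>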
+PoseWarper (NeurIPS'2019)
+
+```bibtex
+@inproceedings{NIPS2019_gberta,
+title = {Learning Temporal Pose Estimation from Sparsely Labeled Videos},
+author = {Bertasius, Gedas and Feichtenhofer, Christoph and Tran, Du and Shi, Jianbo and Torresani, Lorenzo},
+booktitle = {Advances in Neural Information Processing Systems 32},
+year = {2019},
+}
+```
+
+</details>
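+
+PoseWarper predicts the pose in a labeled frame from the features of a neighboring unlabeled frame by regressing per-pixel offsets and applying deformable convolution. A toy sketch of this warping step, not the mmpose implementation (which lives in `PoseWarperNeck`); it assumes `mmcv-full` for `mmcv.ops.DeformConv2d` (a GPU build may be required) and uses 17 heatmap channels to mirror the PoseTrack18 configs:
+
+```python
+import torch
+from torch import nn
+from mmcv.ops import DeformConv2d
+
+
+class ToyWarp(nn.Module):
+    """Warp frame-B features towards frame A with offset-guided deform conv."""
+
+    def __init__(self, channels=17, kernel=3, deform_groups=17):
+        super().__init__()
+        # 2 offsets (x, y) per kernel position per deformable group
+        self.offset_head = nn.Conv2d(
+            channels, 2 * kernel * kernel * deform_groups, 3, padding=1)
+        self.warp = DeformConv2d(
+            channels, channels, kernel, padding=kernel // 2,
+            deform_groups=deform_groups)
+
+    def forward(self, feat_a, feat_b):
+        # offsets are predicted from the feature (pose) difference of the frames
+        offset = self.offset_head(feat_a - feat_b)
+        return self.warp(feat_b, offset)
+
+
+feat_a = torch.randn(1, 17, 96, 72)  # heatmap-like features of labeled frame A
+feat_b = torch.randn(1, 17, 96, 72)  # features of unlabeled frame B
+print(ToyWarp()(feat_a, feat_b).shape)  # torch.Size([1, 17, 96, 72])
+```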
diff --git a/model-index.yml b/model-index.yml index f6b26e9c1d..e928776228 100644 --- a/model-index.yml +++ b/model-index.yml @@ -75,6 +75,7 @@ Import: - configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/resnet_ochuman.yml - configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_posetrack18.yml - configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/resnet_posetrack18.yml +- configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml - configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.yml - configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.yml - configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m.yml