From 0d835d8dbcf7af8287f3a3afb546bf5725c3744d Mon Sep 17 00:00:00 2001
From: Qikai Li <87690686+liqikai9@users.noreply.github.com>
Date: Mon, 6 Dec 2021 02:38:39 -0600
Subject: [PATCH] [Doc] Add related docs for PoseWarper (#1036)
* add related docs for PoseWarper
* add related readme docs for posewarper
* modify related args in posewarper stage2 config
* modify posewarper stage2 config path
---
.dev_scripts/github/update_model_index.py | 1 +
configs/body/2d_kpt_sview_rgb_vid/README.md | 9 +
.../2d_kpt_sview_rgb_vid/posewarper/README.md | 25 +++
.../hrnet_posetrack18_posewarper.md | 86 ++++++++
.../hrnet_posetrack18_posewarper.yml | 48 ++++
...8_posetrack18_384x288_posewarper_stage1.py | 174 +++++++++++++++
...8_posetrack18_384x288_posewarper_stage2.py | 205 ++++++++++++++++++
docs/papers/algorithms/posewarper.md | 17 ++
model-index.yml | 1 +
9 files changed, 566 insertions(+)
create mode 100644 configs/body/2d_kpt_sview_rgb_vid/README.md
create mode 100644 configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md
create mode 100644 configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
create mode 100644 configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml
create mode 100644 configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py
create mode 100644 configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py
create mode 100644 docs/papers/algorithms/posewarper.md
diff --git a/.dev_scripts/github/update_model_index.py b/.dev_scripts/github/update_model_index.py
index 37f9e1b443..1b1498e039 100755
--- a/.dev_scripts/github/update_model_index.py
+++ b/.dev_scripts/github/update_model_index.py
@@ -121,6 +121,7 @@ def parse_config_path(path):
# convert task name to readable version
task2readable = {
'2d_kpt_sview_rgb_img': '2D Keypoint',
+ '2d_kpt_sview_rgb_vid': '2D Keypoint',
'3d_kpt_sview_rgb_img': '3D Keypoint',
'3d_kpt_sview_rgb_vid': '3D Keypoint',
'3d_mesh_sview_rgb_img': '3D Mesh',
diff --git a/configs/body/2d_kpt_sview_rgb_vid/README.md b/configs/body/2d_kpt_sview_rgb_vid/README.md
new file mode 100644
index 0000000000..614c6d9f89
--- /dev/null
+++ b/configs/body/2d_kpt_sview_rgb_vid/README.md
@@ -0,0 +1,9 @@
+# Video-based Single-view 2D Human Body Pose Estimation
+
+Multi-person 2D human pose estimation in video is the task of detecting the poses (or keypoints) of all people in each frame of an input video.
+
+For this task, we currently support [PoseWarper](/configs/body/2d_kpt_sview_rgb_vid/posewarper).
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/tasks/2d_body_keypoint.md) to prepare data.
diff --git a/configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md b/configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md
new file mode 100644
index 0000000000..425d116704
--- /dev/null
+++ b/configs/body/2d_kpt_sview_rgb_vid/posewarper/README.md
@@ -0,0 +1,25 @@
+# Learning Temporal Pose Estimation from Sparsely-Labeled Videos
+
+PoseWarper (NeurIPS'2019)
+
+```bibtex
+@inproceedings{NIPS2019_gberta,
+  title = {Learning Temporal Pose Estimation from Sparsely Labeled Videos},
+  author = {Bertasius, Gedas and Feichtenhofer, Christoph and Tran, Du and Shi, Jianbo and Torresani, Lorenzo},
+  booktitle = {Advances in Neural Information Processing Systems 32},
+  year = {2019},
+}
+```
+
+PoseWarper proposes a network that leverages training videos with sparse annotations (one labeled frame every k frames) to learn dense temporal pose propagation and estimation. Given a pair of video frames, a labeled Frame A and an unlabeled Frame B, the model is trained to predict the human pose in Frame A from the features of Frame B, using deformable convolutions to implicitly learn the pose warping between the two frames.
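+
+As a minimal, hypothetical sketch (not the actual `PoseWarperNeck`, which predicts offsets at several dilation rates through a residual-block stack), the warping step can be reduced to one offset conv plus a deformable convolution from `mmcv.ops`; the channel sizes below are illustrative and a CUDA build of mmcv with a GPU is assumed:
+
+```python
+import torch
+import torch.nn as nn
+from mmcv.ops import DeformConv2d
+
+feat_a = torch.randn(1, 48, 96, 72).cuda()  # features of labeled Frame A
+feat_b = torch.randn(1, 48, 96, 72).cuda()  # features of unlabeled Frame B
+
+# Predict sampling offsets from the feature difference:
+# 2 coordinates per kernel position -> 2 * 3 * 3 = 18 channels.
+offset_head = nn.Conv2d(48, 2 * 3 * 3, kernel_size=3, padding=1).cuda()
+offsets = offset_head(feat_a - feat_b)
+
+# The deformable convolution resamples Frame B's features at the predicted
+# offsets, i.e. it warps them towards Frame A before pose estimation.
+warp = DeformConv2d(48, 48, kernel_size=3, padding=1).cuda()
+warped_b = warp(feat_b, offsets)
+assert warped_b.shape == feat_a.shape
+```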
+
+The training of PoseWarper is split into two stages.
+
+In the first stage, the model is initialized with pre-trained weights and the backbone is fine-tuned in a single-frame setting.
+
+In the second stage, training starts from the stage-1 model, and the warping offsets are learned in a multi-frame setting while the backbone is frozen.
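+
+The two stages correspond to the two configs added below. As a minimal sketch (assuming mmcv 1.x and the repository root as the working directory), the key differences can be inspected like this:
+
+```python
+from mmcv import Config
+
+base = 'configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/'
+stage1 = Config.fromfile(base + 'hrnet_w48_posetrack18_384x288_posewarper_stage1.py')
+stage2 = Config.fromfile(base + 'hrnet_w48_posetrack18_384x288_posewarper_stage2.py')
+
+print(stage1.model.type)  # 'TopDown': plain single-frame fine-tuning
+print(stage2.model.type)  # 'PoseWarper': multi-frame model with a warping neck
+print(stage2.load_from)   # the stage-1 checkpoint used as initialization
+print(stage2.model.backbone.frozen_stages)  # 4: the backbone is frozen
+```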
diff --git a/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
new file mode 100644
index 0000000000..9c9f01abb8
--- /dev/null
+++ b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
@@ -0,0 +1,86 @@
+
+PoseWarper (NeurIPS'2019)
+
+```bibtex
+@inproceedings{NIPS2019_gberta,
+  title = {Learning Temporal Pose Estimation from Sparsely Labeled Videos},
+  author = {Bertasius, Gedas and Feichtenhofer, Christoph and Tran, Du and Shi, Jianbo and Torresani, Lorenzo},
+  booktitle = {Advances in Neural Information Processing Systems 32},
+  year = {2019},
+}
+```
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+PoseTrack18 (CVPR'2018)
+
+```bibtex
+@inproceedings{andriluka2018posetrack,
+ title={Posetrack: A benchmark for human pose estimation and tracking},
+ author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={5167--5176},
+ year={2018}
+}
+```
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+Note that the training of PoseWarper is split into two stages.
+
+The first stage starts from an HRNet-w48 [checkpoint](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288-314c8528_20200708.pth) pre-trained on the COCO dataset, and the backbone is fine-tuned on PoseTrack18 in a single-frame setting.
+
+The second stage starts from the resulting stage-1 [checkpoint](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage1-08b632aa_20211130.pth), and the warping offsets are learned in a multi-frame setting while the backbone is frozen.
+
+Results on PoseTrack2018 val with ground-truth bounding boxes
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--- | :--------: | :------: |:------: |:------: |:------: |:------: |:------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py) | 384x288 | 88.2 | 90.3 | 86.1 | 81.6 | 81.8 | 83.8 | 81.5 | 85.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2_20211130.log.json) |
+
+Results on PoseTrack2018 val with precomputed human bounding boxes from the PoseWarper supplementary data files ([download](https://www.dropbox.com/s/ygfy6r8nitoggfq/PoseWarper_supp_files.zip?dl=0)).
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--- | :--------: | :------: |:------: |:------: |:------: |:------: |:------: | :------: | :------: |:------: |:------: |
+| [pose_hrnet_w48](/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py) | 384x288 | 81.8 | 85.6 | 82.7 | 77.2 | 76.8 | 79.0 | 74.4 | 79.8 | [ckpt](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2_20211130.log.json) |
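+
+The two tables above differ only in the source of the person bounding boxes, which is controlled by `use_gt_bbox` and `bbox_file` in the stage-2 config. A minimal sketch (assuming mmcv 1.x) of switching between the two settings:
+
+```python
+from mmcv import Config
+
+cfg = Config.fromfile(
+    'configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/'
+    'hrnet_w48_posetrack18_384x288_posewarper_stage2.py')
+
+# Default: precomputed detection boxes (second table).
+print(cfg.data.val.data_cfg.use_gt_bbox)  # False
+print(cfg.data.val.data_cfg.bbox_file)    # .../val_boxes.json
+
+# Switch to ground-truth boxes (first table) before running the test script.
+cfg.data.val.data_cfg.use_gt_bbox = True
+cfg.data.test.data_cfg.use_gt_bbox = True
+```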
diff --git a/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml
new file mode 100644
index 0000000000..257945423c
--- /dev/null
+++ b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml
@@ -0,0 +1,48 @@
+Collections:
+- Name: PoseWarper
+ Paper:
+ Title: Learning Temporal Pose Estimation from Sparsely Labeled Videos
+ URL: https://arxiv.org/abs/1906.04016
+Models:
+- Config: configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py
+ In Collection: PoseWarper
+ Metadata:
+ Architecture: &id001
+ - PoseWarper
+ - HRNet
+    Training Data: &id002
+    - COCO
+    - PoseTrack18
+ Name: posewarper_hrnet_w48_posetrack18_384x288_posewarper_stage2
+ README: configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
+ Results:
+  - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 81.5
+ Elb: 86.1
+ Head: 88.2
+ Hip: 81.8
+ Knee: 83.8
+ Shou: 90.3
+ Total: 85.0
+ Wri: 81.6
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth
+- Config: configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py
+ In Collection: PoseWarper
+ Metadata:
+ Architecture: *id001
+    Training Data: *id002
+ Name: posewarper_hrnet_w48_posetrack18_384x288_posewarper_stage2
+ README: configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.md
+ Results:
+  - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 74.4
+ Elb: 82.7
+ Head: 81.8
+ Hip: 76.8
+ Knee: 79.0
+ Shou: 85.6
+ Total: 79.8
+ Wri: 77.2
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage2-4abf88db_20211130.pth
diff --git a/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py
new file mode 100644
index 0000000000..3216dc1a87
--- /dev/null
+++ b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage1.py
@@ -0,0 +1,174 @@
+_base_ = ['../../../../_base_/datasets/posetrack18.py']
+log_level = 'INFO'
+load_from = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_384x288-314c8528_20200708.pth' # noqa: E501
+resume_from = None
+dist_params = dict(backend='nccl')
+cudnn_benchmark = True
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='Total AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0001,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(policy='step', step=[5, 7])
+total_epochs = 10
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='TopDown',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=1, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=True,
+ det_bbox_thr=0.2,
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+)
+
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=45,
+ scale_factor=0.35),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+ 'flip_pairs'
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/posetrack18'
+data = dict(
+ samples_per_gpu=16,
+ workers_per_gpu=3,
+ val_dataloader=dict(samples_per_gpu=16),
+ test_dataloader=dict(samples_per_gpu=16),
+ train=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownPoseTrack18Dataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py
new file mode 100644
index 0000000000..c19ccc7530
--- /dev/null
+++ b/configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_w48_posetrack18_384x288_posewarper_stage2.py
@@ -0,0 +1,205 @@
+_base_ = ['../../../../_base_/datasets/posetrack18.py']
+log_level = 'INFO'
+load_from = 'https://download.openmmlab.com/mmpose/top_down/posewarper/hrnet_w48_posetrack18_384x288_posewarper_stage1-08b632aa_20211130.pth' # noqa: E501
+resume_from = None
+dist_params = dict(backend='nccl')
+cudnn_benchmark = True
+workflow = [('train', 1)]
+checkpoint_config = dict(interval=1)
+evaluation = dict(interval=1, metric='mAP', save_best='Total AP')
+
+optimizer = dict(
+ type='Adam',
+ lr=0.0001,
+)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(policy='step', step=[10, 15])
+total_epochs = 20
+log_config = dict(
+ interval=100,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
+channel_cfg = dict(
+ num_output_channels=17,
+ dataset_joints=17,
+ dataset_channel=[
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ ],
+ inference_channel=[
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+ ])
+
+# model settings
+model = dict(
+ type='PoseWarper',
+ pretrained=None,
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ frozen_stages=4,
+ ),
+ concat_tensors=True,
+ neck=dict(
+ type='PoseWarperNeck',
+ in_channels=48,
+ out_channels=channel_cfg['num_output_channels'],
+ inner_channels=128,
+ deform_groups=channel_cfg['num_output_channels'],
+ dilations=(3, 6, 12, 18, 24),
+ trans_conv_kernel=1,
+ res_blocks_cfg=dict(block='BASIC', num_blocks=20),
+ offsets_kernel=3,
+ deform_conv_kernel=3,
+ freeze_trans_layer=True,
+ im2col_step=80),
+ keypoint_head=dict(
+ type='TopdownHeatmapSimpleHead',
+ in_channels=channel_cfg['num_output_channels'],
+ out_channels=channel_cfg['num_output_channels'],
+ num_deconv_layers=0,
+ extra=dict(final_conv_kernel=0, ),
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=False,
+ post_process='default',
+ shift_heatmap=True,
+ modulate_kernel=11))
+
+data_cfg = dict(
+ image_size=[288, 384],
+ heatmap_size=[72, 96],
+ num_output_channels=channel_cfg['num_output_channels'],
+ num_joints=channel_cfg['dataset_joints'],
+ dataset_channel=channel_cfg['dataset_channel'],
+ inference_channel=channel_cfg['inference_channel'],
+ use_nms=True,
+ soft_nms=False,
+ nms_thr=1.0,
+ oks_thr=0.9,
+ vis_thr=0.2,
+ use_gt_bbox=False,
+ det_bbox_thr=0.2,
+ bbox_file='data/posetrack18/posetrack18_precomputed_boxes/'
+ 'val_boxes.json',
+ # frame_indices_train=[-1, 0],
+ frame_index_rand=True,
+ frame_index_range=[-2, 2],
+ num_adj_frames=1,
+ frame_indices_test=[-2, -1, 0, 1, 2],
+    # the first weight corresponds to the current frame,
+    # the rest follow in ascending order of frame index
+ frame_weight_train=(0.0, 1.0),
+ frame_weight_test=(0.3, 0.1, 0.25, 0.25, 0.1),
+)
+
+# note that the order of the transforms matters
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='TopDownHalfBodyTransform',
+ num_joints_half_body=8,
+ prob_half_body=0.3),
+ dict(
+ type='TopDownGetRandomScaleRotation', rot_factor=45,
+ scale_factor=0.35),
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(type='TopDownGenerateTarget', sigma=3),
+ dict(
+ type='Collect',
+ keys=['img', 'target', 'target_weight'],
+ meta_keys=[
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+ 'rotation', 'bbox_score', 'flip_pairs', 'frame_weight'
+ ]),
+]
+
+val_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='TopDownAffine'),
+ dict(type='ToTensor'),
+ dict(
+ type='NormalizeTensor',
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]),
+ dict(
+ type='Collect',
+ keys=[
+ 'img',
+ ],
+ meta_keys=[
+ 'image_file',
+ 'center',
+ 'scale',
+ 'rotation',
+ 'bbox_score',
+ 'flip_pairs',
+ 'frame_weight',
+ ]),
+]
+
+test_pipeline = val_pipeline
+
+data_root = 'data/posetrack18'
+data = dict(
+ samples_per_gpu=8,
+ workers_per_gpu=2,
+ val_dataloader=dict(samples_per_gpu=4),
+ test_dataloader=dict(samples_per_gpu=4),
+ train=dict(
+ type='TopDownPoseTrack18VideoDataset',
+ ann_file=f'{data_root}/annotations/posetrack18_train.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=train_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ val=dict(
+ type='TopDownPoseTrack18VideoDataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=val_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+ test=dict(
+ type='TopDownPoseTrack18VideoDataset',
+ ann_file=f'{data_root}/annotations/posetrack18_val.json',
+ img_prefix=f'{data_root}/',
+ data_cfg=data_cfg,
+ pipeline=test_pipeline,
+ dataset_info={{_base_.dataset_info}}),
+)
diff --git a/docs/papers/algorithms/posewarper.md b/docs/papers/algorithms/posewarper.md
new file mode 100644
index 0000000000..0ec8441f16
--- /dev/null
+++ b/docs/papers/algorithms/posewarper.md
@@ -0,0 +1,17 @@
+# Learning Temporal Pose Estimation from Sparsely-Labeled Videos
+
+PoseWarper (NeurIPS'2019)
+
+```bibtex
+@inproceedings{NIPS2019_gberta,
+  title = {Learning Temporal Pose Estimation from Sparsely Labeled Videos},
+  author = {Bertasius, Gedas and Feichtenhofer, Christoph and Tran, Du and Shi, Jianbo and Torresani, Lorenzo},
+  booktitle = {Advances in Neural Information Processing Systems 32},
+  year = {2019},
+}
+```
+
diff --git a/model-index.yml b/model-index.yml
index f6b26e9c1d..e928776228 100644
--- a/model-index.yml
+++ b/model-index.yml
@@ -75,6 +75,7 @@ Import:
- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/resnet_ochuman.yml
- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/hrnet_posetrack18.yml
- configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/posetrack18/resnet_posetrack18.yml
+- configs/body/2d_kpt_sview_rgb_vid/posewarper/posetrack18/hrnet_posetrack18_posewarper.yml
- configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.yml
- configs/body/3d_kpt_sview_rgb_img/pose_lift/mpi_inf_3dhp/simplebaseline3d_mpi-inf-3dhp.yml
- configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/videopose3d_h36m.yml