Commit e80a82b (merge 56b0c1a into b647b8c)
Authored by LareinaM on Jun 1, 2023; parents: b647b8c and 56b0c1a

Showing 59 changed files with 5,905 additions and 173 deletions.
17 changes: 17 additions & 0 deletions configs/body_3d_keypoint/video_pose_lift/README.md
# 3D human pose estimation in video with temporal convolutions and semi-supervised training

Building on the success of 2D human pose estimation, this method directly "lifts" a sequence of 2D keypoints to 3D keypoints using a fully-convolutional temporal network.
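
A minimal sketch of the idea, not the repository's implementation: flattened 2D keypoint sequences of shape `(N, 2*17, T)` pass through strided temporal convolutions until the time axis collapses to a single frame, and a 1x1 convolution regresses the root-relative 3D pose. All names and sizes below are illustrative.

```python
import torch
from torch import nn

class TinyLifter(nn.Module):
    """Illustrative 2D-to-3D lifter built from temporal convolutions.

    Input:  (N, 2 * 17, T) flattened 2D keypoint sequences.
    Output: (N, 3 * 17) root-relative 3D pose for the clip.
    """

    def __init__(self, num_joints=17, channels=1024):
        super().__init__()
        self.net = nn.Sequential(
            # strided temporal convs shrink T by 3x per layer: 27 -> 9 -> 3 -> 1
            nn.Conv1d(2 * num_joints, channels, kernel_size=3, stride=3),
            nn.ReLU(),
            nn.Conv1d(channels, channels, kernel_size=3, stride=3),
            nn.ReLU(),
            nn.Conv1d(channels, channels, kernel_size=3, stride=3),
            nn.ReLU(),
        )
        self.head = nn.Conv1d(channels, 3 * num_joints, kernel_size=1)

    def forward(self, x):
        y = self.head(self.net(x))  # (N, 3 * 17, 1)
        return y.squeeze(-1)        # (N, 3 * 17)

x = torch.randn(2, 34, 27)          # batch of 2 clips, 27 frames each
print(TinyLifter()(x).shape)        # torch.Size([2, 51])
```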

## Results and Models

### Human3.6M Dataset

| Arch | Receptive Field | MPJPE (mm) | P-MPJPE (mm) | N-MPJPE (mm) | ckpt | log |
| :------------------------------------------------------ | :-------------: | :---: | :-----: | :-----: | :------------------------------------------------------: | :-----------------------------------------------------: |
| [VideoPose3D-supervised](/configs/body_3d_keypoint/video_pose_lift/h36m/vid-pl_videopose3d-27frm-supv_8xb128-80e_h36m.py) | 27 | 40.1 | 30.1 | / | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_supervised_20210527.log.json) |
| [VideoPose3D-supervised](/configs/body_3d_keypoint/video_pose_lift/h36m/vid-pl_videopose3d-81frm-supv_8xb128-80e_h36m.py) | 81 | 39.1 | 29.3 | / | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_81frames_fullconv_supervised_20210527.log.json) |
| [VideoPose3D-supervised](/configs/body_3d_keypoint/video_pose_lift/h36m/vid-pl_videopose3d-243frm-supv_8xb128-80e_h36m.py) | 243 | | | / | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised-880bea25_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised_20210527.log.json) |
| [VideoPose3D-supervised-CPN](/configs/body_3d_keypoint/video_pose_lift/h36m/vid-pl_videopose3d-1frm-supv-cpn-ft_8xb128-80e_h36m.py) | 1 | 53.0 | 41.3 | / | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_1frame_fullconv_supervised_cpn_ft_20210527.log.json) |
| [VideoPose3D-supervised-CPN](/configs/body_3d_keypoint/video_pose_lift/h36m/vid-pl_videopose3d-243frm-supv-cpn-ft_8xb128-200e_h36m.py) | 243 | | | / | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_243frames_fullconv_supervised_cpn_ft_20210527.log.json) |
| [VideoPose3D-semi-supervised](/configs/body_3d_keypoint/video_pose_lift/h36m/vid-pl_videopose3d-27frm-semi-supv_8xb64-200e_h36m.py) | 27 | 57.2 | 42.4 | 54.2 | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_semi-supervised-54aef83b_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_semi-supervised_20210527.log.json) |
| [VideoPose3D-semi-supervised-CPN](/configs/body_3d_keypoint/video_pose_lift/h36m/vid-pl_videopose3d-27frm-semi-supv-cpn-ft_8xb64-200e_h36m.py) | 27 | 67.3 | 50.4 | 63.6 | [ckpt](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_semi-supervised_cpn_ft-71be9cde_20210527.pth) | [log](https://download.openmmlab.com/mmpose/body3d/videopose/videopose_h36m_27frames_fullconv_semi-supervised_cpn_ft_20210527.log.json) |
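
To train one of these configs, one option is to build an MMEngine `Runner` directly from the config file. This is a sketch assuming MMPose 1.x on MMEngine; the repository's `tools/train.py` script wraps the same flow, and the work directory name here is hypothetical.

```python
from mmengine.config import Config
from mmengine.runner import Runner

# any config path from the table above
cfg = Config.fromfile(
    'configs/body_3d_keypoint/video_pose_lift/h36m/'
    'vid-pl_videopose3d-27frm-supv_8xb128-80e_h36m.py')
cfg.work_dir = 'work_dirs/videopose3d_27frm_supv'  # hypothetical output dir

runner = Runner.from_cfg(cfg)
runner.train()  # validates every `val_interval` (10) epochs per train_cfg
```
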
132 changes: 132 additions & 0 deletions configs/body_3d_keypoint/video_pose_lift/h36m/vid-pl_videopose3d-1frm-supv-cpn-ft_8xb128-80e_h36m.py
_base_ = ['../../../_base_/default_runtime.py']

vis_backends = [
dict(type='LocalVisBackend'),
]
visualizer = dict(
type='Pose3dLocalVisualizer', vis_backends=vis_backends, name='visualizer')

# runtime
train_cfg = dict(max_epochs=80, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(type='Adam', lr=1e-4))

# learning policy
param_scheduler = [
dict(type='ExponentialLR', gamma=0.98, end=80, by_epoch=True)
]
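# with gamma=0.98, the lr decays to roughly 1e-4 * 0.98^80 ≈ 2.0e-5 by the final epoch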

auto_scale_lr = dict(base_batch_size=1024)
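# reference total batch size: 8 GPUs x 128 samples per GPU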

# hooks
default_hooks = dict(
checkpoint=dict(
type='CheckpointHook',
save_best='MPJPE',
rule='less',
max_keep_ckpts=1),
logger=dict(type='LoggerHook', interval=20),
)

# codec settings
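# the codec zero-centers each target pose on the root joint (index 0, the
# pelvis in Human3.6M), so the head regresses root-relative 3D coordinates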
codec = dict(
type='VideoPoseLifting',
num_keypoints=17,
zero_center=True,
root_index=0,
remove_root=False)

# model settings
model = dict(
type='PoseLifter',
backbone=dict(
type='TCN',
in_channels=2 * 17,
stem_channels=1024,
num_blocks=4,
kernel_sizes=(1, 1, 1, 1, 1),
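        # all-1 kernels give a single-frame receptive field, matching
        # seq_len=1 in the dataloaders below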
dropout=0.25,
use_stride_conv=True,
),
head=dict(
type='TemporalRegressionHead',
in_channels=1024,
num_joints=17,
loss=dict(type='MPJPELoss'),
decoder=codec,
))

# base dataset settings
dataset_type = 'Human36mDataset'
data_root = 'data/h36m/'

# pipelines
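# RandomFlipAroundRoot augments training data by mirroring both the 2D inputs
# and the 3D targets horizontally around the root joint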
train_pipeline = [
dict(
type='RandomFlipAroundRoot',
keypoints_flip_cfg=dict(),
target_flip_cfg=dict(),
),
dict(type='GenerateTarget', encoder=codec),
dict(
type='PackPoseInputs',
meta_keys=('id', 'category_id', 'target_img_path', 'flip_indices',
'target_root'))
]
val_pipeline = [
dict(type='GenerateTarget', encoder=codec),
dict(
type='PackPoseInputs',
meta_keys=('id', 'category_id', 'target_img_path', 'flip_indices',
'target_root'))
]

# data loaders
train_dataloader = dict(
batch_size=128,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
ann_file='annotation_body3d/fps50/h36m_train.npz',
seq_len=1,
causal=False,
pad_video_seq=False,
keypoint_2d_src='detection',
keypoint_2d_det_file='joint_2d_det_files/cpn_ft_h36m_dbb_train.npy',
camera_param_file='annotation_body3d/cameras.pkl',
data_root=data_root,
data_prefix=dict(img='images/'),
pipeline=train_pipeline,
),
)
val_dataloader = dict(
batch_size=128,
num_workers=2,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
ann_file='annotation_body3d/fps50/h36m_test.npz',
seq_len=1,
causal=False,
pad_video_seq=False,
keypoint_2d_src='detection',
keypoint_2d_det_file='joint_2d_det_files/cpn_ft_h36m_dbb_test.npy',
camera_param_file='annotation_body3d/cameras.pkl',
data_root=data_root,
data_prefix=dict(img='images/'),
pipeline=val_pipeline,
test_mode=True,
))
test_dataloader = val_dataloader

# evaluators
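# P-MPJPE rigidly aligns the prediction to the ground truth (Procrustes
# analysis) before computing the mean per-joint position error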
val_evaluator = [
dict(type='MPJPE', mode='mpjpe'),
dict(type='MPJPE', mode='p-mpjpe')
]
test_evaluator = val_evaluator
132 changes: 132 additions & 0 deletions configs/body_3d_keypoint/video_pose_lift/h36m/vid-pl_videopose3d-243frm-supv-cpn-ft_8xb128-200e_h36m.py
_base_ = ['../../../_base_/default_runtime.py']

vis_backends = [
dict(type='LocalVisBackend'),
]
visualizer = dict(
type='Pose3dLocalVisualizer', vis_backends=vis_backends, name='visualizer')

# runtime
train_cfg = dict(max_epochs=200, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(type='Adam', lr=1e-4))

# learning policy
param_scheduler = [
dict(type='ExponentialLR', gamma=0.98, end=200, by_epoch=True)
]

auto_scale_lr = dict(base_batch_size=1024)

# hooks
default_hooks = dict(
checkpoint=dict(
type='CheckpointHook',
save_best='MPJPE',
rule='less',
max_keep_ckpts=1),
logger=dict(type='LoggerHook', interval=20),
)

# codec settings
codec = dict(
type='VideoPoseLifting',
num_keypoints=17,
zero_center=True,
root_index=0,
remove_root=False)

# model settings
model = dict(
type='PoseLifter',
backbone=dict(
type='TCN',
in_channels=2 * 17,
stem_channels=1024,
num_blocks=4,
kernel_sizes=(3, 3, 3, 3, 3),
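        # five stride-3 temporal convolutions give a receptive field of
        # 3^5 = 243 frames, matching seq_len=243 in the dataloaders below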
dropout=0.25,
use_stride_conv=True,
),
head=dict(
type='TemporalRegressionHead',
in_channels=1024,
num_joints=17,
loss=dict(type='MPJPELoss'),
decoder=codec,
))

# base dataset settings
dataset_type = 'Human36mDataset'
data_root = 'data/h36m/'

# pipelines
train_pipeline = [
dict(
type='RandomFlipAroundRoot',
keypoints_flip_cfg=dict(),
target_flip_cfg=dict(),
),
dict(type='GenerateTarget', encoder=codec),
dict(
type='PackPoseInputs',
meta_keys=('id', 'category_id', 'target_img_path', 'flip_indices',
'target_root'))
]
val_pipeline = [
dict(type='GenerateTarget', encoder=codec),
dict(
type='PackPoseInputs',
meta_keys=('id', 'category_id', 'target_img_path', 'flip_indices',
'target_root'))
]

# data loaders
train_dataloader = dict(
batch_size=128,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
ann_file='annotation_body3d/fps50/h36m_train.npz',
seq_len=243,
causal=False,
pad_video_seq=True,
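        # pad clips at video boundaries so a full 243-frame window exists for
        # every target frame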
keypoint_2d_src='detection',
keypoint_2d_det_file='joint_2d_det_files/cpn_ft_h36m_dbb_train.npy',
camera_param_file='annotation_body3d/cameras.pkl',
data_root=data_root,
data_prefix=dict(img='images/'),
pipeline=train_pipeline,
),
)
val_dataloader = dict(
batch_size=128,
num_workers=2,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
ann_file='annotation_body3d/fps50/h36m_test.npz',
seq_len=243,
causal=False,
pad_video_seq=True,
keypoint_2d_src='detection',
keypoint_2d_det_file='joint_2d_det_files/cpn_ft_h36m_dbb_test.npy',
camera_param_file='annotation_body3d/cameras.pkl',
data_root=data_root,
data_prefix=dict(img='images/'),
pipeline=val_pipeline,
test_mode=True,
))
test_dataloader = val_dataloader

# evaluators
val_evaluator = [
dict(type='MPJPE', mode='mpjpe'),
dict(type='MPJPE', mode='p-mpjpe')
]
test_evaluator = val_evaluator