
Commit

[Refactor] Add PoseLifter, TemporalRegressionHead, TrajectoryRegressionHead (#2311)
Tau-J authored Apr 25, 2023
1 parent 7dc182c commit 081e79f
Showing 10 changed files with 616 additions and 26 deletions.
4 changes: 1 addition & 3 deletions mmpose/codecs/image_pose_lifting.py
@@ -169,7 +169,6 @@ def encode(self,

def decode(self,
encoded: np.ndarray,
restore_global_position: bool = False,
target_root: Optional[np.ndarray] = None
) -> Tuple[np.ndarray, np.ndarray]:
"""Decode keypoint coordinates from normalized space to input image
@@ -192,8 +191,7 @@ def decode(self,
assert self.target_mean.shape == keypoints.shape[1:]
keypoints = keypoints * self.target_std + self.target_mean

if restore_global_position:
assert target_root is not None
if target_root is not None:
keypoints = keypoints + np.expand_dims(target_root, axis=0)
if self.remove_root:
keypoints = np.insert(
4 changes: 1 addition & 3 deletions mmpose/codecs/video_pose_lifting.py
@@ -171,7 +171,6 @@ def encode(self,

def decode(self,
encoded: np.ndarray,
restore_global_position: bool = False,
target_root: Optional[np.ndarray] = None
) -> Tuple[np.ndarray, np.ndarray]:
"""Decode keypoint coordinates from normalized space to input image
@@ -190,8 +189,7 @@ def decode(self,
"""
keypoints = encoded.copy()

if restore_global_position:
assert target_root is not None
if target_root is not None:
keypoints = keypoints + np.expand_dims(target_root, axis=0)
if self.remove_root:
keypoints = np.insert(
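Both codec diffs above replace the explicit restore_global_position flag with an implicit check: global position is restored whenever target_root is passed to decode(). A minimal sketch of the new calling convention (the codec constructor argument below is an assumption for illustration, not taken from this diff):

import numpy as np
from mmpose.codecs import VideoPoseLifting

codec = VideoPoseLifting(num_keypoints=17)  # assumed constructor argument

encoded = np.random.rand(1, 17, 3).astype(np.float32)  # dummy normalized keypoints
root = np.array([0.1, 0.2, 0.9], dtype=np.float32)     # dummy root position

# Before: codec.decode(encoded, restore_global_position=True, target_root=root)
# After: supplying target_root alone triggers the restoration
keypoints, scores = codec.decode(encoded, target_root=root)
keypoints_rel, _ = codec.decode(encoded)  # root-relative coordinates, no restoration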
6 changes: 4 additions & 2 deletions mmpose/models/heads/__init__.py
@@ -5,10 +5,12 @@
HeatmapHead, MSPNHead, ViPNASHead)
from .hybrid_heads import DEKRHead
from .regression_heads import (DSNTHead, IntegralRegressionHead,
RegressionHead, RLEHead)
RegressionHead, RLEHead, TemporalRegressionHead,
TrajectoryRegressionHead)

__all__ = [
'BaseHead', 'HeatmapHead', 'CPMHead', 'MSPNHead', 'ViPNASHead',
'RegressionHead', 'IntegralRegressionHead', 'SimCCHead', 'RLEHead',
'DSNTHead', 'AssociativeEmbeddingHead', 'DEKRHead', 'CIDHead', 'RTMCCHead'
'DSNTHead', 'AssociativeEmbeddingHead', 'DEKRHead', 'CIDHead', 'RTMCCHead',
'TemporalRegressionHead', 'TrajectoryRegressionHead'
]
4 changes: 4 additions & 0 deletions mmpose/models/heads/regression_heads/__init__.py
@@ -3,10 +3,14 @@
from .integral_regression_head import IntegralRegressionHead
from .regression_head import RegressionHead
from .rle_head import RLEHead
from .temporal_regression_head import TemporalRegressionHead
from .trajectory_regression_head import TrajectoryRegressionHead

__all__ = [
'RegressionHead',
'IntegralRegressionHead',
'DSNTHead',
'RLEHead',
'TemporalRegressionHead',
'TrajectoryRegressionHead',
]
134 changes: 134 additions & 0 deletions mmpose/models/heads/regression_heads/temporal_regression_head.py
@@ -0,0 +1,134 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Tuple, Union

import numpy as np
import torch
from torch import Tensor, nn

from mmpose.evaluation.functional import keypoint_pck_accuracy
from mmpose.registry import KEYPOINT_CODECS, MODELS
from mmpose.utils.tensor_utils import to_numpy
from mmpose.utils.typing import (ConfigType, OptConfigType, OptSampleList,
Predictions)
from ..base_head import BaseHead

OptIntSeq = Optional[Sequence[int]]


@MODELS.register_module()
class TemporalRegressionHead(BaseHead):
"""Temporal Regression head of `VideoPose3D`_ by Dario et al (CVPR'2019).
Args:
in_channels (int | sequence[int]): Number of input channels
num_joints (int): Number of joints
loss (Config): Config for keypoint loss. Defaults to use
:class:`SmoothL1Loss`
decoder (Config, optional): The decoder config that controls decoding
keypoint coordinates from the network output. Defaults to ``None``
init_cfg (Config, optional): Config to control the initialization. See
:attr:`default_init_cfg` for default settings
.. _`VideoPose3D`: https://arxiv.org/abs/1811.11742
"""

_version = 2

def __init__(self,
in_channels: Union[int, Sequence[int]],
num_joints: int,
loss: ConfigType = dict(
type='MSELoss', use_target_weight=True),
decoder: OptConfigType = None,
init_cfg: OptConfigType = None):

if init_cfg is None:
init_cfg = self.default_init_cfg

super().__init__(init_cfg)

self.in_channels = in_channels
self.num_joints = num_joints
self.loss_module = MODELS.build(loss)
if decoder is not None:
self.decoder = KEYPOINT_CODECS.build(decoder)
else:
self.decoder = None

# The 1x1 convolution below acts as a per-frame fully-connected output layer
self.conv = nn.Conv1d(in_channels, self.num_joints * 3, 1)

def forward(self, feats: Tuple[Tensor]) -> Tensor:
"""Forward the network. The input is multi scale feature maps and the
output is the coordinates.
Args:
feats (Tuple[Tensor]): Multi scale feature maps.
Returns:
Tensor: output coordinates(and sigmas[optional]).
"""
x = feats[-1]

x = self.conv(x)

return x.reshape(-1, self.num_joints, 3)

def predict(self,
feats: Tuple[Tensor],
batch_data_samples: OptSampleList,
test_cfg: ConfigType = {}) -> Predictions:
"""Predict results from outputs."""

batch_coords = self.forward(feats) # (B, K, D)

batch_coords.unsqueeze_(dim=1) # (B, N, K, D)

# Restore global position with target_root
target_root = batch_data_samples[0].metainfo.get('target_root', None)
if target_root is not None:
# Stack the per-sample root positions into a single batch tensor
target_root = torch.stack([
torch.as_tensor(d.metainfo['target_root']) for d in batch_data_samples
])

preds = self.decode(batch_coords, target_root)

return preds

def loss(self,
inputs: Tuple[Tensor],
batch_data_samples: OptSampleList,
train_cfg: ConfigType = {}) -> dict:
"""Calculate losses from a batch of inputs and data samples."""

pred_outputs = self.forward(inputs)

keypoint_labels = torch.cat(
[d.gt_instance_labels.keypoint_labels for d in batch_data_samples])
keypoint_weights = torch.cat([
d.gt_instance_labels.keypoint_weights for d in batch_data_samples
])

# calculate losses
losses = dict()
loss = self.loss_module(pred_outputs, keypoint_labels,
keypoint_weights.unsqueeze(-1))

losses.update(loss_pose3d=loss)

# calculate accuracy
_, avg_acc, _ = keypoint_pck_accuracy(
pred=to_numpy(pred_outputs),
gt=to_numpy(keypoint_labels),
mask=to_numpy(keypoint_weights) > 0,
thr=0.05,
norm_factor=np.ones((pred_outputs.size(0), 2), dtype=np.float32))

mpjpe_pose = torch.tensor(avg_acc, device=keypoint_labels.device)
losses.update(mpjpe=mpjpe_pose)

return losses

@property
def default_init_cfg(self):
init_cfg = [dict(type='Normal', layer=['Linear'], std=0.01, bias=0)]
return init_cfg
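A minimal sketch of how the new head maps lifting-backbone features to 3D joint coordinates (channel and joint counts are assumptions chosen for illustration, not values from this commit; mmpose must be installed so the default MSELoss can be built from the registry):

import torch
from mmpose.models.heads import TemporalRegressionHead

head = TemporalRegressionHead(in_channels=1024, num_joints=17)

# The lifting backbone is expected to have reduced the temporal axis to length 1.
feat = torch.randn(2, 1024, 1)   # (batch, channels, temporal length)
coords = head.forward((feat,))   # the head reads the last feature map in the tuple
print(coords.shape)              # torch.Size([2, 17, 3])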
134 changes: 134 additions & 0 deletions mmpose/models/heads/regression_heads/trajectory_regression_head.py
@@ -0,0 +1,134 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Tuple, Union

import numpy as np
import torch
from torch import Tensor, nn

from mmpose.evaluation.functional import keypoint_pck_accuracy
from mmpose.registry import KEYPOINT_CODECS, MODELS
from mmpose.utils.tensor_utils import to_numpy
from mmpose.utils.typing import (ConfigType, OptConfigType, OptSampleList,
Predictions)
from ..base_head import BaseHead

OptIntSeq = Optional[Sequence[int]]


@MODELS.register_module()
class TrajectoryRegressionHead(BaseHead):
"""Trajectory Regression head of `VideoPose3D`_ by Dario et al (CVPR'2019).
Args:
in_channels (int | sequence[int]): Number of input channels
num_joints (int): Number of joints
loss (Config): Config for trajectory loss. Defaults to use
:class:`MPJPELoss`
decoder (Config, optional): The decoder config that controls decoding
keypoint coordinates from the network output. Defaults to ``None``
init_cfg (Config, optional): Config to control the initialization. See
:attr:`default_init_cfg` for default settings
.. _`VideoPose3D`: https://arxiv.org/abs/1811.11742
"""

_version = 2

def __init__(self,
in_channels: Union[int, Sequence[int]],
num_joints: int,
loss: ConfigType = dict(
type='MPJPELoss', use_target_weight=True),
decoder: OptConfigType = None,
init_cfg: OptConfigType = None):

if init_cfg is None:
init_cfg = self.default_init_cfg

super().__init__(init_cfg)

self.in_channels = in_channels
self.num_joints = num_joints
self.loss_module = MODELS.build(loss)
if decoder is not None:
self.decoder = KEYPOINT_CODECS.build(decoder)
else:
self.decoder = None

# The 1x1 convolution below acts as a per-frame fully-connected output layer
self.conv = nn.Conv1d(in_channels, self.num_joints * 3, 1)

def forward(self, feats: Tuple[Tensor]) -> Tensor:
"""Forward the network. The input is multi scale feature maps and the
output is the coordinates.
Args:
feats (Tuple[Tensor]): Multi scale feature maps.
Returns:
Tensor: output coordinates(and sigmas[optional]).
"""
x = feats[-1]

x = self.conv(x)

return x.reshape(-1, self.num_joints, 3)

def predict(self,
feats: Tuple[Tensor],
batch_data_samples: OptSampleList,
test_cfg: ConfigType = {}) -> Predictions:
"""Predict results from outputs."""

batch_coords = self.forward(feats) # (B, K, D)

batch_coords.unsqueeze_(dim=1) # (B, N, K, D)

# Restore global position with target_root
target_root = batch_data_samples[0].metainfo.get('target_root', None)
if target_root is not None:
# Stack the per-sample root positions into a single batch tensor
target_root = torch.stack([
torch.as_tensor(d.metainfo['target_root']) for d in batch_data_samples
])

preds = self.decode(batch_coords, target_root)

return preds

def loss(self,
inputs: Union[Tensor, Tuple[Tensor]],
batch_data_samples: OptSampleList,
train_cfg: ConfigType = {}) -> dict:
"""Calculate losses from a batch of inputs and data samples."""

pred_outputs = self.forward(inputs)

keypoint_labels = torch.cat(
[d.gt_instance_labels.keypoint_labels for d in batch_data_samples])
trajectory_weights = torch.cat([
d.gt_instance_labels.trajectory_weights for d in batch_data_samples
])

# calculate losses
losses = dict()
loss = self.loss_module(pred_outputs, keypoint_labels,
trajectory_weights.unsqueeze(-1))

losses.update(loss_traj=loss)

# calculate accuracy
_, avg_acc, _ = keypoint_pck_accuracy(
pred=to_numpy(pred_outputs),
gt=to_numpy(keypoint_labels),
mask=to_numpy(trajectory_weights) > 0,
thr=0.05,
norm_factor=np.ones((pred_outputs.size(0), 2), dtype=np.float32))

mpjpe_traj = torch.tensor(avg_acc, device=keypoint_labels.device)
losses.update(mpjpe_traj=mpjpe_traj)

return losses

@property
def default_init_cfg(self):
init_cfg = [dict(type='Normal', layer=['Linear'], std=0.01, bias=0)]
return init_cfg
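The trajectory head mirrors the temporal head but defaults to MPJPELoss and serves the global-trajectory branch of VideoPose3D, which regresses only the root position; a root-only setup is therefore assumed in this sketch (values are illustrative, not from this commit):

import torch
from mmpose.models.heads import TrajectoryRegressionHead

traj_head = TrajectoryRegressionHead(
    in_channels=1024,
    num_joints=1,  # assumed root-only trajectory branch; actual configs may differ
    loss=dict(type='MPJPELoss', use_target_weight=True))

feat = torch.randn(2, 1024, 1)
traj = traj_head.forward((feat,))  # shape (2, 1, 3): one global root position per sample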
3 changes: 2 additions & 1 deletion mmpose/models/pose_estimators/__init__.py
@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .bottomup import BottomupPoseEstimator
from .pose_lifter import PoseLifter
from .topdown import TopdownPoseEstimator

__all__ = ['TopdownPoseEstimator', 'BottomupPoseEstimator']
__all__ = ['TopdownPoseEstimator', 'BottomupPoseEstimator', 'PoseLifter']
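The PoseLifter estimator registered here is what ties a temporal backbone to the two new heads; its implementation is not rendered on this page, so the fragment below is only an assumed sketch of how a config might wire the pieces together (every field name and value is illustrative, not taken from this commit):

# Hypothetical config sketch; verify field names against pose_lifter.py.
model = dict(
    type='PoseLifter',
    backbone=dict(type='TCN', in_channels=2 * 17),
    head=dict(
        type='TemporalRegressionHead',
        in_channels=1024,
        num_joints=17,
        loss=dict(type='MSELoss', use_target_weight=True),
        decoder=dict(type='VideoPoseLifting', num_keypoints=17)))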