[Refactor] add unittest for HeatmapHead (open-mmlab#1503)

* add unittest for HeatmapHead * add unittest * fix comments typo Co-authored-by: Tau <[email protected]>
wjkim81 · Jul 25, 2022 · 6e6b41d · 6e6b41d
1 parent 37a65d8
commit 6e6b41d
Show file tree

Hide file tree

Showing 15 changed files with 448 additions and 73 deletions.
diff --git a/mmpose/core/utils/tensor_utils.py b/mmpose/core/utils/tensor_utils.py
@@ -1,15 +1,58 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Any, Optional, Sequence, Union
+
 import numpy as np
+import torch
+from mmengine.utils import is_seq_of
 from torch import Tensor
 
 
-def _to_numpy(x: Tensor) -> np.ndarray:
-    """Convert a torch tensor to numpy.ndarray.
+def to_numpy(x: Union[Tensor, Sequence[Tensor]],
+             return_device: bool = False) -> Union[np.ndarray, tuple]:
+    """Convert torch tensor to numpy.ndarray.
+
+    Args:
+        x (Tensor | Sequence[Tensor]): A single tensor or a sequence of
+            tensors
+        return_device (bool): Whether to return the tensor device. Defaults
+            to ``False``
+
+    Returns:
+        np.ndarray | tuple: If ``return_device`` is ``True``, return a tuple
+        of converted numpy array(s) and the device indicator; otherwise only
+        return the numpy array(s)
+    """
+
+    if isinstance(x, Tensor):
+        arrays = x.detach().cpu().numpy()
+        device = x.device
+    elif is_seq_of(x, Tensor):
+        # Convert every tensor in full (no indexing into the result); the
+        # device of the first tensor is reported for the whole sequence
+        arrays = [to_numpy(_x) for _x in x]
+        device = x[0].device
+    else:
+        raise ValueError(f'Invalid input type {type(x)}')
+
+    return (arrays, device) if return_device else arrays
+
+
+def to_tensor(x: Union[np.ndarray, Sequence[np.ndarray]],
+              device: Optional[Any] = None) -> Union[Tensor, Sequence[Tensor]]:
+    """Convert numpy.ndarray to torch tensor.
 
     Args:
-        x (Tensor): A torch tensor
+        x (np.ndarray | Sequence[np.ndarray]): A single np.ndarray or a
+            sequence of np.ndarray
+        device (Any, optional): The device indicator. Defaults to ``None``
 
     Returns:
-        np.ndarray: The converted numpy array
+        Tensor | Sequence[Tensor]: The converted Tensor, or a list of
+        converted Tensors when ``x`` is a sequence
     """
-    return x.detach().cpu().numpy()
+    if isinstance(x, np.ndarray):
+        return torch.tensor(x, device=device)
+    elif is_seq_of(x, np.ndarray):
+        return [to_tensor(_x, device=device) for _x in x]
+    else:
+        raise ValueError(f'Invalid input type {type(x)}')
diff --git a/mmpose/datasets/pipelines/formatting.py b/mmpose/datasets/pipelines/formatting.py
@@ -95,7 +95,7 @@ class PackPoseInputs(BaseTransform):
         'keypoints': 'keypoints',
         'keypoints_visible': 'keypoints_visible',
         'reg_label': 'reg_labels',
-        'target_weights': 'target_weights'
+        'keypoint_weights': 'keypoint_weights'
     }
 
     def __init__(self,

diff --git a/mmpose/datasets/pipelines/topdown_transforms.py b/mmpose/datasets/pipelines/topdown_transforms.py
@@ -225,7 +225,7 @@ class TopdownGenerateRegressionLabel(BaseTransform):
 
         - keypoints
         - keypoints_visible
-        - image_size
+        - input_size
         - dataset_keypoint_weights
 
     Added Keys:

diff --git a/mmpose/models/heads/__init__.py b/mmpose/models/heads/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .base_head import BaseHead
+from .heatmap_heads import HeatmapHead
+
+__all__ = ['BaseHead', 'HeatmapHead']
diff --git a/mmpose/models/heads/base_head.py b/mmpose/models/heads/base_head.py
@@ -8,8 +8,9 @@
 from mmengine.model import BaseModule
 from torch import Tensor
 
-from mmpose.core.utils.tensor_utils import _to_numpy
-from mmpose.core.utils.typing import ConfigType, OptSampleList, SampleList
+from mmpose.core.utils.tensor_utils import to_numpy, to_tensor
+from mmpose.core.utils.typing import (ConfigType, OptConfigType, OptSampleList,
+                                      SampleList)
 
 
 class BaseHead(BaseModule, metaclass=ABCMeta):
@@ -25,13 +26,17 @@ def forward(self, feats: Tuple[Tensor]):
         """Forward the network."""
 
     @abstractmethod
-    def predict(self, feats: Tuple[Tensor], batch_data_samples: OptSampleList,
-                test_cfg: ConfigType) -> SampleList:
+    def predict(self,
+                feats: Tuple[Tensor],
+                batch_data_samples: OptSampleList,
+                test_cfg: OptConfigType = {}) -> SampleList:
         """Predict results from features."""
 
     @abstractmethod
-    def loss(self, feats: Tuple[Tensor], batch_data_samples: OptSampleList,
-             train_cfg: ConfigType) -> dict:
+    def loss(self,
+             feats: Tuple[Tensor],
+             batch_data_samples: OptSampleList,
+             train_cfg: OptConfigType = {}) -> dict:
         """Calculate losses from a batch of inputs and data samples."""
 
     def _get_in_channels(self):
@@ -97,28 +102,22 @@ def decode(self, batch_outputs: Union[Tensor, Tuple[Tensor]],
             raise ValueError(
                 '`batch_data_samples` is required to decode keypoitns.')
 
-        if isinstance(batch_outputs, Tensor):
-            batch_outputs_np = _to_numpy(batch_outputs)
-
-        elif isinstance(batch_outputs, Tuple):
-            assert len(batch_outputs) == 2, (
-                'batch_outputs should contain coordinates and heatmaps in '
-                f'{self.__class__.__name__}')
-
-            batch_coords, _ = batch_outputs
-            batch_outputs_np = _to_numpy(batch_coords)
+        batch_outputs_np, device = to_numpy(batch_outputs, return_device=True)
 
         # TODO: support decoding with tensor data
         for outputs, data_sample in zip(batch_outputs_np, batch_data_samples):
-            keypoints, scores = self.decoder.decode(outputs)
+            keypoints_np, scores_np = self.decoder.decode(outputs)
+            keypoints = to_tensor(keypoints_np, device)
+            scores = to_tensor(scores_np, device)
 
             # Convert the decoded local keypoints (in input space)
             # to the image coordinate space
             # Convert keypoint coordinates from input space to image space
             if 'gt_instances' in data_sample:
                 bbox_centers = data_sample.gt_instances.bbox_centers
-                bbox_scales = data_sample.get_instances.bbox_scales
-                input_size = data_sample.metainfo.input_size
+                bbox_scales = data_sample.gt_instances.bbox_scales
+                input_size = keypoints.new_tensor(
+                    data_sample.metainfo['input_size'])
                 keypoints = keypoints / input_size * bbox_scales + \
                     bbox_centers - 0.5 * bbox_scales
 

diff --git a/mmpose/models/heads/heatmap_heads/__init__.py b/mmpose/models/heads/heatmap_heads/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .heatmap_head import HeatmapHead
+
+__all__ = ['HeatmapHead']
diff --git a/mmpose/models/heads/heatmap_heads/heatmap_head.py b/mmpose/models/heads/heatmap_heads/heatmap_head.py
@@ -6,7 +6,7 @@
 from mmengine.data import PixelData
 from torch import Tensor, nn
 
-from mmpose.core.utils.tensor_utils import _to_numpy
+from mmpose.core.utils.tensor_utils import to_numpy
 from mmpose.core.utils.typing import (ConfigType, OptConfigType, OptSampleList,
                                       SampleList)
 from mmpose.metrics.utils import pose_pck_accuracy
@@ -23,13 +23,13 @@ class HeatmapHead(BaseHead):
     convolutional layer to generate heatmaps from low-resolution feature maps.
 
     Args:
-        in_channels (int): Number of channels in the input feature map
+        in_channels (int | sequence[int]): Number of channels in the input
+            feature map
         out_channels (int): Number of channels in the output heatmap
-        num_deconv_layers (int): Number of deconv layers. Defaults to 3
         deconv_out_channels (sequence[int]): The output channel number of each
             deconv layer. Defaults to ``(256, 256, 256)``
-        deconv_kernel_sizes (sequence[int | tuple]): The kernel size of
-            each deconv layer. Each element should be either an integer for
+        deconv_kernel_sizes (sequence[int | tuple], optional): The kernel size
+            of each deconv layer. Each element should be either an integer for
             both height and width dimensions, or a tuple of two integers for
             the height and the width dimension respectively.Defaults to
             ``(4, 4, 4)``
@@ -51,7 +51,7 @@ class HeatmapHead(BaseHead):
 
             Defaults to ``'select'``
         input_index (int | sequence[int]): The feature map index used in the
-            input transformation. See also ``input_transform``. Defaults to 0
+            input transformation. See also ``input_transform``. Defaults to -1
         align_corners (bool): `align_corners` argument of
             :func:`torch.nn.functional.interpolate` used in the input
             transformation. Defaults to ``False``
@@ -76,7 +76,7 @@ def __init__(self,
                  conv_kernel_sizes: OptIntSeq = None,
                  has_final_layer: bool = True,
                  input_transform: str = 'select',
-                 input_index: Union[int, Sequence[int]] = 0,
+                 input_index: Union[int, Sequence[int]] = -1,
                  align_corners: bool = False,
                  loss: ConfigType = dict(
                      type='KeypointMSELoss', use_target_weight=True),
@@ -118,7 +118,7 @@ def __init__(self,
             )
             in_channels = deconv_out_channels[-1]
         else:
-            self.deconv_layers = nn.Identity
+            self.deconv_layers = nn.Identity()
 
         if conv_out_channels:
             if conv_kernel_sizes is None or len(conv_out_channels) != len(
@@ -134,7 +134,7 @@ def __init__(self,
                 layer_kernel_sizes=conv_kernel_sizes)
             in_channels = conv_out_channels[-1]
         else:
-            self.conv_layers = nn.Identity
+            self.conv_layers = nn.Identity()
 
         if has_final_layer:
             cfg = dict(
@@ -240,15 +240,17 @@ def forward(self, feats: Tuple[Tensor]) -> Tensor:
 
         return x
 
-    def predict(self, feats: Tuple[Tensor], batch_data_samples: OptSampleList,
-                test_cfg: ConfigType) -> SampleList:
+    def predict(self,
+                feats: Tuple[Tensor],
+                batch_data_samples: OptSampleList,
+                test_cfg: OptConfigType = {}) -> SampleList:
         """Predict results from features."""
 
         batch_heatmaps = self.forward(feats)
         preds = self.decode(batch_heatmaps, batch_data_samples, test_cfg)
 
-        # Whether to visualize the predicted heatmps
-        if test_cfg.get('output_heatmaps', True):
+        # Whether to visualize the predicted heatmaps
+        if test_cfg.get('output_heatmaps', False):
             for heatmaps, data_sample in zip(batch_heatmaps, preds):
                 # Store the heatmap predictions in the data sample
                 if 'pred_fileds' not in data_sample:
@@ -257,28 +259,30 @@ def predict(self, feats: Tuple[Tensor], batch_data_samples: OptSampleList,
 
         return preds
 
-    def loss(self, feats: Tuple[Tensor], batch_data_samples: OptSampleList,
-             train_cfg: ConfigType) -> dict:
+    def loss(self,
+             feats: Tuple[Tensor],
+             batch_data_samples: OptSampleList,
+             train_cfg: OptConfigType = {}) -> dict:
         """Calculate losses from a batch of inputs and data samples."""
         pred_heatmaps = self.forward(feats)
         gt_heatmaps = torch.stack(
             [d.gt_fields.heatmaps for d in batch_data_samples])
-        target_weights = torch.cat(
-            [d.gt_instance.target_weights for d in batch_data_samples])
+        keypoint_weights = torch.cat(
+            [d.gt_instances.keypoint_weights for d in batch_data_samples])
 
         # calculate losses
         losses = dict()
-        loss = self.loss_module(pred_heatmaps, gt_heatmaps, target_weights)
+        loss = self.loss_module(pred_heatmaps, gt_heatmaps, keypoint_weights)
         if isinstance(loss, dict):
             losses.update(loss)
         else:
-            losses.update(loss_kpts=loss)
+            losses.update(loss_kpt=loss)
 
         # calculate accuracy
         _, avg_acc, _ = pose_pck_accuracy(
-            output=_to_numpy(pred_heatmaps),
-            target=_to_numpy(gt_heatmaps),
-            mask=_to_numpy(target_weights).squeeze(-1) > 0)
+            output=to_numpy(pred_heatmaps),
+            target=to_numpy(gt_heatmaps),
+            mask=to_numpy(keypoint_weights) > 0)
 
         losses.update(acc_pose=float(avg_acc))
 
@@ -294,7 +298,7 @@ def _load_state_dict_pre_hook(self, state_dict, prefix, local_meta, *args,
         """
 
         version = local_meta.get('version', None)
-        if version >= self._version:
+        if version and version >= self._version:
             return
 
         # convert old-version state dict

diff --git a/mmpose/models/losses/__init__.py b/mmpose/models/losses/__init__.py
@@ -2,14 +2,14 @@
 from .classification_loss import BCELoss
 from .heatmap_loss import AdaptiveWingLoss
 from .mesh_loss import GANLoss, MeshLoss
-from .mse_loss import JointsMSELoss, JointsOHKMMSELoss
+from .mse_loss import KeypointMSELoss, KeypointOHKMMSELoss
 from .multi_loss_factory import AELoss, HeatmapLoss, MultiLossFactory
 from .regression_loss import (BoneLoss, L1Loss, MPJPELoss, MSELoss, RLELoss,
                               SemiSupervisionLoss, SmoothL1Loss, SoftWingLoss,
                               WingLoss)
 
 __all__ = [
-    'JointsMSELoss', 'JointsOHKMMSELoss', 'HeatmapLoss', 'AELoss',
+    'KeypointMSELoss', 'KeypointOHKMMSELoss', 'HeatmapLoss', 'AELoss',
     'MultiLossFactory', 'MeshLoss', 'GANLoss', 'SmoothL1Loss', 'WingLoss',
     'MPJPELoss', 'MSELoss', 'L1Loss', 'BCELoss', 'BoneLoss',
     'SemiSupervisionLoss', 'SoftWingLoss', 'AdaptiveWingLoss', 'RLELoss'

diff --git a/mmpose/models/losses/heatmap_loss.py b/mmpose/models/losses/heatmap_loss.py
@@ -64,22 +64,22 @@ def criterion(self, pred, target):
 
         return torch.mean(losses)
 
-    def forward(self, output, target, target_weight):
+    def forward(self, output, target, target_weights):
         """Forward function.
 
         Note:
             batch_size: N
             num_keypoints: K
 
         Args:
-            output (torch.Tensor[NxKxHxW]): Output heatmaps.
-            target (torch.Tensor[NxKxHxW]): Target heatmaps.
-            target_weight (torch.Tensor[NxKx1]):
+            output (torch.Tensor[N, K, H, W]): Output heatmaps.
+            target (torch.Tensor[N, K, H, W]): Target heatmaps.
+            target_weights (torch.Tensor[N, K]):
                 Weights across different joint types.
         """
         if self.use_target_weight:
-            loss = self.criterion(output * target_weight.unsqueeze(-1),
-                                  target * target_weight.unsqueeze(-1))
+            loss = self.criterion(output * target_weights,
+                                  target * target_weights)
         else:
             loss = self.criterion(output, target)
 

diff --git a/mmpose/models/losses/mesh_loss.py b/mmpose/models/losses/mesh_loss.py
@@ -76,7 +76,7 @@ def __init__(self,
         # Per-vertex loss on the mesh
         self.criterion_vertex = nn.L1Loss(reduction='none')
 
-        # Joints (2D and 3D) loss
+        # Keypoints (2D and 3D) loss
         self.criterion_joints_2d = nn.SmoothL1Loss(reduction='none')
         self.criterion_joints_3d = nn.SmoothL1Loss(reduction='none')