[Fix] Fix data collate and scatter in inference (#1175)

* Clean up inference code * `LoadImageFromFile` supports a given img
open-mmlab · Mar 1, 2022 · 23d671e · 23d671e
1 parent fcb75a6
commit 23d671e
Show file tree

Hide file tree

Showing 5 changed files with 148 additions and 178 deletions.
diff --git a/mmpose/apis/inference.py b/mmpose/apis/inference.py
@@ -2,7 +2,6 @@
 import os
 import warnings
 
-import cv2
 import mmcv
 import numpy as np
 import torch
@@ -108,46 +107,11 @@ def _box2cs(cfg, box):
 
     # pixel std is 200.0
     scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
-
     scale = scale * 1.25
 
     return center, scale
 
 
-class LoadImage:
-    """A simple pipeline to load image."""
-
-    def __init__(self, color_type='color', channel_order='rgb'):
-        self.color_type = color_type
-        self.channel_order = channel_order
-
-    def __call__(self, results):
-        """Call function to load images into results.
-
-        Args:
-            results (dict): A result dict contains the img_or_path.
-
-        Returns:
-            dict: ``results`` will be returned containing loaded image.
-        """
-        if isinstance(results['img_or_path'], str):
-            results['image_file'] = results['img_or_path']
-            img = mmcv.imread(results['img_or_path'], self.color_type,
-                              self.channel_order)
-        elif isinstance(results['img_or_path'], np.ndarray):
-            results['image_file'] = ''
-            if self.color_type == 'color' and self.channel_order == 'rgb':
-                img = cv2.cvtColor(results['img_or_path'], cv2.COLOR_BGR2RGB)
-            else:
-                img = results['img_or_path']
-        else:
-            raise TypeError('"img_or_path" must be a numpy array or a str or '
-                            'a pathlib.Path object')
-
-        results['img'] = img
-        return results
-
-
 def _inference_single_pose_model(model,
                                  img_or_path,
                                  bboxes,
@@ -178,12 +142,11 @@ def _inference_single_pose_model(model,
 
     cfg = model.cfg
     device = next(model.parameters()).device
+    if device.type == 'cpu':
+        device = -1
 
     # build the data pipeline
-    channel_order = cfg.test_pipeline[0].get('channel_order', 'rgb')
-    test_pipeline = [LoadImage(channel_order=channel_order)
-                     ] + cfg.test_pipeline[1:]
-    test_pipeline = Compose(test_pipeline)
+    test_pipeline = Compose(cfg.test_pipeline)
 
     assert len(bboxes[0]) in [4, 5]
 
@@ -285,8 +248,6 @@ def _inference_single_pose_model(model,
 
         # prepare data
         data = {
-            'img_or_path':
-            img_or_path,
             'center':
             center,
             'scale':
@@ -309,18 +270,16 @@ def _inference_single_pose_model(model,
                 'flip_pairs': flip_pairs
             }
         }
+        if isinstance(img_or_path, np.ndarray):
+            data['img'] = img_or_path
+        else:
+            data['image_file'] = img_or_path
+
         data = test_pipeline(data)
         batch_data.append(data)
 
-    batch_data = collate(batch_data, samples_per_gpu=1)
-
-    if next(model.parameters()).is_cuda:
-        # scatter not work so just move image to cuda device
-        batch_data['img'] = batch_data['img'].to(device)
-    # get all img_metas of each bounding box
-    batch_data['img_metas'] = [
-        img_metas[0] for img_metas in batch_data['img_metas'].data
-    ]
+    batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+    batch_data = scatter(batch_data, [device])[0]
 
     # forward the model
     with torch.no_grad():
@@ -525,33 +484,29 @@ def inference_bottom_up_pose_model(model,
 
     cfg = model.cfg
     device = next(model.parameters()).device
-    score_per_joint = cfg.model.test_cfg.get('score_per_joint', False)
+    if device.type == 'cpu':
+        device = -1
 
     # build the data pipeline
-    channel_order = cfg.test_pipeline[0].get('channel_order', 'rgb')
-    test_pipeline = [LoadImage(channel_order=channel_order)
-                     ] + cfg.test_pipeline[1:]
-    test_pipeline = Compose(test_pipeline)
+    test_pipeline = Compose(cfg.test_pipeline)
 
     # prepare data
     data = {
-        'img_or_path': img_or_path,
         'dataset': dataset_name,
         'ann_info': {
             'image_size': np.array(cfg.data_cfg['image_size']),
             'num_joints': cfg.data_cfg['num_joints'],
             'flip_index': flip_index,
         }
     }
+    if isinstance(img_or_path, np.ndarray):
+        data['img'] = img_or_path
+    else:
+        data['image_file'] = img_or_path
 
     data = test_pipeline(data)
     data = collate([data], samples_per_gpu=1)
-    if next(model.parameters()).is_cuda:
-        # scatter to specified GPU
-        data = scatter(data, [device])[0]
-    else:
-        # just get the actual data from DataContainer
-        data['img_metas'] = data['img_metas'].data[0]
+    data = scatter(data, [device])[0]
 
     with OutputHook(model, outputs=outputs, as_tensor=False) as h:
         # forward the model
@@ -577,6 +532,7 @@ def inference_bottom_up_pose_model(model,
             })
 
         # pose nms
+        score_per_joint = cfg.model.test_cfg.get('score_per_joint', False)
         keep = oks_nms(
             pose_results,
             pose_nms_thr,

diff --git a/mmpose/apis/inference_3d.py b/mmpose/apis/inference_3d.py
@@ -6,7 +6,7 @@
 from mmcv.parallel import collate, scatter
 
 from mmpose.datasets.pipelines import Compose
-from .inference import LoadImage, _box2cs, _xywh2xyxy, _xyxy2xywh
+from .inference import _box2cs, _xywh2xyxy, _xyxy2xywh
 
 
 def extract_pose_sequence(pose_results, frame_idx, causal, seq_len, step=1):
@@ -253,6 +253,10 @@ def inference_pose_lifter_model(model,
     cfg = model.cfg
     test_pipeline = Compose(cfg.test_pipeline)
 
+    device = next(model.parameters()).device
+    if device.type == 'cpu':
+        device = -1
+
     if dataset_info is not None:
         flip_pairs = dataset_info.flip_pairs
         assert 'stats_info' in dataset_info._dataset_info
@@ -329,11 +333,7 @@ def inference_pose_lifter_model(model,
         batch_data.append(data)
 
     batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
-    if next(model.parameters()).is_cuda:
-        device = next(model.parameters()).device
-        batch_data = scatter(batch_data, target_gpus=[device.index])[0]
-    else:
-        batch_data = scatter(batch_data, target_gpus=[-1])[0]
+    batch_data = scatter(batch_data, target_gpus=[device])[0]
 
     with torch.no_grad():
         result = model(
@@ -528,12 +528,11 @@ def inference_interhand_3d_model(model,
 
     cfg = model.cfg
     device = next(model.parameters()).device
+    if device.type == 'cpu':
+        device = -1
 
     # build the data pipeline
-    channel_order = cfg.test_pipeline[0].get('channel_order', 'rgb')
-    test_pipeline = [LoadImage(channel_order=channel_order)
-                     ] + cfg.test_pipeline[1:]
-    test_pipeline = Compose(test_pipeline)
+    test_pipeline = Compose(cfg.test_pipeline)
 
     assert len(bboxes[0]) in [4, 5]
 
@@ -548,8 +547,6 @@ def inference_interhand_3d_model(model,
 
         # prepare data
         data = {
-            'img_or_path':
-            img_or_path,
             'center':
             center,
             'scale':
@@ -576,18 +573,16 @@ def inference_interhand_3d_model(model,
             }
         }
 
+        if isinstance(img_or_path, np.ndarray):
+            data['img'] = img_or_path
+        else:
+            data['image_file'] = img_or_path
+
         data = test_pipeline(data)
         batch_data.append(data)
 
-    batch_data = collate(batch_data, samples_per_gpu=1)
-
-    if next(model.parameters()).is_cuda:
-        # scatter not work so just move image to cuda device
-        batch_data['img'] = batch_data['img'].to(device)
-    # get all img_metas of each bounding box
-    batch_data['img_metas'] = [
-        img_metas[0] for img_metas in batch_data['img_metas'].data
-    ]
+    batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+    batch_data = scatter(batch_data, [device])[0]
 
     # forward the model
     with torch.no_grad():
@@ -700,12 +695,11 @@ def inference_mesh_model(model,
 
     cfg = model.cfg
     device = next(model.parameters()).device
+    if device.type == 'cpu':
+        device = -1
 
     # build the data pipeline
-    channel_order = cfg.test_pipeline[0].get('channel_order', 'rgb')
-    test_pipeline = [LoadImage(channel_order=channel_order)
-                     ] + cfg.test_pipeline[1:]
-    test_pipeline = Compose(test_pipeline)
+    test_pipeline = Compose(cfg.test_pipeline)
 
     assert len(bboxes[0]) in [4, 5]
 
@@ -721,7 +715,7 @@ def inference_mesh_model(model,
 
         # prepare data
         data = {
-            'img_or_path':
+            'image_file':
             img_or_path,
             'center':
             center,
@@ -757,15 +751,8 @@ def inference_mesh_model(model,
         data = test_pipeline(data)
         batch_data.append(data)
 
-    batch_data = collate(batch_data, samples_per_gpu=1)
-
-    if next(model.parameters()).is_cuda:
-        # scatter not work so just move image to cuda device
-        batch_data['img'] = batch_data['img'].to(device)
-    # get all img_metas of each bounding box
-    batch_data['img_metas'] = [
-        img_metas[0] for img_metas in batch_data['img_metas'].data
-    ]
+    batch_data = collate(batch_data, samples_per_gpu=len(batch_data))
+    batch_data = scatter(batch_data, target_gpus=[device])[0]
 
     # forward the model
     with torch.no_grad():

diff --git a/mmpose/datasets/pipelines/loading.py b/mmpose/datasets/pipelines/loading.py
@@ -36,38 +36,56 @@ def __init__(self,
         self.file_client_args = file_client_args.copy()
         self.file_client = None
 
+    def _read_image(self, path):
+        """Read a single image from ``path`` via ``self.file_client``.
+
+        Raises ``ValueError`` when ``mmcv.imfrombytes`` returns ``None``;
+        casts the image to ``np.float32`` when ``self.to_float32`` is set.
+        """
+        img_bytes = self.file_client.get(path)
+        img = mmcv.imfrombytes(
+            img_bytes, flag=self.color_type, channel_order=self.channel_order)
+        if img is None:
+            raise ValueError(f'Fail to read {path}')
+        if self.to_float32:
+            img = img.astype(np.float32)
+        return img
+
     def __call__(self, results):
         """Loading image(s) from file."""
         if self.file_client is None:
             self.file_client = mmcv.FileClient(**self.file_client_args)
 
-        image_file = results['image_file']
+        image_file = results.get('image_file', None)
 
         if isinstance(image_file, (list, tuple)):
-            imgs = []
-            for image in image_file:
-                img_bytes = self.file_client.get(image)
-                img = mmcv.imfrombytes(
-                    img_bytes,
-                    flag=self.color_type,
-                    channel_order=self.channel_order)
-                if self.to_float32:
-                    img = img.astype(np.float32)
-                if img is None:
-                    raise ValueError(f'Fail to read {image}')
-                imgs.append(img)
-            results['img'] = imgs
+            # Load images from a list of paths
+            results['img'] = [self._read_image(path) for path in image_file]
+        elif image_file is not None:
+            # Load single image from path
+            results['img'] = self._read_image(image_file)
         else:
-            img_bytes = self.file_client.get(image_file)
-            img = mmcv.imfrombytes(
-                img_bytes,
-                flag=self.color_type,
-                channel_order=self.channel_order)
-            if self.to_float32:
-                img = img.astype(np.float32)
-            if img is None:
-                raise ValueError(f'Fail to read {image_file}')
-            results['img'] = img
+            # If `image_file` is not in results, check that `img` exists
+            # and format the image. This is for compatibility when the
+            # image is manually set outside the pipeline.
+            if 'img' not in results:
+                raise KeyError('Either `image_file` or `img` should exist in '
+                               'results.')
+            assert isinstance(results['img'], np.ndarray)
+            if self.color_type == 'color' and self.channel_order == 'rgb':
+                # The original results['img'] is assumed to be image(s) in BGR
+                # order, so we convert the color according to the arguments.
+                if results['img'].ndim == 3:
+                    results['img'] = mmcv.bgr2rgb(results['img'])
+                elif results['img'].ndim == 4:
+                    # Stack rather than concatenate to keep the leading axis.
+                    results['img'] = np.stack(
+                        [mmcv.bgr2rgb(img) for img in results['img']], axis=0)
+                else:
+                    raise ValueError('results["img"] has invalid shape '
+                                     f'{results["img"].shape}')
+
+            results['image_file'] = None
 
         return results