add inferencer doc
cir7 committed Jan 30, 2023
1 parent f17a6a7 commit 1f05b3b
Showing 4 changed files with 70 additions and 8 deletions.
64 changes: 62 additions & 2 deletions demo/README.md
@@ -52,7 +52,7 @@ Optional arguments:
Examples:

Assume that you are located at `$MMACTION2` and have already downloaded the checkpoints to the directory `checkpoints/`,
- or use checkpoint url from to directly load corresponding checkpoint, which will be automatically saved in `$HOME/.cache/torch/checkpoints`.
+ or use checkpoint url from `configs/` to directly load corresponding checkpoint, which will be automatically saved in `$HOME/.cache/torch/checkpoints`.

1. Recognize a video file as input by using a TSN model on cuda by default.

@@ -183,7 +183,7 @@ Users can change:
## Skeleton-based Action Recognition Demo
- MMAction2 provides an demo script to predict the skeleton-based action recognition result using a single video.
+ MMAction2 provides a demo script to predict the skeleton-based action recognition result using a single video.
```shell
python demo/demo_skeleton.py ${VIDEO_FILE} ${OUT_FILENAME} \
@@ -247,3 +247,63 @@ python demo/demo_skeleton.py demo/demo_skeleton.mp4 demo/demo_skeleton_out.mp4 \
--pose-checkpoint https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth \
--label-map tools/data/skeleton/label_map_ntu60.txt
```
## Inferencer
MMAction2 provides a demo script for fast prediction on video analysis tasks, based on the unified inferencer interface. Currently it only supports the action recognition task.
```shell
python demo/demo.py ${INPUTS} \
[--vid-out-dir ${VID_OUT_DIR}] \
[--rec ${RECOG_TASK}] \
[--rec-weights ${RECOG_WEIGHTS}] \
[--label-file ${LABEL_FILE}] \
[--device ${DEVICE_TYPE}] \
[--batch-size ${BATCH_SIZE}] \
[--show] \
[--print-result] \
[--pred-out-file ${PRED_OUT_FILE}]
```
Optional arguments:
- `--show`: If specified, the demo will display the video in a popup window.
- `--print-result`: If specified, the demo will print the inference results.
- `VID_OUT_DIR`: Output directory of saved videos. Defaults to None, which means videos will not be saved.
- `RECOG_TASK`: Type of action recognition algorithm. It can be the path to a config file, or a model name or alias defined in the metafile.
- `RECOG_WEIGHTS`: Path to the custom checkpoint file of the selected recognition model. If it is not specified and `--rec` is a model name from the metafile, the weights will be loaded from the metafile.
- `LABEL_FILE`: Label file for the dataset the algorithm was pretrained on. Defaults to None, which means labels will not be shown in the result.
- `DEVICE_TYPE`: Type of device to run the demo on. Allowed values are a CUDA device like `cuda:0`, or `cpu`. Defaults to `cuda:0`.
- `BATCH_SIZE`: The batch size used in inference. Defaults to 1.
- `PRED_OUT_FILE`: File path to save the inference results. Defaults to None, which means prediction results will not be saved.

Examples:

Assume that you are located at `$MMACTION2`.

1. Recognize a video file as input by using a TSN model, loading the checkpoint from the metafile.

```shell
# The demo.mp4 and label_map_k400.txt are both from Kinetics-400
python demo/demo_inferencer.py demo/demo.mp4 \
--rec configs/recognition/tsn/tsn_r50_8xb32-1x1x8-100e_kinetics400-rgb.py \
--label-file tools/data/kinetics/label_map_k400.txt
```

2. Recognize a video file as input by using a TSN model, specified by its alias in the metafile.

```shell
# The demo.mp4 and label_map_k400.txt are both from Kinetics-400
python demo/demo_inferencer.py demo/demo.mp4 \
--rec tsn \
--label-file tools/data/kinetics/label_map_k400.txt
```

3. Recognize a video file as input by using a TSN model, and then save the visualization video.

```shell
# The demo.mp4 and label_map_k400.txt are both from Kinetics-400
python demo/demo_inferencer.py demo/demo.mp4 \
--vid-out-dir demo_out \
--rec tsn \
--label-file tools/data/kinetics/label_map_k400.txt
```
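The remaining options compose in the obvious way. For example, a sketch that runs on CPU, prints the predictions, and also saves them to a file (the output path `demo_out/demo_pred.json` is just an illustrative choice):

```shell
# The demo.mp4 and label_map_k400.txt are both from Kinetics-400
python demo/demo_inferencer.py demo/demo.mp4 \
    --rec tsn \
    --label-file tools/data/kinetics/label_map_k400.txt \
    --device cpu \
    --print-result \
    --pred-out-file demo_out/demo_pred.json
```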
2 changes: 1 addition & 1 deletion demo/demo_inferencer.py
@@ -39,7 +39,7 @@ def parse_args():
parser.add_argument(
'--show',
action='store_true',
-        help='Display the image in a popup window.')
+        help='Display the video in a popup window.')
parser.add_argument(
'--print-result',
action='store_true',
3 changes: 2 additions & 1 deletion mmaction/apis/inferencers/actionrecog_inferencer.py
@@ -43,6 +43,7 @@ class ActionRecogInferencer(BaseInferencer):
means input data is a np.ndarray. Defaults to 'video'.
pack_cfg (dict, optional): Config for `InferencerPackInput` to load
input. Defaults to empty dict.
scope (str, optional): The scope of the model. Defaults to "mmaction".
"""

preprocess_kwargs: set = set()
@@ -62,7 +63,7 @@ def __init__(self,
label_file: Optional[str] = None,
input_format: str = 'video',
pack_cfg: dict = {},
-                 scope: Optional[str] = 'mmaction2') -> None:
+                 scope: Optional[str] = 'mmaction') -> None:
# A global counter tracking the number of videos processed, for
# naming of the output videos
self.num_visualized_vids = 0
9 changes: 5 additions & 4 deletions tests/apis/test_inferencer.py
@@ -26,12 +26,13 @@ def test_init_recognizer(self, config, lable_file, devices):
# Skip the test if cuda is required but unavailable
continue

-        _ = ActionRecogInferencer(config, label=lable_file, device=device)
+        _ = ActionRecogInferencer(
+            config, label_file=lable_file, device=device)

# test `init_recognizer` with invalid config
with self.assertRaisesRegex(ValueError, 'Cannot find model'):
_ = ActionRecogInferencer(
-                'slowfast_config', label=lable_file, device=device)
+                'slowfast_config', label_file=lable_file, device=device)

@parameterized.expand([
(('tsn'), ('tools/data/kinetics/label_map_k400.txt'),
@@ -48,7 +49,7 @@ def test_inference_recognizer(self, config, label_file, video_path,

# test video file input and return datasample
inferencer = ActionRecogInferencer(
-            config, label=label_file, device=device)
+            config, label_file=label_file, device=device)
results = inferencer(
video_path, vid_out_dir=tmp_dir, return_datasamples=True)
self.assertIn('predictions', results)
@@ -67,7 +68,7 @@
# test np.ndarray input
inferencer = ActionRecogInferencer(
config,
-            label=label_file,
+            label_file=label_file,
device=device,
input_format='array')
import decord
