Commit
Add benchmark.py (#35)
xiaohangcd authored Jul 30, 2020
1 parent 0109dfc commit 64cf6a3
Showing 2 changed files with 78 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/getting_started.md
@@ -306,6 +306,12 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py ${WORK_DIR} 4
CUDA_VISIBLE_DEVICES=4,5,6,7 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py ${WORK_DIR} 4
```

## Benchmark
You can get the average inference speed using the following script. Note that it does not include the IO time or the pre-processing time.
```shell
python tools/benchmark_inference.py ${MMPOSE_CONFIG_FILE}
```
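
For example, a typical invocation looks like this (the config path below is illustrative; substitute any config file under `configs/`, and `--log-interval` is optional):

```shell
python tools/benchmark_inference.py configs/top_down/resnet/coco/res50_coco_256x192.py --log-interval 20
```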

## Tutorials

Currently, we provide some tutorials for users to [finetune model](tutorials/finetune.md),
72 changes: 72 additions & 0 deletions tools/benchmark_inference.py
@@ -0,0 +1,72 @@
import argparse
import time

import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel

from mmpose.core import wrap_fp16_model
from mmpose.datasets import build_dataloader, build_dataset
from mmpose.models import build_posenet


def parse_args():
    parser = argparse.ArgumentParser(
        description='MMPose benchmark inference speed')
    parser.add_argument('config', help='test config file path')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        help='interval of logging')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # build the dataloader
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model (no checkpoint is loaded; this benchmarks the
    # architecture with randomly initialized weights)
    model = build_posenet(cfg.model)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    model = MMDataParallel(model, device_ids=[0])
    model.eval()  # disable dropout and freeze BN statistics while timing

    # the first several iterations may be very slow, so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark on the whole dataset and take the average
    for i, data in enumerate(data_loader):

        torch.cuda.synchronize()
        start_time = time.perf_counter()
        with torch.no_grad():
            model(return_loss=False, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                its = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done item [{i + 1:<3}], {its:.2f} items / s')

    # report the average over all timed iterations; computing it after
    # the loop avoids a NameError when the dataset size is not a
    # multiple of the log interval
    its = (i + 1 - num_warmup) / pure_inf_time
    print(f'Overall average: {its:.2f} items / s')
    print(f'Total time: {pure_inf_time:.2f} s')


if __name__ == '__main__':
    main()
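
The script reads only a handful of fields from the config it is given. Below is a minimal sketch of what it expects, with field names inferred from the attribute accesses above; the `type` values are illustrative, and real MMPose configs define many more options:

```python
# Hypothetical minimal config sketch: only the fields that
# tools/benchmark_inference.py reads; real configs define far more.
cudnn_benchmark = True  # read via cfg.get('cudnn_benchmark', False)
fp16 = dict(loss_scale=512.)  # optional; its presence enables wrap_fp16_model
model = dict(type='TopDown')  # consumed by build_posenet (details elided)
data = dict(
    workers_per_gpu=2,  # used by build_dataloader
    val=dict(type='TopDownCocoDataset'),  # consumed by build_dataset
)
```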
