PyTorch Hub amp.autocast() inference (ultralytics#2641)
I think this should help speed up CUDA inference, as models may currently be running FP32 inference on CUDA devices unnecessarily.

(cherry picked from commit 2bf34f5)
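
For context, a minimal standalone sketch (not part of this commit; the Linear model and tensor shapes are placeholders) of what amp.autocast does on a CUDA device: eligible ops run in FP16, so outputs come back as float16 rather than float32.

import torch
from torch.cuda import amp

# Placeholder model and input; requires a CUDA device to show the effect.
model = torch.nn.Linear(640, 80).cuda().eval()
x = torch.rand(1, 640).cuda()  # float32 input

# enabled=... lets the same code run unchanged on CPU, where
# torch.cuda.amp autocasting is unavailable, by disabling itself.
with torch.no_grad(), amp.autocast(enabled=x.device.type != 'cpu'):
    y = model(x)

print(y.dtype)  # torch.float16 under autocast on CUDA; torch.float32 otherwise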
glenn-jocher authored and Lechtr committed Apr 1, 2021
1 parent b7d28d3 commit 7877ac8
Showing 1 changed file with 9 additions and 8 deletions.
models/common.py

@@ -8,6 +8,7 @@
 import torch
 import torch.nn as nn
 from PIL import Image
+from torch.cuda import amp
 
 from utils.datasets import letterbox
 from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
@@ -219,17 +220,17 @@ def forward(self, imgs, size=640, augment=False, profile=False):
             x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
         t.append(time_synchronized())
 
-        # Inference
-        with torch.no_grad():
+        with torch.no_grad(), amp.autocast(enabled=p.device.type != 'cpu'):
+            # Inference
             y = self.model(x, augment, profile)[0]  # forward
-        t.append(time_synchronized())
+            t.append(time_synchronized())
 
-        # Post-process
-        y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
-        for i in range(n):
-            scale_coords(shape1, y[i][:, :4], shape0[i])
-        t.append(time_synchronized())
+            # Post-process
+            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
+            for i in range(n):
+                scale_coords(shape1, y[i][:, :4], shape0[i])
 
+            t.append(time_synchronized())
         return Detections(imgs, y, files, t, self.names, x.shape)
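
With this change, callers get mixed-precision CUDA inference automatically; nothing changes on the caller's side. A hedged usage sketch (the model variant and image filename are illustrative, and a CUDA-capable machine is assumed):

import torch

# Load a YOLOv5 model via PyTorch Hub; 'yolov5s' is an example variant.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# forward() now runs under torch.no_grad() + amp.autocast(enabled=p.device.type != 'cpu'),
# so inference executes in FP16 where safe on CUDA and stays FP32 on CPU.
results = model('zidane.jpg', size=640)  # example local image path
results.print()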


