diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 7a3e1b7ddfbc..f25b017ace8b 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,3 +1,4 @@
+
diff --git a/.github/SECURITY.md b/.github/SECURITY.md
new file mode 100644
index 000000000000..aa3e8409da6b
--- /dev/null
+++ b/.github/SECURITY.md
@@ -0,0 +1,7 @@
+# Security Policy
+
+We aim to make YOLOv5 🚀 as secure as possible! If you find potential vulnerabilities or have any concerns, please let us know so we can investigate and take corrective action if needed.
+
+### Reporting a Vulnerability
+
+To report vulnerabilities, please email us at hello@ultralytics.com or visit https://ultralytics.com/contact. Thank you!
diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
index 5cf1613ab0cd..f2096ce17a17 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -25,9 +25,9 @@ jobs:
# Timeout: https://stackoverflow.com/a/59076067/4521646
timeout-minutes: 60
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 67f51f0e8bce..8bc88e957a36 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -22,7 +22,7 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
diff --git a/.github/workflows/rebase.yml b/.github/workflows/rebase.yml
index a4db1efb2971..75c57546166b 100644
--- a/.github/workflows/rebase.yml
+++ b/.github/workflows/rebase.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout the latest code
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
with:
token: ${{ secrets.ACTIONS_TOKEN }}
fetch-depth: 0 # otherwise, you will fail to push refs to dest repo
diff --git a/Dockerfile b/Dockerfile
index 489dd04ce5c9..304e8b2801a9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,7 +11,7 @@ COPY requirements.txt .
RUN python -m pip install --upgrade pip
RUN pip uninstall -y torch torchvision torchtext
RUN pip install --no-cache -r requirements.txt albumentations wandb gsutil notebook \
- torch==1.10.2+cu113 torchvision==0.11.3+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
+ torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# RUN pip install --no-cache -U torch torchvision
# Create working directory
@@ -19,7 +19,8 @@ RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app
# Copy contents
-COPY . /usr/src/app
+RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app
+# COPY . /usr/src/app
# Downloads to user config dir
ADD https://ultralytics.com/assets/Arial.ttf /root/.config/Ultralytics/
diff --git a/README.md b/README.md
index 9f61157f4d5f..3ebc085b6c33 100644
--- a/README.md
+++ b/README.md
@@ -245,7 +245,7 @@ We are super excited about our first-ever Ultralytics YOLOv5 🚀 EXPORT Competi
|[YOLOv5x][assets] |640 |50.7 |68.9 |766 |12.1 |4.8 |86.7 |205.7
| | | | | | | | |
|[YOLOv5n6][assets] |1280 |36.0 |54.4 |153 |8.1 |2.1 |3.2 |4.6
-|[YOLOv5s6][assets] |1280 |44.8 |63.7 |385 |8.2 |3.6 |16.8 |12.6
+|[YOLOv5s6][assets] |1280 |44.8 |63.7 |385 |8.2 |3.6 |12.6 |16.8
|[YOLOv5m6][assets] |1280 |51.3 |69.3 |887 |11.1 |6.8 |35.7 |50.0
|[YOLOv5l6][assets] |1280 |53.7 |71.3 |1784 |15.8 |10.5 |76.8 |111.4
|[YOLOv5x6][assets]<br>+ [TTA][TTA]|1280<br>1536 |55.0<br>**55.8** |72.7<br>**72.7** |3136<br>- |26.2<br>- |19.4<br>- |140.7<br>- |209.8<br>-
diff --git a/data/hyps/hyp.VOC.yaml b/data/hyps/hyp.VOC.yaml
index aa952c501969..0aa4e7d9f8f5 100644
--- a/data/hyps/hyp.VOC.yaml
+++ b/data/hyps/hyp.VOC.yaml
@@ -4,37 +4,37 @@
# See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials
# YOLOv5 Hyperparameter Evolution Results
-# Best generation: 319
-# Last generation: 434
+# Best generation: 467
+# Last generation: 996
# metrics/precision, metrics/recall, metrics/mAP_0.5, metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss
-# 0.86236, 0.86184, 0.91274, 0.72647, 0.0077056, 0.0042449, 0.0013846
+# 0.87729, 0.85125, 0.91286, 0.72664, 0.0076739, 0.0042529, 0.0013865
-lr0: 0.0033
-lrf: 0.15184
-momentum: 0.74747
+lr0: 0.00334
+lrf: 0.15135
+momentum: 0.74832
weight_decay: 0.00025
-warmup_epochs: 3.4278
-warmup_momentum: 0.59032
-warmup_bias_lr: 0.18742
+warmup_epochs: 3.3835
+warmup_momentum: 0.59462
+warmup_bias_lr: 0.18657
box: 0.02
-cls: 0.21563
+cls: 0.21638
cls_pw: 0.5
-obj: 0.50843
-obj_pw: 0.6729
+obj: 0.51728
+obj_pw: 0.67198
iou_t: 0.2
-anchor_t: 3.4172
+anchor_t: 3.3744
fl_gamma: 0.0
-hsv_h: 0.01032
-hsv_s: 0.5562
-hsv_v: 0.28255
+hsv_h: 0.01041
+hsv_s: 0.54703
+hsv_v: 0.27739
degrees: 0.0
-translate: 0.04575
-scale: 0.73711
+translate: 0.04591
+scale: 0.75544
shear: 0.0
perspective: 0.0
flipud: 0.0
fliplr: 0.5
-mosaic: 0.87158
-mixup: 0.04294
+mosaic: 0.85834
+mixup: 0.04266
copy_paste: 0.0
-anchors: 3.3556
+anchors: 3.412
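These evolved values are plain YAML consumed by train.py. A minimal sketch of loading them, assuming the file path above:

```python
import yaml

with open('data/hyps/hyp.VOC.yaml', errors='ignore') as f:
    hyp = yaml.safe_load(f)  # dict of evolved hyperparameters
print(hyp['lr0'], hyp['anchors'])  # 0.00334 3.412, per the values above
```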
diff --git a/detect.py b/detect.py
index 76f67bea1b90..ccb9fbf5103f 100644
--- a/detect.py
+++ b/detect.py
@@ -89,15 +89,10 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
# Load model
device = select_device(device)
- model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
- stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
+ model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
+ stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
- # Half
- half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
- if pt or jit:
- model.model.half() if half else model.model.float()
-
# Dataloader
if webcam:
view_img = check_imshow()
@@ -110,12 +105,12 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
- model.warmup(imgsz=(1 if pt else bs, 3, *imgsz), half=half) # warmup
+ model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
dt, seen = [0.0, 0.0, 0.0], 0
for path, im, im0s, vid_cap, s in dataset:
t1 = time_sync()
im = torch.from_numpy(im).to(device)
- im = im.half() if half else im.float() # uint8 to fp16/32
+ im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
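FP16 handling now lives entirely inside DetectMultiBackend: callers pass `fp16=half` once and read `model.fp16` back, instead of tracking per-backend support themselves. A minimal sketch of the new calling pattern (weights path and zero image are illustrative; assumes the yolov5 repo is importable):

```python
import torch
from models.common import DetectMultiBackend

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = DetectMultiBackend('yolov5s.pt', device=device, fp16=True)  # FP16 resolved internally
model.warmup(imgsz=(1, 3, 640, 640))  # no half= argument anymore

im = torch.zeros(1, 3, 640, 640, device=device)
im = im.half() if model.fp16 else im.float()  # dtype follows the backend's decision
pred = model(im)
```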
diff --git a/export.py b/export.py
index 15e92a784a50..2d4a68e62f89 100644
--- a/export.py
+++ b/export.py
@@ -75,18 +75,18 @@
def export_formats():
# YOLOv5 export formats
- x = [['PyTorch', '-', '.pt'],
- ['TorchScript', 'torchscript', '.torchscript'],
- ['ONNX', 'onnx', '.onnx'],
- ['OpenVINO', 'openvino', '_openvino_model'],
- ['TensorRT', 'engine', '.engine'],
- ['CoreML', 'coreml', '.mlmodel'],
- ['TensorFlow SavedModel', 'saved_model', '_saved_model'],
- ['TensorFlow GraphDef', 'pb', '.pb'],
- ['TensorFlow Lite', 'tflite', '.tflite'],
- ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite'],
- ['TensorFlow.js', 'tfjs', '_web_model']]
- return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix'])
+ x = [['PyTorch', '-', '.pt', True],
+ ['TorchScript', 'torchscript', '.torchscript', True],
+ ['ONNX', 'onnx', '.onnx', True],
+ ['OpenVINO', 'openvino', '_openvino_model', False],
+ ['TensorRT', 'engine', '.engine', True],
+ ['CoreML', 'coreml', '.mlmodel', False],
+ ['TensorFlow SavedModel', 'saved_model', '_saved_model', True],
+ ['TensorFlow GraphDef', 'pb', '.pb', True],
+ ['TensorFlow Lite', 'tflite', '.tflite', False],
+ ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False],
+ ['TensorFlow.js', 'tfjs', '_web_model', False]]
+ return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'GPU'])
def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
@@ -218,6 +218,7 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
builder = trt.Builder(logger)
config = builder.create_builder_config()
config.max_workspace_size = workspace * 1 << 30
+ # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) # fix TRT 8.4 deprecation notice
flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
network = builder.create_network(flag)
@@ -233,9 +234,8 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
for out in outputs:
LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
- half &= builder.platform_has_fast_fp16
- LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}')
- if half:
+ LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 else 32} engine in {f}')
+ if builder.platform_has_fast_fp16:
config.set_flag(trt.BuilderFlag.FP16)
with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
t.write(engine.serialize())
@@ -260,9 +260,9 @@ def export_saved_model(model, im, file, dynamic,
batch_size, ch, *imgsz = list(im.shape) # BCHW
tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
- im = tf.zeros((batch_size, *imgsz, 3)) # BHWC order for TensorFlow
+ im = tf.zeros((batch_size, *imgsz, ch)) # BHWC order for TensorFlow
_ = tf_model.predict(im, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
- inputs = tf.keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
+ inputs = tf.keras.Input(shape=(*imgsz, ch), batch_size=None if dynamic else batch_size)
outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
keras_model.trainable = False
@@ -275,7 +275,7 @@ def export_saved_model(model, im, file, dynamic,
m = m.get_concrete_function(spec)
frozen_func = convert_variables_to_constants_v2(m)
tfm = tf.Module()
- tfm.__call__ = tf.function(lambda x: frozen_func(x), [spec])
+ tfm.__call__ = tf.function(lambda x: frozen_func(x)[0], [spec])
tfm.__call__(im)
tf.saved_model.save(
tfm,
@@ -331,7 +331,7 @@ def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('Te
converter.target_spec.supported_types = []
converter.inference_input_type = tf.uint8 # or tf.int8
converter.inference_output_type = tf.uint8 # or tf.int8
- converter.experimental_new_quantizer = False
+ converter.experimental_new_quantizer = True
f = str(file).replace('.pt', '-int8.tflite')
tflite_model = converter.convert()
@@ -494,7 +494,7 @@ def run(data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path'
if int8 or edgetpu: # TFLite --int8 bug https://github.com/ultralytics/yolov5/issues/5707
check_requirements(('flatbuffers==1.12',)) # required before `import tensorflow`
assert not (tflite and tfjs), 'TFLite and TF.js models must be exported separately, please pass only one type.'
- model, f[5] = export_saved_model(model, im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs,
+ model, f[5] = export_saved_model(model.cpu(), im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs,
agnostic_nms=agnostic_nms or tfjs, topk_per_class=topk_per_class,
topk_all=topk_all, conf_thres=conf_thres, iou_thres=iou_thres) # keras model
if pb or tfjs: # pb prerequisite to tfjs
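The new boolean GPU column is what utils/benchmarks.py filters on below; it can also be queried directly. A small sketch (assumes export.py is importable from the repo root):

```python
from export import export_formats

fmts = export_formats()  # DataFrame with Format, Argument, Suffix, GPU columns
print(fmts[fmts.GPU].Argument.tolist())
# ['-', 'torchscript', 'onnx', 'engine', 'saved_model', 'pb'] per the table above
```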
diff --git a/models/common.py b/models/common.py
index 0dae0244e932..066f8774d3c3 100644
--- a/models/common.py
+++ b/models/common.py
@@ -31,7 +31,7 @@
def autopad(k, p=None): # kernel, padding
# Pad to 'same'
if p is None:
- p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
+ p = k // 2 if isinstance(k, int) else (x // 2 for x in k) # auto-pad
return p
@@ -133,7 +133,7 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, nu
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2)
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
- # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
+ # self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
@@ -194,7 +194,7 @@ def forward(self, x):
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
y1 = self.m(x)
y2 = self.m(y1)
- return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
+ return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
class Focus(nn.Module):
@@ -205,7 +205,7 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, k
# self.contract = Contract(gain=2)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
- return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
+ return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
# return self.conv(self.contract(x))
@@ -219,7 +219,7 @@ def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, s
def forward(self, x):
y = self.cv1(x)
- return torch.cat([y, self.cv2(y)], 1)
+ return torch.cat((y, self.cv2(y)), 1)
class GhostBottleneck(nn.Module):
@@ -277,7 +277,7 @@ def forward(self, x):
class DetectMultiBackend(nn.Module):
# YOLOv5 MultiBackend class for python inference on various backends
- def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
+ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False):
# Usage:
# PyTorch: weights = *.pt
# TorchScript: *.torchscript
@@ -297,6 +297,7 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend
stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults
w = attempt_download(w) # download if not local
+ fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16
if data: # data.yaml path (optional)
with open(data, errors='ignore') as f:
names = yaml.safe_load(f)['names'] # class names
@@ -305,11 +306,13 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
stride = max(int(model.stride.max()), 32) # model stride
names = model.module.names if hasattr(model, 'module') else model.names # get class names
+ model.half() if fp16 else model.float()
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
elif jit: # TorchScript
LOGGER.info(f'Loading {w} for TorchScript inference...')
extra_files = {'config.txt': ''} # model metadata
model = torch.jit.load(w, _extra_files=extra_files)
+ model.half() if fp16 else model.float()
if extra_files['config.txt']:
d = json.loads(extra_files['config.txt']) # extra_files dict
stride, names = int(d['stride']), d['names']
@@ -342,12 +345,15 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
model = runtime.deserialize_cuda_engine(f.read())
bindings = OrderedDict()
+ fp16 = False # default updated below
for index in range(model.num_bindings):
name = model.get_binding_name(index)
dtype = trt.nptype(model.get_binding_dtype(index))
shape = tuple(model.get_binding_shape(index))
data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
+ if model.binding_is_input(index) and dtype == np.float16:
+ fp16 = True
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
context = model.create_execution_context()
batch_size = bindings['images'].shape[0]
@@ -435,7 +441,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
if self.saved_model: # SavedModel
- y = (self.model(im, training=False) if self.keras else self.model(im)[0]).numpy()
+ y = (self.model(im, training=False) if self.keras else self.model(im)).numpy()
elif self.pb: # GraphDef
y = self.frozen_func(x=self.tf.constant(im)).numpy()
else: # Lite or Edge TPU
@@ -452,15 +458,17 @@ def forward(self, im, augment=False, visualize=False, val=False):
y = (y.astype(np.float32) - zero_point) * scale # re-scale
y[..., :4] *= [w, h, w, h] # xywh normalized to pixels
- y = torch.tensor(y) if isinstance(y, np.ndarray) else y
+ if isinstance(y, np.ndarray):
+ y = torch.tensor(y, device=self.device)
return (y, []) if val else y
- def warmup(self, imgsz=(1, 3, 640, 640), half=False):
+ def warmup(self, imgsz=(1, 3, 640, 640)):
# Warmup model by running inference once
- if self.pt or self.jit or self.onnx or self.engine: # warmup types
- if isinstance(self.device, torch.device) and self.device.type != 'cpu': # only warmup GPU models
- im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float) # input image
- self.forward(im) # warmup
+ if any((self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb)): # warmup types
+ if self.device.type != 'cpu': # only warmup GPU models
+ im = torch.zeros(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
+ for _ in range(2 if self.jit else 1):  # run twice to warm up JIT models
+ self.forward(im) # warmup
@staticmethod
def model_type(p='path/to/model.pt'):
@@ -541,10 +549,9 @@ def forward(self, imgs, size=640, augment=False, profile=False):
g = (size / max(s)) # gain
shape1.append([y * g for y in s])
imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
- shape1 = [make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0)] # inference shape
- x = [letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0] for im in imgs] # pad
- x = np.stack(x, 0) if n > 1 else x[0][None] # stack
- x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW
+ shape1 = [make_divisible(x, self.stride) if self.pt else size for x in np.array(shape1).max(0)] # inf shape
+ x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad
+ x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
t.append(time_sync())
diff --git a/models/experimental.py b/models/experimental.py
index 463e5514a06e..1230f4656c8f 100644
--- a/models/experimental.py
+++ b/models/experimental.py
@@ -94,21 +94,22 @@ def attempt_load(weights, map_location=None, inplace=True, fuse=True):
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
ckpt = torch.load(attempt_download(w), map_location=map_location) # load
- if fuse:
- model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model
- else:
- model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval()) # without layer fuse
+ ckpt = (ckpt.get('ema') or ckpt['model']).float() # FP32 model
+ model.append(ckpt.fuse().eval() if fuse else ckpt.eval()) # fused or un-fused model in eval mode
# Compatibility updates
for m in model.modules():
- if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]:
- m.inplace = inplace # pytorch 1.7.0 compatibility
- if type(m) is Detect:
+ t = type(m)
+ if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
+ m.inplace = inplace # torch 1.7.0 compatibility
+ if t is Detect:
if not isinstance(m.anchor_grid, list): # new Detect Layer compatibility
delattr(m, 'anchor_grid')
setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
- elif type(m) is Conv:
- m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
+ elif t is Conv:
+ m._non_persistent_buffers_set = set() # torch 1.6.0 compatibility
+ elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
+ m.recompute_scale_factor = None # torch 1.11.0 compatibility
if len(model) == 1:
return model[-1] # return model
diff --git a/models/tf.py b/models/tf.py
index 74681e403afd..728907f8fb47 100644
--- a/models/tf.py
+++ b/models/tf.py
@@ -222,19 +222,21 @@ def call(self, inputs):
x.append(self.m[i](inputs[i]))
# x(bs,20,20,255) to x(bs,3,20,20,85)
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
- x[i] = tf.transpose(tf.reshape(x[i], [-1, ny * nx, self.na, self.no]), [0, 2, 1, 3])
+ x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
if not self.training: # inference
y = tf.sigmoid(x[i])
- xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i] # xy
- wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
+ grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
+ anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
+ xy = (y[..., 0:2] * 2 + grid) * self.stride[i] # xy
+ wh = y[..., 2:4] ** 2 * anchor_grid
# Normalize xywh to 0-1 to reduce calibration error
xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
y = tf.concat([xy, wh, y[..., 4:]], -1)
z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
- return x if self.training else (tf.concat(z, 1), x)
+ return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), x)
@staticmethod
def _make_grid(nx=20, ny=20):
diff --git a/models/yolo.py b/models/yolo.py
index f659a04545b9..09215101e8a0 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -62,9 +62,10 @@ def forward(self, x):
y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i] # xy
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
- xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i] # xy
- wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
- y = torch.cat((xy, wh, y[..., 4:]), -1)
+ xy, wh, conf = y.tensor_split((2, 4), 4)
+ xy = (xy * 2 - 0.5 + self.grid[i]) * self.stride[i] # xy
+ wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
+ y = torch.cat((xy, wh, conf), 4)
z.append(y.view(bs, -1, self.no))
return x if self.training else (torch.cat(z, 1), x)
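tensor_split returns views of a single tensor instead of three advanced-indexing copies, which is the point of this Inferentia-friendly path. A minimal illustration of the split (shapes follow the usual Detect layout):

```python
import torch

y = torch.rand(1, 3, 20, 20, 85)          # (bs, na, ny, nx, no) as in Detect.forward
xy, wh, conf = y.tensor_split((2, 4), 4)  # split dim 4 at indices 2 and 4
print(xy.shape[-1], wh.shape[-1], conf.shape[-1])  # 2 2 81
```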
@@ -110,8 +111,8 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, i
s = 256 # 2x min stride
m.inplace = self.inplace
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
+ check_anchor_order(m) # must be in pixel-space (not grid-space)
m.anchors /= m.stride.view(-1, 1, 1)
- check_anchor_order(m)
self.stride = m.stride
self._initialize_biases() # only run once
diff --git a/train.py b/train.py
index d8df31b72282..60be962d447f 100644
--- a/train.py
+++ b/train.py
@@ -268,7 +268,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
# Start training
t0 = time.time()
- nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations)
+ nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
last_opt_step = -1
maps = np.zeros(nc) # mAP per class
diff --git a/tutorial.ipynb b/tutorial.ipynb
index 09b2b33bda6f..1479a164cd8e 100644
--- a/tutorial.ipynb
+++ b/tutorial.ipynb
@@ -420,7 +420,7 @@
"name": "stdout",
"text": [
"YOLOv5 🚀 v6.0-48-g84a8099 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n",
- "Setup complete ✅\n"
+ "Setup complete ✅ (2 CPUs, 12.7 GB RAM, 42.2/166.8 GB disk)\n"
]
}
]
@@ -731,7 +731,7 @@
"output_type": "stream",
"name": "stdout",
"text": [
- "\u001b[34m\u001b[1mtrain: \u001b[0mweights=yolov5s.pt, cfg=, data=coco128.yaml, hyp=data/hyps/hyp.scratch.yaml, epochs=3, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, patience=100, freeze=0, save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest\n",
+ "\u001b[34m\u001b[1mtrain: \u001b[0mweights=yolov5s.pt, cfg=, data=coco128.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=3, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, patience=100, freeze=0, save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest\n",
"\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov5 ✅\n",
"YOLOv5 🚀 v6.0-48-g84a8099 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n",
"\n",
@@ -1078,7 +1078,7 @@
"source": [
"# VOC\n",
"for b, m in zip([64, 64, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']): # zip(batch_size, model)\n",
- " !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}"
+ " !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.VOC.yaml --project VOC --name {m}"
],
"execution_count": null,
"outputs": []
diff --git a/utils/__init__.py b/utils/__init__.py
index 4658ed6473cd..a63c473a4340 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -21,14 +21,13 @@ def notebook_init(verbose=True):
if is_colab():
shutil.rmtree('/content/sample_data', ignore_errors=True) # remove colab /sample_data directory
+ # System info
if verbose:
- # System info
- # gb = 1 / 1000 ** 3 # bytes to GB
- gib = 1 / 1024 ** 3 # bytes to GiB
+ gb = 1 << 30 # bytes to GiB (1024 ** 3)
ram = psutil.virtual_memory().total
total, used, free = shutil.disk_usage("/")
display.clear_output()
- s = f'({os.cpu_count()} CPUs, {ram * gib:.1f} GB RAM, {(total - free) * gib:.1f}/{total * gib:.1f} GB disk)'
+ s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)'
else:
s = ''
diff --git a/utils/autoanchor.py b/utils/autoanchor.py
index 27d6fb68bb38..77518abe9889 100644
--- a/utils/autoanchor.py
+++ b/utils/autoanchor.py
@@ -17,10 +17,10 @@
def check_anchor_order(m):
# Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
- a = m.anchors.prod(-1).view(-1) # anchor area
+ a = m.anchors.prod(-1).mean(-1).view(-1) # mean anchor area per output layer
da = a[-1] - a[0] # delta a
ds = m.stride[-1] - m.stride[0] # delta s
- if da.sign() != ds.sign(): # same order
+ if da and (da.sign() != ds.sign()): # order mismatch
LOGGER.info(f'{PREFIX}Reversing anchor order')
m.anchors[:] = m.anchors.flip(0)
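Averaging anchor area per output layer makes the order check robust to size overlap between anchors of adjacent layers, and the `da` truthiness guard skips the flip when the delta is zero. A quick sketch with the default YOLOv5 anchors, which must be in pixel-space (i.e. before dividing by stride):

```python
import torch

anchors = torch.tensor([[[10., 13], [16, 30], [33, 23]],       # P3/8
                        [[30, 61], [62, 45], [59, 119]],       # P4/16
                        [[116, 90], [156, 198], [373, 326]]])  # P5/32
a = anchors.prod(-1).mean(-1).view(-1)  # mean anchor area per layer
print(a)  # ~[456.3, 3880.3, 54308.7], increasing with stride, so no flip needed
```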
@@ -40,7 +40,8 @@ def metric(k): # compute metric
bpr = (best > 1 / thr).float().mean() # best possible recall
return bpr, aat
- anchors = m.anchors.clone() * m.stride.to(m.anchors.device).view(-1, 1, 1) # current anchors
+ stride = m.stride.to(m.anchors.device).view(-1, 1, 1) # model strides
+ anchors = m.anchors.clone() * stride # current anchors
bpr, aat = metric(anchors.cpu().view(-1, 2))
s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). '
if bpr > 0.98: # threshold to recompute
@@ -55,11 +56,13 @@ def metric(k): # compute metric
new_bpr = metric(anchors)[0]
if new_bpr > bpr: # replace anchors
anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
- m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss
- check_anchor_order(m)
- LOGGER.info(f'{PREFIX}New anchors saved to model. Update model *.yaml to use these anchors in the future.')
+ m.anchors[:] = anchors.clone().view_as(m.anchors)
+ check_anchor_order(m) # must be in pixel-space (not grid-space)
+ m.anchors /= stride
+ s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)'
else:
- LOGGER.info(f'{PREFIX}Original anchors better than new anchors. Proceeding with original anchors.')
+ s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)'
+ LOGGER.info(emojis(s))
def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
@@ -120,19 +123,21 @@ def print_results(k, verbose=True):
# Filter
i = (wh0 < 3.0).any(1).sum()
if i:
- LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
+ LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found: {i} of {len(wh0)} labels are < 3 pixels in size')
wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels
# wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
- # Kmeans calculation
- LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
- s = wh.std(0) # sigmas for whitening
- k = kmeans(wh / s, n, iter=30)[0] * s # points
- if len(k) != n: # kmeans may return fewer points than requested if wh is insufficient or too similar
- LOGGER.warning(f'{PREFIX}WARNING: scipy.cluster.vq.kmeans returned only {len(k)} of {n} requested points')
+ # Kmeans init
+ try:
+ LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
+ assert n <= len(wh) # apply overdetermined constraint
+ s = wh.std(0) # sigmas for whitening
+ k = kmeans(wh / s, n, iter=30)[0] * s # points
+ assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar
+ except Exception:
+ LOGGER.warning(f'{PREFIX}WARNING: switching strategies from kmeans to random init')
k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init
- wh = torch.tensor(wh, dtype=torch.float32) # filtered
- wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered
+ wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
k = print_results(k, verbose=False)
# Plot
@@ -149,7 +154,7 @@ def print_results(k, verbose=True):
# Evolve
f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma
- pbar = tqdm(range(gen), desc=f'{PREFIX}Evolving anchors with Genetic Algorithm:') # progress bar
+ pbar = tqdm(range(gen), bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar
for _ in pbar:
v = np.ones(sh)
while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
diff --git a/utils/autobatch.py b/utils/autobatch.py
index cb94f041e95d..e53b4787b87d 100644
--- a/utils/autobatch.py
+++ b/utils/autobatch.py
@@ -34,11 +34,12 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
return batch_size
+ gb = 1 << 30 # bytes to GiB (1024 ** 3)
d = str(device).upper() # 'CUDA:0'
properties = torch.cuda.get_device_properties(device) # device properties
- t = properties.total_memory / 1024 ** 3 # (GiB)
- r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GiB)
- a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GiB)
+ t = properties.total_memory / gb # (GiB)
+ r = torch.cuda.memory_reserved(device) / gb # (GiB)
+ a = torch.cuda.memory_allocated(device) / gb # (GiB)
f = t - (r + a) # free inside reserved
LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')
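`1 << 30` is exactly 1024 ** 3, so the figures stay in GiB while making the unit explicit at a glance. For example (byte count is illustrative):

```python
gb = 1 << 30                       # bytes per GiB (1024 ** 3)
total = 16_945_512_448             # e.g. a 16 GB card
print(f'{total / gb:.2f}G total')  # 15.78G total
```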
diff --git a/utils/benchmarks.py b/utils/benchmarks.py
index 962df812a9d3..bdbbdc43b639 100644
--- a/utils/benchmarks.py
+++ b/utils/benchmarks.py
@@ -19,6 +19,7 @@
Requirements:
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU
+ $ pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com # TensorRT
Usage:
$ python utils/benchmarks.py --weights yolov5s.pt --img 640
@@ -41,20 +42,29 @@
import val
from utils import notebook_init
from utils.general import LOGGER, print_args
+from utils.torch_utils import select_device
def run(weights=ROOT / 'yolov5s.pt', # weights path
imgsz=640, # inference size (pixels)
batch_size=1, # batch size
data=ROOT / 'data/coco128.yaml', # dataset.yaml path
+ device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
+ half=False, # use FP16 half-precision inference
):
y, t = [], time.time()
formats = export.export_formats()
- for i, (name, f, suffix) in formats.iterrows(): # index, (name, file, suffix)
+ device = select_device(device)
+ for i, (name, f, suffix, gpu) in formats.iterrows(): # index, (name, file, suffix, gpu-capable)
try:
- w = weights if f == '-' else export.run(weights=weights, imgsz=[imgsz], include=[f], device='cpu')[-1]
+ if device.type != 'cpu':
+ assert gpu, f'{name} inference not supported on GPU'
+ if f == '-':
+ w = weights # PyTorch format
+ else:
+ w = export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # all others
assert suffix in str(w), 'export failed'
- result = val.run(data, w, batch_size, imgsz=imgsz, plots=False, device='cpu', task='benchmark')
+ result = val.run(data, w, batch_size, imgsz, plots=False, device=device, task='benchmark', half=half)
metrics = result[0] # metrics (mp, mr, map50, map, *losses(box, obj, cls))
speeds = result[2] # times (preprocess, inference, postprocess)
y.append([name, metrics[3], speeds[1]]) # mAP, t_inference
@@ -78,6 +88,8 @@ def parse_opt():
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
+ parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+ parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
opt = parser.parse_args()
print_args(FILE.stem, opt)
return opt
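With the two new flags, a GPU benchmark run becomes e.g. `python utils/benchmarks.py --weights yolov5s.pt --img 640 --device 0 --half`; formats whose GPU column is False fail fast through the `assert gpu` guard instead of erroring mid-export.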
diff --git a/utils/datasets.py b/utils/datasets.py
index e132e04f6d9d..8627344af7b4 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -15,6 +15,7 @@
from multiprocessing.pool import Pool, ThreadPool
from pathlib import Path
from threading import Thread
+from urllib.parse import urlparse
from zipfile import ZipFile
import cv2
@@ -33,8 +34,9 @@
# Parameters
HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
-IMG_FORMATS = ['bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp'] # include image suffixes
-VID_FORMATS = ['asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'wmv'] # include video suffixes
+IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp' # include image suffixes
+VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes
+BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}' # tqdm bar format
# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
@@ -300,7 +302,7 @@ def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True):
for i, s in enumerate(sources): # index, source
# Start thread to read frames from video stream
st = f'{i + 1}/{n}: {s}... '
- if 'youtube.com/' in s or 'youtu.be/' in s: # if source is YouTube video
+ if urlparse(s).hostname in ('youtube.com', 'youtu.be'): # if source is YouTube video
check_requirements(('pafy', 'youtube_dl==2020.12.2'))
import pafy
s = pafy.new(s).getbest(preftype="mp4").url # YouTube URL
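urlparse makes the YouTube check an exact hostname match rather than a substring test, so a non-YouTube source whose path merely contains 'youtube.com' is no longer misclassified. A tiny illustration (URLs are examples):

```python
from urllib.parse import urlparse

print(urlparse('https://youtu.be/abc123').hostname)     # 'youtu.be' -> YouTube branch
print(urlparse('rtsp://192.168.1.10/stream').hostname)  # '192.168.1.10' -> generic stream
```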
@@ -407,19 +409,19 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
# f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib)
else:
raise Exception(f'{prefix}{p} does not exist')
- self.img_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
+ self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
# self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib
- assert self.img_files, f'{prefix}No images found'
+ assert self.im_files, f'{prefix}No images found'
except Exception as e:
raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}')
# Check cache
- self.label_files = img2label_paths(self.img_files) # labels
+ self.label_files = img2label_paths(self.im_files) # labels
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
try:
cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict
assert cache['version'] == self.cache_version # same version
- assert cache['hash'] == get_hash(self.label_files + self.img_files) # same hash
+ assert cache['hash'] == get_hash(self.label_files + self.im_files) # same hash
except Exception:
cache, exists = self.cache_labels(cache_path, prefix), False # cache
@@ -427,7 +429,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total
if exists:
d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupt"
- tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results
+ tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=BAR_FORMAT) # display cache results
if cache['msgs']:
LOGGER.info('\n'.join(cache['msgs'])) # display warnings
assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'
@@ -437,7 +439,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
labels, shapes, self.segments = zip(*cache.values())
self.labels = list(labels)
self.shapes = np.array(shapes, dtype=np.float64)
- self.img_files = list(cache.keys()) # update
+ self.im_files = list(cache.keys()) # update
self.label_files = img2label_paths(cache.keys()) # update
n = len(shapes) # number of images
bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
@@ -466,7 +468,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
s = self.shapes # wh
ar = s[:, 1] / s[:, 0] # aspect ratio
irect = ar.argsort()
- self.img_files = [self.img_files[i] for i in irect]
+ self.im_files = [self.im_files[i] for i in irect]
self.label_files = [self.label_files[i] for i in irect]
self.labels = [self.labels[i] for i in irect]
self.shapes = s[irect] # wh
@@ -485,24 +487,20 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
# Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources)
- self.imgs, self.img_npy = [None] * n, [None] * n
+ self.ims = [None] * n
+ self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
if cache_images:
- if cache_images == 'disk':
- self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
- self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
- self.im_cache_dir.mkdir(parents=True, exist_ok=True)
gb = 0 # Gigabytes of cached images
- self.img_hw0, self.img_hw = [None] * n, [None] * n
- results = ThreadPool(NUM_THREADS).imap(self.load_image, range(n))
- pbar = tqdm(enumerate(results), total=n)
+ self.im_hw0, self.im_hw = [None] * n, [None] * n
+ fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image
+ results = ThreadPool(NUM_THREADS).imap(fcn, range(n))
+ pbar = tqdm(enumerate(results), total=n, bar_format=BAR_FORMAT)
for i, x in pbar:
if cache_images == 'disk':
- if not self.img_npy[i].exists():
- np.save(self.img_npy[i].as_posix(), x[0])
- gb += self.img_npy[i].stat().st_size
+ gb += self.npy_files[i].stat().st_size
else: # 'ram'
- self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i)
- gb += self.imgs[i].nbytes
+ self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i)
+ gb += self.ims[i].nbytes
pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
pbar.close()
@@ -512,8 +510,8 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
with Pool(NUM_THREADS) as pool:
- pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
- desc=desc, total=len(self.img_files))
+ pbar = tqdm(pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix))),
+ desc=desc, total=len(self.im_files), bar_format=BAR_FORMAT)
for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
nm += nm_f
nf += nf_f
@@ -530,8 +528,8 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
LOGGER.info('\n'.join(msgs))
if nf == 0:
LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
- x['hash'] = get_hash(self.label_files + self.img_files)
- x['results'] = nf, nm, ne, nc, len(self.img_files)
+ x['hash'] = get_hash(self.label_files + self.im_files)
+ x['results'] = nf, nm, ne, nc, len(self.im_files)
x['msgs'] = msgs # warnings
x['version'] = self.cache_version # cache version
try:
@@ -543,7 +541,7 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
return x
def __len__(self):
- return len(self.img_files)
+ return len(self.im_files)
# def __iter__(self):
# self.count = -1
@@ -622,17 +620,15 @@ def __getitem__(self, index):
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
- return torch.from_numpy(img), labels_out, self.img_files[index], shapes
+ return torch.from_numpy(img), labels_out, self.im_files[index], shapes
def load_image(self, i):
- # loads 1 image from dataset index 'i', returns (im, original hw, resized hw)
- im = self.imgs[i]
+ # Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)
+ im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
if im is None: # not cached in RAM
- npy = self.img_npy[i]
- if npy and npy.exists(): # load npy
- im = np.load(npy)
+ if fn.exists(): # load npy
+ im = np.load(fn)
else: # read image
- f = self.img_files[i]
im = cv2.imread(f) # BGR
assert im is not None, f'Image Not Found {f}'
h0, w0 = im.shape[:2] # orig hw
@@ -643,7 +639,13 @@ def load_image(self, i):
interpolation=cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA)
return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized
else:
- return self.imgs[i], self.img_hw0[i], self.img_hw[i] # im, hw_original, hw_resized
+ return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized
+
+ def cache_images_to_disk(self, i):
+ # Saves an image as an *.npy file for faster loading
+ f = self.npy_files[i]
+ if not f.exists():
+ np.save(f.as_posix(), cv2.imread(self.im_files[i]))
def load_mosaic(self, index):
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
@@ -777,16 +779,16 @@ def load_mosaic9(self, index):
@staticmethod
def collate_fn(batch):
- img, label, path, shapes = zip(*batch) # transposed
+ im, label, path, shapes = zip(*batch) # transposed
for i, lb in enumerate(label):
lb[:, 0] = i # add target image index for build_targets()
- return torch.stack(img, 0), torch.cat(label, 0), path, shapes
+ return torch.stack(im, 0), torch.cat(label, 0), path, shapes
@staticmethod
def collate_fn4(batch):
img, label, path, shapes = zip(*batch) # transposed
n = len(shapes) // 4
- img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
+ im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
@@ -800,13 +802,13 @@ def collate_fn4(batch):
else:
im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
- img4.append(im)
+ im4.append(im)
label4.append(lb)
for i, lb in enumerate(label4):
lb[:, 0] = i # add target image index for build_targets()
- return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
+ return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4
# Ancillary functions --------------------------------------------------------------------------------------------------
@@ -906,7 +908,7 @@ def verify_image_label(args):
nf = 1 # label found
with open(lb_file) as f:
lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
- if any([len(x) > 8 for x in lb]): # is segment
+ if any(len(x) > 6 for x in lb): # is segment
classes = np.array([x[0] for x in lb], dtype=np.float32)
segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...)
lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)
@@ -999,12 +1001,12 @@ def hub_ops(f, max_dim=1920):
'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),
'per_class': (x > 0).sum(0).tolist()},
'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in
- zip(dataset.img_files, dataset.labels)]}
+ zip(dataset.im_files, dataset.labels)]}
if hub:
im_dir = hub_dir / 'images'
im_dir.mkdir(parents=True, exist_ok=True)
- for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'):
+ for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.im_files), total=dataset.n, desc='HUB Ops'):
pass
# Profile
diff --git a/utils/general.py b/utils/general.py
index 3044b9c1ae78..e8b3b05c5fe1 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -15,6 +15,7 @@
import signal
import time
import urllib
+from datetime import datetime
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
@@ -45,6 +46,7 @@
pd.options.display.max_columns = 10
cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS) # NumExpr max threads
+os.environ['OMP_NUM_THREADS'] = str(NUM_THREADS) # OpenMP max threads (PyTorch and SciPy)
def is_kaggle():
@@ -121,13 +123,15 @@ def _timeout_handler(self, signum, frame):
raise TimeoutError(self.timeout_message)
def __enter__(self):
- signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM
- signal.alarm(self.seconds) # start countdown for SIGALRM to be raised
+ if platform.system() != 'Windows': # not supported on Windows
+ signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM
+ signal.alarm(self.seconds) # start countdown for SIGALRM to be raised
def __exit__(self, exc_type, exc_val, exc_tb):
- signal.alarm(0) # Cancel SIGALRM if it's scheduled
- if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError
- return True
+ if platform.system() != 'Windows':
+ signal.alarm(0) # Cancel SIGALRM if it's scheduled
+ if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError
+ return True
class WorkingDirectory(contextlib.ContextDecorator):
@@ -220,13 +224,26 @@ def emojis(str=''):
return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
+def file_age(path=__file__):
+ # Return days since last file update
+ dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta
+ return dt.days # + dt.seconds / 86400 # fractional days
+
+
+def file_update_date(path=__file__):
+ # Return human-readable file modification date, i.e. '2021-3-26'
+ t = datetime.fromtimestamp(Path(path).stat().st_mtime)
+ return f'{t.year}-{t.month}-{t.day}'
+
+
def file_size(path):
# Return file/dir size (MB)
+ mb = 1 << 20 # bytes to MiB (1024 ** 2)
path = Path(path)
if path.is_file():
- return path.stat().st_size / 1E6
+ return path.stat().st_size / mb
elif path.is_dir():
- return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / 1E6
+ return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb
else:
return 0.0
@@ -241,6 +258,14 @@ def check_online():
return False
+def git_describe(path=ROOT): # path must be a directory
+ # Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
+ try:
+ return check_output(f'git -C {path} describe --tags --long --always', shell=True).decode()[:-1]
+ except Exception:
+ return ''
+
+
@try_except
@WorkingDirectory(ROOT)
def check_git_status():
diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py
index 86ccf38443a9..866bdc4be2f5 100644
--- a/utils/loggers/__init__.py
+++ b/utils/loggers/__init__.py
@@ -47,7 +47,7 @@ def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None,
'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', # metrics
'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss
'x/lr0', 'x/lr1', 'x/lr2'] # params
- self.best_keys = ['best/epoch', 'best/precision', 'best/recall', 'best/mAP_0.5', 'best/mAP_0.5:0.95',]
+ self.best_keys = ['best/epoch', 'best/precision', 'best/recall', 'best/mAP_0.5', 'best/mAP_0.5:0.95']
for k in LOGGERS:
setattr(self, k, None) # init empty logger dictionary
self.csv = True # always log to csv
@@ -56,7 +56,7 @@ def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None,
if not wandb:
prefix = colorstr('Weights & Biases: ')
s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)"
- print(emojis(s))
+ self.logger.info(emojis(s))
# TensorBoard
s = self.save_dir
diff --git a/utils/loggers/wandb/sweep.yaml b/utils/loggers/wandb/sweep.yaml
index c7790d75f6b2..688b1ea0285f 100644
--- a/utils/loggers/wandb/sweep.yaml
+++ b/utils/loggers/wandb/sweep.yaml
@@ -88,7 +88,7 @@ parameters:
fl_gamma:
distribution: uniform
min: 0.0
- max: 0.1
+ max: 4.0
hsv_h:
distribution: uniform
min: 0.0
diff --git a/utils/loggers/wandb/wandb_utils.py b/utils/loggers/wandb/wandb_utils.py
index 3835436543d2..786e58a19972 100644
--- a/utils/loggers/wandb/wandb_utils.py
+++ b/utils/loggers/wandb/wandb_utils.py
@@ -403,7 +403,7 @@ def create_dataset_table(self, dataset: LoadImagesAndLabels, class_to_id: Dict[i
# TODO: Explore multiprocessing to split this loop in parallel; this is essential for speeding up the logging
artifact = wandb.Artifact(name=name, type="dataset")
img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None
- img_files = tqdm(dataset.img_files) if not img_files else img_files
+ img_files = tqdm(dataset.im_files) if not img_files else img_files
for img_file in img_files:
if Path(img_file).is_dir():
artifact.add_dir(img_file, name='data/images')
diff --git a/utils/loss.py b/utils/loss.py
index 5aa9f017d2af..0f0137817955 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -89,9 +89,10 @@ def forward(self, pred, true):
class ComputeLoss:
+ sort_obj_iou = False
+
# Compute losses
def __init__(self, model, autobalance=False):
- self.sort_obj_iou = False
device = next(model.parameters()).device # get model device
h = model.hyp # hyperparameters
@@ -111,26 +112,28 @@ def __init__(self, model, autobalance=False):
self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7
self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index
self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
+ self.device = device
for k in 'na', 'nc', 'nl', 'anchors':
setattr(self, k, getattr(det, k))
- def __call__(self, p, targets): # predictions, targets, model
- device = targets.device
- lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
+ def __call__(self, p, targets): # predictions, targets
+ lcls = torch.zeros(1, device=self.device) # class loss
+ lbox = torch.zeros(1, device=self.device) # box loss
+ lobj = torch.zeros(1, device=self.device) # object loss
tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets
# Losses
for i, pi in enumerate(p): # layer index, layer predictions
b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
- tobj = torch.zeros_like(pi[..., 0], device=device) # target obj
+ tobj = torch.zeros(pi.shape[:4], device=self.device) # target obj
n = b.shape[0] # number of targets
if n:
- ps = pi[b, a, gj, gi] # prediction subset corresponding to targets
+ pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1) # target-subset of predictions
# Regression
- pxy = ps[:, :2].sigmoid() * 2 - 0.5
- pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
+ pxy = pxy.sigmoid() * 2 - 0.5
+ pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1) # predicted box
iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target)
lbox += (1.0 - iou).mean() # iou loss
@@ -144,9 +147,9 @@ def __call__(self, p, targets): # predictions, targets, model
# Classification
if self.nc > 1: # cls loss (only if multiple classes)
- t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets
+ t = torch.full_like(pcls, self.cn, device=self.device) # targets
t[range(n), tcls[i]] = self.cp
- lcls += self.BCEcls(ps[:, 5:], t) # BCE
+ lcls += self.BCEcls(pcls, t) # BCE
# Append targets to text file
# with open('targets.txt', 'a') as file:
@@ -170,15 +173,15 @@ def build_targets(self, p, targets):
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
na, nt = self.na, targets.shape[0] # number of anchors, targets
tcls, tbox, indices, anch = [], [], [], []
- gain = torch.ones(7, device=targets.device) # normalized to gridspace gain
- ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
+ gain = torch.ones(7, device=self.device) # normalized to gridspace gain
+ ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices
g = 0.5 # bias
off = torch.tensor([[0, 0],
[1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m
# [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
- ], device=targets.device).float() * g # offsets
+ ], device=self.device).float() * g # offsets
for i in range(self.nl):
anchors = self.anchors[i]
@@ -206,14 +209,12 @@ def build_targets(self, p, targets):
offsets = 0
# Define
- b, c = t[:, :2].long().T # image, class
- gxy = t[:, 2:4] # grid xy
- gwh = t[:, 4:6] # grid wh
+ bc, gxy, gwh, a = t.unsafe_chunk(4, dim=1) # (image, class), grid xy, grid wh, anchors
+ a, (b, c) = a.long().view(-1), bc.long().T # anchors, image, class
gij = (gxy - offsets).long()
- gi, gj = gij.T # grid xy indices
+ gi, gj = gij.T # grid indices
# Append
- a = t[:, 6].long() # anchor indices
indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices
tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
anch.append(anchors[a]) # anchors
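unsafe_chunk(4, dim=1) slices the 7-column target matrix into 2+2+2+1 column groups in one call, matching the (image, class), xy, wh, anchor layout noted in the comment. A minimal sketch:

```python
import torch

t = torch.arange(14.).view(2, 7)            # rows of (image, class, x, y, w, h, anchor)
bc, gxy, gwh, a = t.unsafe_chunk(4, dim=1)  # column groups of 2, 2, 2, 1
print(bc.shape[1], gxy.shape[1], gwh.shape[1], a.shape[1])  # 2 2 2 1
```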
diff --git a/utils/plots.py b/utils/plots.py
index 6c3f5bcaef37..a30c0faf962a 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -7,6 +7,7 @@
import os
from copy import copy
from pathlib import Path
+from urllib.error import URLError
import cv2
import matplotlib
@@ -55,11 +56,13 @@ def check_pil_font(font=FONT, size=10):
try:
return ImageFont.truetype(str(font) if font.exists() else font.name, size)
except Exception: # download if missing
- check_font(font)
try:
+ check_font(font)
return ImageFont.truetype(str(font), size)
except TypeError:
check_requirements('Pillow>=8.4.0') # known issue https://github.com/ultralytics/yolov5/issues/5374
+ except URLError: # not online
+ return ImageFont.load_default()
class Annotator:
@@ -455,7 +458,7 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
-def save_one_box(xyxy, im, file='image.jpg', gain=1.02, pad=10, square=False, BGR=False, save=True):
+def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
# Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
xyxy = torch.tensor(xyxy).view(-1, 4)
b = xyxy2xywh(xyxy) # boxes
@@ -467,5 +470,7 @@ def save_one_box(xyxy, im, file='image.jpg', gain=1.02, pad=10, square=False, BG
crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
if save:
file.parent.mkdir(parents=True, exist_ok=True) # make directory
- cv2.imwrite(str(increment_path(file).with_suffix('.jpg')), crop)
+ f = str(increment_path(file).with_suffix('.jpg'))
+ # cv2.imwrite(f, crop) # https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue
+ Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)).save(f, quality=95, subsampling=0)
return crop
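The switch away from `cv2.imwrite()` here is about JPEG chroma subsampling, which can visibly smear color edges in small crops; Pillow can force full-resolution 4:4:4 chroma. A minimal sketch:

```python
import cv2
import numpy as np
from PIL import Image

crop = (np.random.rand(64, 64, 3) * 255).astype(np.uint8)  # BGR crop (illustrative)
# quality=95 with subsampling=0 writes a 4:4:4 JPEG, keeping chroma at full resolution
Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)).save('crop.jpg', quality=95, subsampling=0)
```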
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index c5257c6ebfeb..efcacc9ca735 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -3,7 +3,6 @@
PyTorch utils
"""
-import datetime
import math
import os
import platform
@@ -12,14 +11,13 @@
import warnings
from contextlib import contextmanager
from copy import deepcopy
-from pathlib import Path
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
-from utils.general import LOGGER
+from utils.general import LOGGER, file_update_date, git_describe
try:
import thop # for FLOPs computation
@@ -32,9 +30,7 @@
@contextmanager
def torch_distributed_zero_first(local_rank: int):
- """
- Decorator to make all processes in distributed training wait for each local_master to do something.
- """
+ # Context manager to make all processes in distributed training wait for the local master to do something
if local_rank not in [-1, 0]:
dist.barrier(device_ids=[local_rank])
yield
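Typical usage of the context manager above (a sketch assuming `torch.distributed` is already initialized; `prepare_dataset` is a hypothetical helper): rank 0 does the slow one-time work while the other ranks block at the first barrier, then all ranks proceed together.

```python
import os
from utils.torch_utils import torch_distributed_zero_first

LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))
with torch_distributed_zero_first(LOCAL_RANK):
    dataset = prepare_dataset()  # hypothetical: ranks 1..N-1 wait until rank -1/0 finishes
```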
@@ -42,21 +38,6 @@ def torch_distributed_zero_first(local_rank: int):
dist.barrier(device_ids=[0])
-def date_modified(path=__file__):
- # return human-readable file modification date, i.e. '2021-3-26'
- t = datetime.datetime.fromtimestamp(Path(path).stat().st_mtime)
- return f'{t.year}-{t.month}-{t.day}'
-
-
-def git_describe(path=Path(__file__).parent): # path must be a directory
- # return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
- s = f'git -C {path} describe --tags --long --always'
- try:
- return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1]
- except subprocess.CalledProcessError:
- return '' # not a git repository
-
-
def device_count():
# Returns number of CUDA devices available. Safe version of torch.cuda.device_count(). Only works on Linux.
assert platform.system() == 'Linux', 'device_count() function only works on Linux'
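A torch-free count is useful because touching `torch.cuda` can initialize CUDA before `CUDA_VISIBLE_DEVICES` is finalized. One plausible way to do it, as a sketch assuming `nvidia-smi` is on PATH (not necessarily the function's actual body):

```python
import subprocess

def device_count_sketch():
    # Count GPUs without importing torch.cuda (which would initialize CUDA)
    try:
        out = subprocess.run(['nvidia-smi', '-L'], capture_output=True, text=True, check=True).stdout
        return len(out.strip().splitlines())  # one 'GPU N: ...' line per device
    except Exception:                         # nvidia-smi missing or no driver
        return 0
```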
@@ -69,7 +50,7 @@ def device_count():
def select_device(device='', batch_size=0, newline=True):
# device = 'cpu' or '0' or '0,1,2,3'
- s = f'YOLOv5 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string
+ s = f'YOLOv5 🚀 {git_describe() or file_update_date()} torch {torch.__version__} ' # string
device = str(device).strip().lower().replace('cuda:', '') # to string, 'cuda:0' to '0'
cpu = device == 'cpu'
if cpu:
@@ -88,7 +69,7 @@ def select_device(device='', batch_size=0, newline=True):
space = ' ' * (len(s) + 1)
for i, d in enumerate(devices):
p = torch.cuda.get_device_properties(i)
- s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2:.0f}MiB)\n" # bytes to MB
+ s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB
else:
s += 'CPU\n'
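The `(1 << 20)` form is the same quantity as `1024 ** 2`; both divide bytes into binary megabytes, matching the MiB unit in the printed string:

```python
assert (1 << 20) == 1024 ** 2 == 1_048_576    # bytes per MiB
print(f'{8_589_934_592 / (1 << 20):.0f}MiB')  # an 8 GiB card -> 8192MiB
```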
@@ -99,7 +80,7 @@ def select_device(device='', batch_size=0, newline=True):
def time_sync():
- # pytorch-accurate time
+ # PyTorch-accurate time
if torch.cuda.is_available():
torch.cuda.synchronize()
return time.time()
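CUDA kernels launch asynchronously, so reading the wall clock without a synchronize measures only launch overhead; `time_sync()` adds exactly the missing wait. A small demonstration:

```python
import time
import torch

if torch.cuda.is_available():
    a = torch.randn(4096, 4096, device='cuda')
    t0 = time.time()
    b = a @ a                      # kernel launches asynchronously
    t_launch = time.time() - t0    # tiny: the matmul is merely queued
    torch.cuda.synchronize()       # wait for queued kernels to finish
    t_done = time.time() - t0      # true elapsed compute time
    print(f'launch {t_launch:.5f}s vs done {t_done:.5f}s')
```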
@@ -205,7 +186,7 @@ def prune(model, amount=0.3):
def fuse_conv_and_bn(conv, bn):
- # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
+ # Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
fusedconv = nn.Conv2d(conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
@@ -214,12 +195,12 @@ def fuse_conv_and_bn(conv, bn):
groups=conv.groups,
bias=True).requires_grad_(False).to(conv.weight.device)
- # prepare filters
+ # Prepare filters
w_conv = conv.weight.clone().view(conv.out_channels, -1)
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
- # prepare spatial bias
+ # Prepare spatial bias
b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
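The algebra being applied: folding y = γ(Wx − μ)/√(σ² + ε) + β into a single convolution gives W′ = diag(γ/√(σ² + ε))·W and b′ = β − γμ/√(σ² + ε) plus the folded conv bias. A quick numerical check (sketch) that the fused layer reproduces `bn(conv(x))` in eval mode:

```python
import torch
import torch.nn as nn
from utils.torch_utils import fuse_conv_and_bn

conv = nn.Conv2d(3, 8, 3, padding=1, bias=False)
bn = nn.BatchNorm2d(8).eval()       # eval mode: uses running statistics
bn.running_mean.uniform_(-1, 1)     # make the stats non-trivial
bn.running_var.uniform_(0.5, 1.5)
fused = fuse_conv_and_bn(conv, bn)

x = torch.randn(1, 3, 16, 16)
assert torch.allclose(bn(conv(x)), fused(x), atol=1e-5)
```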
@@ -252,7 +233,7 @@ def model_info(model, verbose=False, img_size=640):
def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416)
- # scales img(bs,3,y,x) by ratio constrained to gs-multiple
+ # Scales img(bs,3,y,x) by ratio constrained to gs-multiple
if ratio == 1.0:
return img
else:
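The "constrained to gs-multiple" part of the comment means output height/width are rounded up to multiples of the grid stride after scaling; a sketch of that rounding (assuming the usual ceil-to-multiple form):

```python
import math

gs, ratio = 32, 0.5
h, w = 256, 416
h2, w2 = (math.ceil(x * ratio / gs) * gs for x in (h, w))
print(h2, w2)  # 128 224 -- both multiples of 32
```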
@@ -302,13 +283,13 @@ class ModelEMA:
For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
"""
- def __init__(self, model, decay=0.9999, updates=0):
+ def __init__(self, model, decay=0.9999, tau=2000, updates=0):
# Create EMA
self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA
# if next(model.parameters()).device.type != 'cpu':
# self.ema.half() # FP16 EMA
self.updates = updates # number of EMA updates
- self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs)
+ self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs)
for p in self.ema.parameters():
p.requires_grad_(False)
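The new `tau` parameter controls how fast the EMA decay ramps from ~0 toward its final value, so early (noisy) weights are averaged in only lightly; at `x = tau` updates the ramp reaches 1 − 1/e ≈ 63% of `decay`:

```python
import math

decay, tau = 0.9999, 2000
d = lambda x: decay * (1 - math.exp(-x / tau))
for x in (1, tau, 10 * tau):
    print(x, round(d(x), 6))  # ~0.0005, ~0.6321, ~0.99985
```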
diff --git a/val.py b/val.py
index 78abbda8231a..2dd2aec679f9 100644
--- a/val.py
+++ b/val.py
@@ -87,7 +87,7 @@ def process_batch(detections, labels, iouv):
matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
# matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
- matches = torch.Tensor(matches).to(iouv.device)
+ matches = torch.from_numpy(matches).to(iouv.device)
correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
return correct
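`torch.Tensor(ndarray)` copies and always casts to the default float32, while `torch.from_numpy()` is a zero-copy view that preserves the NumPy dtype (float64 here), so the IoU match scores keep full precision:

```python
import numpy as np
import torch

matches = np.array([[0, 1, 0.87]])      # label idx, detection idx, IoU
print(torch.Tensor(matches).dtype)      # torch.float32 (copy + cast)
print(torch.from_numpy(matches).dtype)  # torch.float64 (shares memory)
```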
@@ -125,7 +125,6 @@ def run(data,
training = model is not None
if training: # called by train.py
device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model
-
half &= device.type != 'cpu' # half precision only supported on CUDA
model.half() if half else model.float()
else: # called directly
@@ -136,33 +135,32 @@ def run(data,
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
- model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
- stride, pt, jit, onnx, engine = model.stride, model.pt, model.jit, model.onnx, model.engine
+ model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
+ stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
imgsz = check_img_size(imgsz, s=stride) # check image size
- half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
- if pt or jit:
- model.model.half() if half else model.model.float()
- elif engine:
+ half = model.fp16 # FP16 supported on limited backends with CUDA
+ if engine:
batch_size = model.batch_size
else:
- half = False
- batch_size = 1 # export.py models default to batch-size 1
- device = torch.device('cpu')
- LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
+ device = model.device
+ if not (pt or jit):
+ batch_size = 1 # export.py models default to batch-size 1
+ LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')
# Data
data = check_dataset(data) # check
# Configure
model.eval()
+ cuda = device.type != 'cpu'
is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt') # COCO dataset
nc = 1 if single_cls else int(data['nc']) # number of classes
- iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
+ iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95
niou = iouv.numel()
# Dataloader
if not training:
- model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz), half=half) # warmup
+ model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup
pad = 0.0 if task in ('speed', 'benchmark') else 0.5
rect = False if task == 'benchmark' else pt # square inference for benchmarks
task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images
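For reference, the `iouv` vector built in this hunk holds the ten IoU thresholds that define mAP@0.5:0.95:

```python
import torch

iouv = torch.linspace(0.5, 0.95, 10)
print([round(v, 2) for v in iouv.tolist()])
# [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
```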
@@ -180,7 +178,7 @@ def run(data,
pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar
for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
t1 = time_sync()
- if pt or jit or engine:
+ if cuda:
im = im.to(device, non_blocking=True)
targets = targets.to(device)
im = im.half() if half else im.float() # uint8 to fp16/32
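`non_blocking=True` only overlaps the host-to-device copy with compute when the source tensor sits in pinned (page-locked) memory, e.g. from a DataLoader built with `pin_memory=True`; a minimal sketch:

```python
import torch

if torch.cuda.is_available():
    im = torch.empty(8, 3, 640, 640).pin_memory()  # page-locked host tensor
    im = im.to('cuda', non_blocking=True)          # async copy, returns immediately
```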
@@ -198,7 +196,7 @@ def run(data,
loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls
# NMS
- targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels
+ targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels
lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
t3 = time_sync()
out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
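The scaling line converts the normalized xywh label columns into pixel units in place, e.g.:

```python
import torch

width = height = 640
targets = torch.tensor([[0., 5., 0.5, 0.5, 0.25, 0.25]])  # image, class, x, y, w, h (normalized)
targets[:, 2:] *= torch.tensor((width, height, width, height))
print(targets[0, 2:].tolist())  # [320.0, 320.0, 160.0, 160.0]
```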
@@ -297,7 +295,7 @@ def run(data,
pred = anno.loadRes(pred_json) # init predictions api
eval = COCOeval(anno, pred, 'bbox')
if is_coco:
- eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate
+ eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate
eval.evaluate()
eval.accumulate()
eval.summarize()
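The `imgIds` line works because official COCO filenames encode the image id, so `int(Path(x).stem)` recovers it:

```python
from pathlib import Path

print(int(Path('000000000139.jpg').stem))  # 139
```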