From d733648cfba1a948753970ba7b9df27de5d6e7ed Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 29 Aug 2021 16:46:13 +0200 Subject: [PATCH] Create `Annotator()` class (#4591) * Add Annotator() class * Download Arial * 2x for loop * Cleanup * tuple 2 list * max_size=1920 * bold logging results to * tolist() * im = annotator.im * PIL save in detect.py * Smart asarray in detect.py * revert to cv2.imwrite * Cleanup * Return result asarray * Add `Profile()` profiler * CamelCase Timeout * Resize after mosaic * pillow>=8.0.0 * daemon imwrite * Add cv2 support * Remove plot_wh_methods and plot_one_box * pil=False for hubconf.py annotations * im.shape bug fix * colorstr common.py * join daemons * Update t.daemon * Removed daemon saving --- detect.py | 6 +- models/common.py | 11 ++- requirements.txt | 2 +- train.py | 2 +- utils/general.py | 5 +- utils/plots.py | 189 ++++++++++++++++++++++------------------------- 6 files changed, 106 insertions(+), 109 deletions(-) diff --git a/detect.py b/detect.py index 15ddc1ffb6a4..77502b0c5bee 100644 --- a/detect.py +++ b/detect.py @@ -23,7 +23,7 @@ from utils.datasets import LoadStreams, LoadImages from utils.general import check_img_size, check_requirements, check_imshow, colorstr, non_max_suppression, \ apply_classifier, scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path, save_one_box -from utils.plots import colors, plot_one_box +from utils.plots import colors, Annotator from utils.torch_utils import select_device, load_classifier, time_sync @@ -181,6 +181,7 @@ def wrap_frozen_graph(gd, inputs, outputs): s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop + annotator = Annotator(im0, line_width=line_thickness, pil=False) if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() @@ -201,7 +202,7 @@ def wrap_frozen_graph(gd, inputs, outputs): if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - im0 = plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_width=line_thickness) + annotator.box_label(xyxy, label, color=colors(c, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) @@ -209,6 +210,7 @@ def wrap_frozen_graph(gd, inputs, outputs): print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results + im0 = annotator.result() if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond diff --git a/models/common.py b/models/common.py index e1f5aea3abed..0c60b39a483d 100644 --- a/models/common.py +++ b/models/common.py @@ -18,8 +18,9 @@ from torch.cuda import amp from utils.datasets import exif_transpose, letterbox -from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box -from utils.plots import colors, plot_one_box +from utils.general import colorstr, non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, \ + save_one_box +from utils.plots import colors, Annotator from utils.torch_utils import time_sync LOGGER = logging.getLogger(__name__) @@ -370,12 +371,14 @@ def display(self, pprint=False, show=False, save=False, crop=False, render=False n = (pred[:, -1] == c).sum() # detections per class str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string if show or save or render or crop: + annotator = Annotator(im, pil=False) for *box, conf, cls in reversed(pred): # xyxy, confidence, class label = f'{self.names[int(cls)]} {conf:.2f}' if crop: save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i]) else: # all others - im = plot_one_box(box, im, label=label, color=colors(cls)) + annotator.box_label(box, label, color=colors(cls)) + im = annotator.im else: str += '(no detections)' @@ -388,7 +391,7 @@ def display(self, pprint=False, show=False, save=False, crop=False, render=False f = self.files[i] im.save(save_dir / f) # save if i == self.n - 1: - LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to '{save_dir}'") + LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") if render: self.imgs[i] = np.asarray(im) diff --git a/requirements.txt b/requirements.txt index f6361d591f1b..2ad65ba53e29 100755 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ matplotlib>=3.2.2 numpy>=1.18.5 opencv-python>=4.1.2 -Pillow +Pillow>=8.0.0 PyYAML>=5.3.1 scipy>=1.4.1 torch>=1.7.0 diff --git a/train.py b/train.py index aaee41a499e4..2fe38ef043d0 100644 --- a/train.py +++ b/train.py @@ -260,7 +260,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary compute_loss = ComputeLoss(model) # init loss class LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' f'Using {train_loader.num_workers} dataloader workers\n' - f'Logging results to {save_dir}\n' + f"Logging results to {colorstr('bold', save_dir)}\n" f'Starting training for {epochs} epochs...') for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ model.train() diff --git a/utils/general.py b/utils/general.py index c74d8bb299de..fe9a8ac537fb 100755 --- a/utils/general.py +++ b/utils/general.py @@ -122,9 +122,10 @@ def is_pip(): return 'site-packages' in Path(__file__).absolute().parts -def is_ascii(str=''): +def is_ascii(s=''): # Is string composed of all ASCII (no UTF) characters? - return len(str.encode().decode('ascii', 'ignore')) == len(str) + s = str(s) # convert to str() in case of None, etc. + return len(s.encode().decode('ascii', 'ignore')) == len(s) def emojis(str=''): diff --git a/utils/plots.py b/utils/plots.py index 25d70dbabc75..696d32345dd5 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -67,51 +67,59 @@ def butter_lowpass(cutoff, fs, order): return filtfilt(b, a, data) # forward-backward filter -def plot_one_box(box, im, color=(128, 128, 128), txt_color=(255, 255, 255), label=None, line_width=3, use_pil=False): - # Plots one xyxy box on image im with label - assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.' - lw = line_width or max(int(min(im.size) / 200), 2) # line width - - if use_pil or (label is not None and not is_ascii(label)): # use PIL - im = Image.fromarray(im) - draw = ImageDraw.Draw(im) - draw.rectangle(box, width=lw + 1, outline=color) # plot - if label: - font = ImageFont.truetype("Arial.ttf", size=max(round(max(im.size) / 40), 12)) - txt_width, txt_height = font.getsize(label) - draw.rectangle([box[0], box[1] - txt_height + 4, box[0] + txt_width, box[1]], fill=color) - draw.text((box[0], box[1] - txt_height + 1), label, fill=txt_color, font=font) - return np.asarray(im) - else: # use OpenCV - c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) - cv2.rectangle(im, c1, c2, color, thickness=lw, lineType=cv2.LINE_AA) - if label: - tf = max(lw - 1, 1) # font thickness - txt_width, txt_height = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] - c2 = c1[0] + txt_width, c1[1] - txt_height - 3 - cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText(im, label, (c1[0], c1[1] - 2), 0, lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA) - return im - - -def plot_wh_methods(): # from utils.plots import *; plot_wh_methods() - # Compares the two methods for width-height anchor multiplication - # https://github.com/ultralytics/yolov3/issues/168 - x = np.arange(-4.0, 4.0, .1) - ya = np.exp(x) - yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2 - - fig = plt.figure(figsize=(6, 3), tight_layout=True) - plt.plot(x, ya, '.-', label='YOLOv3') - plt.plot(x, yb ** 2, '.-', label='YOLOv5 ^2') - plt.plot(x, yb ** 1.6, '.-', label='YOLOv5 ^1.6') - plt.xlim(left=-4, right=4) - plt.ylim(bottom=0, top=6) - plt.xlabel('input') - plt.ylabel('output') - plt.grid() - plt.legend() - fig.savefig('comparison.png', dpi=200) +class Annotator: + # YOLOv5 PIL Annotator class + def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=True): + assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.' + self.pil = pil + if self.pil: # use PIL + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + s = sum(self.im.size) / 2 # mean shape + f = font_size or max(round(s * 0.035), 12) + try: + self.font = ImageFont.truetype(font, size=f) + except: # download TTF + url = "https://github.com/ultralytics/yolov5/releases/download/v1.0/" + font + torch.hub.download_url_to_file(url, font) + self.font = ImageFont.truetype(font, size=f) + self.fh = self.font.getsize('a')[1] - 3 # font height + else: # use cv2 + self.im = im + s = sum(im.shape) / 2 # mean shape + self.lw = line_width or max(round(s * 0.003), 2) # line width + + def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)): + # Add one xyxy box to image with label + if self.pil or not is_ascii(label): + self.draw.rectangle(box, width=self.lw, outline=color) # box + if label: + w = self.font.getsize(label)[0] # text width + self.draw.rectangle([box[0], box[1] - self.fh, box[0] + w + 1, box[1] + 1], fill=color) + self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') + else: # cv2 + c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + cv2.rectangle(self.im, c1, c2, color, thickness=self.lw, lineType=cv2.LINE_AA) + if label: + tf = max(self.lw - 1, 1) # font thickness + w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] + c2 = c1[0] + w, c1[1] - h - 3 + cv2.rectangle(self.im, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText(self.im, label, (c1[0], c1[1] - 2), 0, self.lw / 3, txt_color, thickness=tf, + lineType=cv2.LINE_AA) + + def rectangle(self, xy, fill=None, outline=None, width=1): + # Add rectangle to image (PIL-only) + self.draw.rectangle(xy, fill, outline, width) + + def text(self, xy, text, txt_color=(255, 255, 255)): + # Add text to image (PIL-only) + w, h = self.font.getsize(text) # text width, height + self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) + + def result(self): + # Return annotated image as array + return np.asarray(self.im) def output_to_target(output): @@ -123,82 +131,65 @@ def output_to_target(output): return np.array(targets) -def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16): +def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=1920, max_subplots=16): # Plot image grid with labels - if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() if isinstance(targets, torch.Tensor): targets = targets.cpu().numpy() - - # un-normalise if np.max(images[0]) <= 1: - images *= 255 - - tl = 3 # line thickness - tf = max(tl - 1, 1) # font thickness + images *= 255.0 # de-normalise (optional) bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images ns = np.ceil(bs ** 0.5) # number of subplots (square) - # Check if we should resize - scale_factor = max_size / max(h, w) - if scale_factor < 1: - h = math.ceil(scale_factor * h) - w = math.ceil(scale_factor * w) - + # Build Image mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, img in enumerate(images): + for i, im in enumerate(images): if i == max_subplots: # if last batch has fewer images than we expect break - - block_x = int(w * (i // ns)) - block_y = int(h * (i % ns)) - - img = img.transpose(1, 2, 0) - if scale_factor < 1: - img = cv2.resize(img, (w, h)) - - mosaic[block_y:block_y + h, block_x:block_x + w, :] = img + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y:y + h, x:x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int(h * ns * 0.02) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames if len(targets) > 0: - image_targets = targets[targets[:, 0] == i] - boxes = xywh2xyxy(image_targets[:, 2:6]).T - classes = image_targets[:, 1].astype('int') - labels = image_targets.shape[1] == 6 # labels if no conf column - conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred) + ti = targets[targets[:, 0] == i] # image targets + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype('int') + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) if boxes.shape[1]: if boxes.max() <= 1.01: # if normalized with tolerance 0.01 boxes[[0, 2]] *= w # scale to pixels boxes[[1, 3]] *= h - elif scale_factor < 1: # absolute coords need scale if image scales - boxes *= scale_factor - boxes[[0, 2]] += block_x - boxes[[1, 3]] += block_y - for j, box in enumerate(boxes.T): - cls = int(classes[j]) + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] color = colors(cls) cls = names[cls] if names else cls if labels or conf[j] > 0.25: # 0.25 conf thresh - label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j]) - mosaic = plot_one_box(box, mosaic, label=label, color=color, line_width=tl) - - # Draw image filename labels - if paths: - label = Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, - lineType=cv2.LINE_AA) - - # Image border - cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3) - - if fname: - r = min(1280. / max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) - # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - Image.fromarray(mosaic).save(fname) # PIL save - return mosaic + label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' + annotator.box_label(box, label, color=color) + annotator.im.save(fname) # save def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):