Palm detection using YOLO with OAK-D pipeline
Kovelja009 committed Apr 1, 2024
1 parent ac6c438 commit af6956f
Showing 7 changed files with 354 additions and 52 deletions.
4 changes: 2 additions & 2 deletions .gitignore
@@ -41,15 +41,15 @@ results*.csv
coco/
coco128/
VOC/

hagrid/
# MATLAB GitIgnore -----------------------------------------------------------------------------------------------------
*.m~
*.mat
!targets*.mat

# Neural Network weights -----------------------------------------------------------------------------------------------
*.weights
*.pt
# *.pt
*.pb
*.onnx
*.engine
80 changes: 80 additions & 0 deletions OAK_D_api.py
@@ -0,0 +1,80 @@
import cv2
import depthai as dai
import time


class FPSHandler:
    def __init__(self):
        # Offset the first timestamp so fps() never divides by zero before a frame arrives
        self.timestamp = time.time() + 1
        self.start = time.time()
        self.frame_cnt = 0

        # Text overlay settings for the FPS counter
        self._coordinates = (20, 20)
        self._font = cv2.FONT_HERSHEY_SIMPLEX
        self._font_scale = 0.7
        self._color = (0, 0, 255)
        self._thickness = 1

    def next_iter(self):
        self.timestamp = time.time()
        self.frame_cnt += 1

    def fps(self):
        return self.frame_cnt / (self.timestamp - self.start)

    def show_fps(self, frame, fps):
        return cv2.putText(frame, str(fps), self._coordinates, self._font, self._font_scale, self._color,
                           self._thickness, cv2.LINE_AA)


class OAK_D:
    def __init__(self, fps=24, width=1920, height=1080):
        # Create pipeline
        self._pipeline = dai.Pipeline()

        # Define source and output
        self._camRgb = self._pipeline.create(dai.node.ColorCamera)
        self._xoutVideo = self._pipeline.create(dai.node.XLinkOut)

        self._xoutVideo.setStreamName("video")

        # Properties
        self._camRgb.setBoardSocket(dai.CameraBoardSocket.RGB)
        self._camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
        self._camRgb.setVideoSize(width, height)
        self._camRgb.setFps(fps)

        self._xoutVideo.input.setBlocking(False)
        self._xoutVideo.input.setQueueSize(1)

        # Linking
        self._camRgb.video.link(self._xoutVideo.input)

        # Connect to device and start pipeline
        self._device = dai.Device(self._pipeline)
        self._video = self._device.getOutputQueue(name="video", maxSize=1, blocking=False)
        self.fps_handler = FPSHandler()
        self.height = self._camRgb.getVideoHeight()
        self.width = self._camRgb.getVideoWidth()

    def get_color_frame(self, show_fps=False):
        video_in = self._video.get()
        # Convert the NV12-encoded video frame to a BGR frame for OpenCV;
        # visualizing the frame on slower hosts may add overhead
        cv_frame = video_in.getCvFrame()
        if show_fps:
            self.fps_handler.next_iter()
            return self.fps_handler.show_fps(cv_frame, round(self.fps_handler.fps(), 2))
        else:
            return cv_frame


if __name__ == '__main__':
    oak_d = OAK_D(fps=60, width=300, height=300)
    while True:
        frame = oak_d.get_color_frame(show_fps=True)
        cv2.imshow("VidraCar", frame)
        if cv2.waitKey(1) == ord('q'):
            break
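
For reference, a minimal sketch (not part of this commit) of an alternative DepthAI pipeline that exposes a "preview" output already sized for the detector input, so the host does not have to letterbox full 1080p frames; the stream name and sizes below are illustrative assumptions:

import depthai as dai

pipeline = dai.Pipeline()
cam = pipeline.create(dai.node.ColorCamera)
cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
cam.setPreviewSize(640, 640)  # matches the YOLOv5 inference size (illustrative)
cam.setInterleaved(False)

xout = pipeline.create(dai.node.XLinkOut)
xout.setStreamName("preview")
cam.preview.link(xout.input)

with dai.Device(pipeline) as device:
    queue = device.getOutputQueue(name="preview", maxSize=1, blocking=False)
    frame = queue.get().getCvFrame()  # 640x640 BGR frame, ready for inference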
91 changes: 91 additions & 0 deletions inference.py
@@ -0,0 +1,91 @@
import torch
from ultralytics.utils.plotting import Annotator, colors

from utils.dataloaders import OakDLoadImages
from utils.general import (
    Profile,
    check_img_size,
    non_max_suppression,
    scale_boxes,
    xyxy2xywh,
)
from utils.torch_utils import smart_inference_mode


@smart_inference_mode()
def run(
    frame=None,  # OpenCV image
    imgsz=(640, 640),  # inference size (height, width)
    conf_thres=0.25,  # confidence threshold
    iou_thres=0.45,  # NMS IoU threshold
    max_det=1000,  # maximum detections per image
    device="",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
    classes=None,  # filter by class: --class 0, or --class 0 2 3
    agnostic_nms=False,  # class-agnostic NMS
    line_thickness=3,  # bounding box thickness (pixels)
    hide_labels=False,  # hide labels
    hide_conf=False,  # hide confidences
    half=False,  # use FP16 half-precision inference
    dnn=False,  # use OpenCV DNN for ONNX inference
    model=None,  # preloaded DetectMultiBackend model
):
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    dataset = OakDLoadImages(frame, img_size=imgsz[0], stride=stride, auto=pt)

    # Run inference
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
    seen, _, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device))
    for im, im0 in dataset:
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim
            if model.xml and im.shape[0] > 1:
                ims = torch.chunk(im, im.shape[0], 0)

        # Inference
        with dt[1]:
            if model.xml and im.shape[0] > 1:
                pred = None
                for image in ims:
                    if pred is None:
                        pred = model(image, augment=False, visualize=False).unsqueeze(0)
                    else:
                        pred = torch.cat((pred, model(image, augment=False, visualize=False).unsqueeze(0)), dim=0)
                pred = [pred, None]
            else:
                pred = model(im, augment=False, visualize=False)

        # NMS
        with dt[2]:
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

        results_for_bounding_boxes = []
        # Process predictions
        for _, det in enumerate(pred):  # per image
            seen += 1
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # Add bbox to image
                    c = int(cls)  # integer class
                    label = None if hide_labels else (names[c] if hide_conf else f"{names[c]} {conf:.2f}")
                    annotator.box_label(xyxy, label, color=colors(c, True))

                    # TODO: check whether normalized xyxy or pixel xyxy is needed here
                    results_for_bounding_boxes.append(xyxy)

        return im0, results_for_bounding_boxes
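
Regarding the TODO above: after scale_boxes() the returned xyxy values appear to be pixel coordinates in the original frame. A hedged helper sketch (hypothetical, not part of the commit) for converting them to (x_center, y_center, width, height), optionally normalized to [0, 1]:

def to_xywh(results_for_bounding_boxes, frame_shape, normalize=False):
    # Each entry is a list of four scalar tensors [x1, y1, x2, y2] in pixels
    h, w = frame_shape[:2]
    out = []
    for xyxy in results_for_bounding_boxes:
        x1, y1, x2, y2 = (float(v) for v in xyxy)
        xc, yc, bw, bh = (x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1
        if normalize:
            xc, bw, yc, bh = xc / w, bw / w, yc / h, bh / h
        out.append((xc, yc, bw, bh))
    return out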
49 changes: 49 additions & 0 deletions models/custom_yolov5s.yaml
@@ -0,0 +1,49 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license

# Parameters
nc: 3 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 head
head: [
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 13

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 17 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)

    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
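
A hedged sketch of how this config would typically be plugged into YOLOv5 training; the dataset yaml path and hyperparameters are illustrative assumptions (the dataset yaml is not part of this commit), while the run name mirrors the runs/train/yolov5s_results* path loaded by rivian.py below:

# Illustrative only: assumes the standard YOLOv5 repo layout and a dataset
# definition at data/hagrid.yaml (not included in this commit).
import train  # YOLOv5's train.py

train.run(
    data="data/hagrid.yaml",           # assumed dataset yaml (3 classes, matching nc: 3 above)
    cfg="models/custom_yolov5s.yaml",  # the custom config added in this commit
    weights="yolov5s.pt",              # start from the pretrained small model
    imgsz=640,
    epochs=100,                        # illustrative
    batch_size=16,                     # illustrative
    name="yolov5s_results",            # matches runs/train/yolov5s_results* used by rivian.py
)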
109 changes: 59 additions & 50 deletions requirements.txt
@@ -1,50 +1,59 @@
-# YOLOv5 requirements
-# Usage: pip install -r requirements.txt
-
-# Base ------------------------------------------------------------------------
-gitpython>=3.1.30
-matplotlib>=3.3
-numpy>=1.23.5
-opencv-python>=4.1.1
-Pillow>=9.4.0
-psutil # system resources
-PyYAML>=5.3.1
-requests>=2.23.0
-scipy>=1.4.1
-thop>=0.1.1 # FLOPs computation
-torch>=1.8.0 # see https://pytorch.org/get-started/locally (recommended)
-torchvision>=0.9.0
-tqdm>=4.64.0
-ultralytics>=8.0.232
-# protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012
-
-# Logging ---------------------------------------------------------------------
-# tensorboard>=2.4.1
-# clearml>=1.2.0
-# comet
-
-# Plotting --------------------------------------------------------------------
-pandas>=1.1.4
-seaborn>=0.11.0
-
-# Export ----------------------------------------------------------------------
-# coremltools>=6.0 # CoreML export
-# onnx>=1.10.0 # ONNX export
-# onnx-simplifier>=0.4.1 # ONNX simplifier
-# nvidia-pyindex # TensorRT export
-# nvidia-tensorrt # TensorRT export
-# scikit-learn<=1.1.2 # CoreML quantization
-# tensorflow>=2.4.0,<=2.13.1 # TF exports (-cpu, -aarch64, -macos)
-# tensorflowjs>=3.9.0 # TF.js export
-# openvino-dev>=2023.0 # OpenVINO export
-
-# Deploy ----------------------------------------------------------------------
-setuptools>=65.5.1 # Snyk vulnerability fix
-# tritonclient[all]~=2.24.0
-
-# Extras ----------------------------------------------------------------------
-# ipython # interactive notebook
-# mss # screenshots
-# albumentations>=1.0.3
-# pycocotools>=2.0.6 # COCO mAP
-wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability
+certifi==2024.2.2
+charset-normalizer==3.3.2
+clip @ git+https://github.com/ultralytics/CLIP.git@e17416a36b45d040758327936a1ea150c13fe3d1
+contourpy==1.2.0
+cycler==0.12.1
+depthai==2.25.0.0
+filelock==3.13.3
+fonttools==4.50.0
+fsspec==2024.3.1
+ftfy==6.2.0
+gitdb==4.0.11
+GitPython==3.1.43
+idna==3.6
+Jinja2==3.1.3
+kiwisolver==1.4.5
+MarkupSafe==2.1.5
+matplotlib==3.8.3
+mpmath==1.3.0
+networkx==3.2.1
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.4.99
+nvidia-nvtx-cu12==12.1.105
+opencv-python==4.9.0.80
+packaging==24.0
+pandas==2.2.1
+pillow==10.2.0
+psutil==5.9.8
+py-cpuinfo==9.0.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+pytz==2024.1
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+scipy==1.12.0
+seaborn==0.13.2
+six==1.16.0
+smmap==5.0.1
+sympy==1.12
+thop==0.1.1.post2209072238
+torch==2.2.2
+torchvision==0.17.2
+tqdm==4.66.2
+triton==2.2.0
+typing_extensions==4.10.0
+tzdata==2024.1
+ultralytics==8.1.39
+urllib3==2.2.1
+wcwidth==0.2.13
32 changes: 32 additions & 0 deletions rivian.py
@@ -0,0 +1,32 @@
import OAK_D_api as oak
import cv2
import torch
from inference import run
from models.common import DetectMultiBackend


def run_object_detection():
    # Load model
    weights_path = './runs/train/yolov5s_results3/weights/best.pt'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = DetectMultiBackend(weights_path, device=device, dnn=False, fp16=False)
    model.eval()

    # Camera setup
    oak_d = oak.OAK_D(fps=60, width=1920, height=1080)

    while True:
        frame = oak_d.get_color_frame(show_fps=True)
        img, results_for_bounding_boxes = run(frame=frame, classes=[1], model=model)

        cv2.imshow("Levi", img)

        # Break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break


if __name__ == '__main__':
    run_object_detection()
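
A hypothetical follow-up helper (not part of this commit) for consuming the boxes that run_object_detection() currently receives but does not use further: it crops each detected region out of the original frame for later processing. The function name and padding value are illustrative.

def crop_detections(frame, results_for_bounding_boxes, pad=10):
    # frame: BGR image from OAK_D; boxes: pixel-space xyxy returned by run()
    h, w = frame.shape[:2]
    crops = []
    for xyxy in results_for_bounding_boxes:
        x1, y1, x2, y2 = (int(v) for v in xyxy)
        x1, y1 = max(x1 - pad, 0), max(y1 - pad, 0)  # clamp the padded box to the frame
        x2, y2 = min(x2 + pad, w), min(y2 + pad, h)
        crops.append(frame[y1:y2, x1:x2].copy())
    return crops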
