-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
palm detection using yolo with oak-d pipeline
- Loading branch information
1 parent
ac6c438
commit af6956f
Showing
7 changed files
with
354 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import cv2 | ||
import depthai as dai | ||
import time | ||
|
||
|
||
class FPSHandler:
    """Track the average frame rate of a stream and draw it onto frames.

    The rate is the number of frames counted through ``next_iter`` divided
    by the wall-clock time between construction and the most recent
    ``next_iter`` call.
    """

    def __init__(self):
        # ``timestamp`` starts one second ahead of ``start`` so that fps()
        # has a non-zero interval (and reports 0 frames) before the first
        # call to next_iter().
        self.timestamp = time.time() + 1
        self.start = time.time()
        self.frame_cnt = 0

        # Text style forwarded to cv2.putText by show_fps().
        self._coordinates = (20, 20)
        self._font = cv2.FONT_HERSHEY_SIMPLEX
        self._font_scale = 0.7
        self._color = (0, 0, 255)
        self._thickness = 1

    def next_iter(self):
        """Register one more frame, stamped with the current time."""
        self.timestamp = time.time()
        self.frame_cnt += 1

    def fps(self):
        """Return the average frames per second since construction.

        Returns 0.0 when the measured interval is zero or negative, which
        can happen on clocks with coarse resolution or when the system
        clock is adjusted; this avoids a ZeroDivisionError or a negative
        rate.
        """
        elapsed = self.timestamp - self.start
        if elapsed <= 0:
            return 0.0
        return self.frame_cnt / elapsed

    def show_fps(self, frame, fps):
        """Draw ``fps`` as text on ``frame`` and return cv2.putText's result.

        Args:
            frame: image passed straight to cv2.putText.
            fps: value to display; it is converted with ``str``.
        """
        return cv2.putText(frame, str(fps), self._coordinates, self._font, self._font_scale, self._color,
                           self._thickness, cv2.LINE_AA)
|
||
|
||
class OAK_D:
    """Wrapper around a DepthAI colour-camera pipeline that yields OpenCV frames."""

    def __init__(self, fps=24, width=1920, height=1080):
        """Build the pipeline, open the device and fetch the "video" queue.

        Args:
            fps: frame rate requested from the colour camera.
            width: video width passed to ``setVideoSize``.
            height: video height passed to ``setVideoSize``.
        """
        pipeline = dai.Pipeline()
        self._pipeline = pipeline

        # Nodes: colour camera as the source, XLink stream "video" as the sink.
        camera = pipeline.create(dai.node.ColorCamera)
        video_out = pipeline.create(dai.node.XLinkOut)
        self._camRgb = camera
        self._xoutVideo = video_out

        video_out.setStreamName("video")

        # Camera configuration.
        camera.setBoardSocket(dai.CameraBoardSocket.RGB)
        camera.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
        camera.setVideoSize(width, height)
        camera.setFps(fps)

        # Output input-port configuration: non-blocking, queue size 1.
        video_out.input.setBlocking(False)
        video_out.input.setQueueSize(1)

        # Wire the camera's video output into the XLink sink.
        camera.video.link(video_out.input)

        # Open the device with the pipeline and grab the host-side queue.
        self._device = dai.Device(pipeline)
        self._video = self._device.getOutputQueue(name="video", maxSize=1, blocking=False)
        self.fps_handler = FPSHandler()
        self.height = camera.getVideoHeight()
        self.width = camera.getVideoWidth()

    def get_color_frame(self, show_fps=False):
        """Return the next frame from the "video" queue as an OpenCV image.

        Args:
            show_fps: when true, count the frame in ``fps_handler`` and
                return the frame with the rounded FPS value drawn on it.
        """
        packet = self._video.get()
        # getCvFrame() yields a BGR frame from the NV12 encoded video frame
        # so it can be shown with OpenCV. Visualizing the frame on slower
        # hosts might have overhead.
        image = packet.getCvFrame()
        if not show_fps:
            return image

        counter = self.fps_handler
        counter.next_iter()
        return counter.show_fps(image, round(counter.fps(), 2))
|
||
|
||
if __name__ == '__main__':
    # Manual preview: show the camera stream with the FPS overlay until
    # 'q' is pressed.
    oak_d = OAK_D(fps=60, width=300, height=300)
    try:
        while True:
            frame = oak_d.get_color_frame(show_fps=True)
            cv2.imshow("VidraCar", frame)
            # Mask to the low byte before comparing, matching the key check
            # used by the detection driver script.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Close the preview window on normal exit, Ctrl-C or an error.
        cv2.destroyAllWindows()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import torch | ||
from ultralytics.utils.plotting import Annotator, colors | ||
|
||
from utils.dataloaders import OakDLoadImages | ||
from utils.general import ( | ||
Profile, | ||
check_img_size, | ||
non_max_suppression, | ||
scale_boxes, | ||
xyxy2xywh, | ||
) | ||
from utils.torch_utils import smart_inference_mode | ||
|
||
|
||
@smart_inference_mode()
def run(
    frame=None,  # openCV image
    imgsz=(640, 640),  # inference size (height, width)
    conf_thres=0.25,  # confidence threshold
    iou_thres=0.45,  # NMS IOU threshold
    max_det=1000,  # maximum detections per image
    device="",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
    classes=None,  # filter by class: --class 0, or --class 0 2 3
    agnostic_nms=False,  # class-agnostic NMS
    line_thickness=3,  # bounding box thickness (pixels)
    hide_labels=False,  # hide labels
    hide_conf=False,  # hide confidences
    half=False,  # use FP16 half-precision inference
    dnn=False,  # use OpenCV DNN for ONNX inference
    model=None,
):
    """Run detection on one OpenCV frame and draw the detections on it.

    Args:
        frame: OpenCV image handed to ``OakDLoadImages``.
        imgsz: inference size (height, width); validated against the model
            stride with ``check_img_size``.
        conf_thres, iou_thres, classes, agnostic_nms, max_det: forwarded to
            ``non_max_suppression``.
        device: only forwarded to the ``Profile`` timers here; the tensor
            is moved to ``model.device``.
        line_thickness: bounding-box line width for the annotator.
        hide_labels: draw boxes without any label text.
        hide_conf: draw the class name without the confidence value.
        half, dnn: accepted for signature compatibility; not used here.
        model: loaded detection model exposing ``stride``, ``names``,
            ``pt``, ``fp16``, ``xml``, ``device`` and ``warmup``.

    Returns:
        ``(image, boxes)`` where ``image`` is the annotated frame and
        ``boxes`` is a list with one ``xyxy`` entry per detection, rescaled
        to the original frame size. If the loader yields nothing,
        ``(frame, [])`` is returned.

    Raises:
        ValueError: if ``model`` is None.
    """
    if model is None:
        raise ValueError("run() requires a loaded model, got model=None")

    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    dataset = OakDLoadImages(frame, img_size=imgsz[0], stride=stride, auto=pt)

    # NOTE(review): warmup runs on every call, i.e. once per frame when this
    # function is used in a capture loop - consider warming up once at model
    # load time instead. Batch size is always 1 here.
    model.warmup(imgsz=(1, 3, *imgsz))

    # Per-stage timers (pre-process, inference, NMS); the timings are
    # collected but not reported by this function.
    dt = (Profile(device=device), Profile(device=device), Profile(device=device))

    # Defaults so the return value is defined even if the loader is empty.
    annotated = frame
    results_for_bounding_boxes = []

    for im, im0 in dataset:
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim
            if model.xml and im.shape[0] > 1:
                ims = torch.chunk(im, im.shape[0], 0)

        # Inference
        with dt[1]:
            if model.xml and im.shape[0] > 1:
                # Run the batch one image at a time and stack the outputs.
                pred = torch.cat(
                    [model(image, augment=False, visualize=False).unsqueeze(0) for image in ims],
                    dim=0,
                )
                pred = [pred, None]
            else:
                pred = model(im, augment=False, visualize=False)

        # NMS
        with dt[2]:
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

        results_for_bounding_boxes = []
        # Process predictions
        for det in pred:  # per image
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    c = int(cls)  # integer class
                    # Add bbox to image
                    label = None if hide_labels else (names[c] if hide_conf else f"{names[c]} {conf:.2f}")
                    annotator.box_label(xyxy, label, color=colors(c, True))

                    # TODO: check whether I need normalized xyxy or standard
                    results_for_bounding_boxes.append(xyxy)

            # Take the image from the annotator instead of assuming it drew
            # in place on im0 (it may work on a copy or a PIL image).
            im0 = annotator.result()

        annotated = im0

    return annotated, results_for_bounding_boxes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license | ||
|
||
# Parameters | ||
nc: 3 # number of classes | ||
depth_multiple: 0.33 # model depth multiple | ||
width_multiple: 0.50 # layer channel multiple | ||
anchors: | ||
- [10, 13, 16, 30, 33, 23] # P3/8 | ||
- [30, 61, 62, 45, 59, 119] # P4/16 | ||
- [116, 90, 156, 198, 373, 326] # P5/32 | ||
|
||
# YOLOv5 v6.0 backbone | ||
backbone: | ||
# [from, number, module, args] | ||
[ | ||
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 | ||
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 | ||
[-1, 3, C3, [128]], | ||
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 | ||
[-1, 6, C3, [256]], | ||
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16 | ||
[-1, 9, C3, [512]], | ||
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 | ||
[-1, 3, C3, [1024]], | ||
[-1, 1, SPPF, [1024, 5]], # 9 | ||
] | ||
|
||
# YOLOv5 v6.0 head | ||
head: [ | ||
[-1, 1, Conv, [512, 1, 1]], | ||
[-1, 1, nn.Upsample, [None, 2, "nearest"]], | ||
[[-1, 6], 1, Concat, [1]], # cat backbone P4 | ||
[-1, 3, C3, [512, False]], # 13 | ||
|
||
[-1, 1, Conv, [256, 1, 1]], | ||
[-1, 1, nn.Upsample, [None, 2, "nearest"]], | ||
[[-1, 4], 1, Concat, [1]], # cat backbone P3 | ||
[-1, 3, C3, [256, False]], # 17 (P3/8-small) | ||
|
||
[-1, 1, Conv, [256, 3, 2]], | ||
[[-1, 14], 1, Concat, [1]], # cat head P4 | ||
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) | ||
|
||
[-1, 1, Conv, [512, 3, 2]], | ||
[[-1, 10], 1, Concat, [1]], # cat head P5 | ||
[-1, 3, C3, [1024, False]], # 23 (P5/32-large) | ||
|
||
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,59 @@ | ||
# YOLOv5 requirements | ||
# Usage: pip install -r requirements.txt | ||
|
||
# Base ------------------------------------------------------------------------ | ||
gitpython>=3.1.30 | ||
matplotlib>=3.3 | ||
numpy>=1.23.5 | ||
opencv-python>=4.1.1 | ||
Pillow>=9.4.0 | ||
psutil # system resources | ||
PyYAML>=5.3.1 | ||
requests>=2.23.0 | ||
scipy>=1.4.1 | ||
thop>=0.1.1 # FLOPs computation | ||
torch>=1.8.0 # see https://pytorch.org/get-started/locally (recommended) | ||
torchvision>=0.9.0 | ||
tqdm>=4.64.0 | ||
ultralytics>=8.0.232 | ||
# protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012 | ||
|
||
# Logging --------------------------------------------------------------------- | ||
# tensorboard>=2.4.1 | ||
# clearml>=1.2.0 | ||
# comet | ||
|
||
# Plotting -------------------------------------------------------------------- | ||
pandas>=1.1.4 | ||
seaborn>=0.11.0 | ||
|
||
# Export ---------------------------------------------------------------------- | ||
# coremltools>=6.0 # CoreML export | ||
# onnx>=1.10.0 # ONNX export | ||
# onnx-simplifier>=0.4.1 # ONNX simplifier | ||
# nvidia-pyindex # TensorRT export | ||
# nvidia-tensorrt # TensorRT export | ||
# scikit-learn<=1.1.2 # CoreML quantization | ||
# tensorflow>=2.4.0,<=2.13.1 # TF exports (-cpu, -aarch64, -macos) | ||
# tensorflowjs>=3.9.0 # TF.js export | ||
# openvino-dev>=2023.0 # OpenVINO export | ||
|
||
# Deploy ---------------------------------------------------------------------- | ||
setuptools>=65.5.1 # Snyk vulnerability fix | ||
# tritonclient[all]~=2.24.0 | ||
|
||
# Extras ---------------------------------------------------------------------- | ||
# ipython # interactive notebook | ||
# mss # screenshots | ||
# albumentations>=1.0.3 | ||
# pycocotools>=2.0.6 # COCO mAP | ||
wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability | ||
certifi==2024.2.2 | ||
charset-normalizer==3.3.2 | ||
clip @ git+https://github.com/ultralytics/CLIP.git@e17416a36b45d040758327936a1ea150c13fe3d1 | ||
contourpy==1.2.0 | ||
cycler==0.12.1 | ||
depthai==2.25.0.0 | ||
filelock==3.13.3 | ||
fonttools==4.50.0 | ||
fsspec==2024.3.1 | ||
ftfy==6.2.0 | ||
gitdb==4.0.11 | ||
GitPython==3.1.43 | ||
idna==3.6 | ||
Jinja2==3.1.3 | ||
kiwisolver==1.4.5 | ||
MarkupSafe==2.1.5 | ||
matplotlib==3.8.3 | ||
mpmath==1.3.0 | ||
networkx==3.2.1 | ||
numpy==1.26.4 | ||
nvidia-cublas-cu12==12.1.3.1 | ||
nvidia-cuda-cupti-cu12==12.1.105 | ||
nvidia-cuda-nvrtc-cu12==12.1.105 | ||
nvidia-cuda-runtime-cu12==12.1.105 | ||
nvidia-cudnn-cu12==8.9.2.26 | ||
nvidia-cufft-cu12==11.0.2.54 | ||
nvidia-curand-cu12==10.3.2.106 | ||
nvidia-cusolver-cu12==11.4.5.107 | ||
nvidia-cusparse-cu12==12.1.0.106 | ||
nvidia-nccl-cu12==2.19.3 | ||
nvidia-nvjitlink-cu12==12.4.99 | ||
nvidia-nvtx-cu12==12.1.105 | ||
opencv-python==4.9.0.80 | ||
packaging==24.0 | ||
pandas==2.2.1 | ||
pillow==10.2.0 | ||
psutil==5.9.8 | ||
py-cpuinfo==9.0.0 | ||
pyparsing==3.1.2 | ||
python-dateutil==2.9.0.post0 | ||
pytz==2024.1 | ||
PyYAML==6.0.1 | ||
regex==2023.12.25 | ||
requests==2.31.0 | ||
scipy==1.12.0 | ||
seaborn==0.13.2 | ||
six==1.16.0 | ||
smmap==5.0.1 | ||
sympy==1.12 | ||
thop==0.1.1.post2209072238 | ||
torch==2.2.2 | ||
torchvision==0.17.2 | ||
tqdm==4.66.2 | ||
triton==2.2.0 | ||
typing_extensions==4.10.0 | ||
tzdata==2024.1 | ||
ultralytics==8.1.39 | ||
urllib3==2.2.1 | ||
wcwidth==0.2.13 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import OAK_D_api as oak | ||
import cv2 | ||
import torch | ||
from inference import run | ||
from models.common import DetectMultiBackend | ||
|
||
|
||
def run_object_detection(weights_path='./runs/train/yolov5s_results3/weights/best.pt'):
    """Stream frames from the OAK-D camera, run detection and show the result.

    Loads the model once, then loops: grab a frame (with FPS overlay), run
    ``run`` on it restricted to class index 1, and display the annotated
    image in a window named "Levi". The loop ends when 'q' is pressed.

    Args:
        weights_path: path to the trained weights file to load; defaults to
            the path that was previously hard-coded.
    """
    # Load model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = DetectMultiBackend(weights_path, device=device, dnn=False, fp16=False)
    model.eval()

    # camera setup
    oak_d = oak.OAK_D(fps=60, width=1920, height=1080)

    try:
        while True:
            frame = oak_d.get_color_frame(show_fps=True)
            # The returned boxes are not used by this viewer yet.
            img, _boxes = run(frame=frame, classes=[1], model=model)

            cv2.imshow("Levi", img)

            # Break the loop if 'q' key is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Close the window on normal exit, Ctrl-C or an error in the loop.
        cv2.destroyAllWindows()


if __name__ == '__main__':
    run_object_detection()
|
Oops, something went wrong.