
Gaze estimator experiment in v3 #529

Open

wants to merge 14 commits into base: master
8 changes: 8 additions & 0 deletions .gitmodules
@@ -0,0 +1,8 @@
[submodule "depthai-core"]
path = depthai-core
url = https://github.com/luxonis/depthai-core.git
branch = v3_develop
[submodule "gen3-gaze-estimation-cpp/depthai-core"]
path = gen3-gaze-estimation-cpp/depthai-core
url = https://github.com/luxonis/depthai-core.git
branch = v3_develop
1 change: 1 addition & 0 deletions depthai-core
Submodule depthai-core added at 777c2c
2 changes: 1 addition & 1 deletion gen2-gaze-estimation/MultiMsgSync.py
@@ -39,8 +39,8 @@ def get_msgs(self):
seq_remove = [] # Arr of sequence numbers to get deleted

for seq, msgs in self.msgs.items():
print(seq)
seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair

# Check if we have both detections and color frame with this sequence number
if "color" in msgs and "len" in msgs:

Binary file not shown.
16 changes: 9 additions & 7 deletions gen2-gaze-estimation/main.py
@@ -40,11 +40,13 @@ def create_output(name: str, output: dai.Node.Output):
print("Creating Face Detection Neural Network...")
face_det_nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
face_det_nn.setConfidenceThreshold(0.5)
face_det_nn.setBlobPath(blobconverter.from_zoo(
name="face-detection-retail-0004",
shaves=6,
version=openvino_version
))
#face_det_nn.setBlobPath(blobconverter.from_zoo(
# name="face-detection-retail-0004",
# shaves=6,
# version=openvino_version
#))
face_det_nn.setBlobPath("face-detection-retail-0004.blob")

# Link Face ImageManip -> Face detection NN node
face_det_manip.out.link(face_det_nn.input)

@@ -148,7 +150,6 @@ def create_output(name: str, output: dai.Node.Output):
script.inputs['none'].setQueueSize(1)

create_output('gaze', gaze_nn.out)

#==================================================

with dai.Device(pipeline) as device:
@@ -170,6 +171,7 @@ def create_output(name: str, output: dai.Node.Output):

msgs = sync.get_msgs()
if msgs is not None:
print("adasd")
frame = msgs["color"].getCvFrame()
dets = msgs["detection"].detections
for i, detection in enumerate(dets):
@@ -195,4 +197,4 @@ def create_output(name: str, output: dai.Node.Output):
cv2.imshow("Lasers", frame)

if cv2.waitKey(1) == ord('q'):
break
break
2 changes: 2 additions & 0 deletions gen3-gaze-estimation-cpp/.gitignore
@@ -0,0 +1,2 @@
.vscode/
build/
4 changes: 4 additions & 0 deletions gen3-gaze-estimation-cpp/.gitmodules
@@ -0,0 +1,4 @@
[submodule "depthai-core"]
path = depthai-core
url = https://github.com/luxonis/depthai-core.git
branch = v3_develop
55 changes: 55 additions & 0 deletions gen3-gaze-estimation-cpp/CMakeLists.txt
@@ -0,0 +1,55 @@
cmake_minimum_required(VERSION 3.4)

# Add depthai-core dependency
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/depthai-core EXCLUDE_FROM_ALL)

# Create a project with name 'gen3'
set(TARGET_NAME gen3)
project(${TARGET_NAME})

# Dependencies (optional, only used for example)
find_package(OpenCV REQUIRED)

# Add source files
add_executable("${TARGET_NAME}"
src/main.cpp
)

# Link with libraries
target_link_libraries(${TARGET_NAME}
PUBLIC
depthai::core
${OpenCV_LIBS} # optional, used for example
)

# Copy files to /build
file(COPY script.py DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
file(COPY face-detection-retail-0004.blob DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
file(COPY gaze-estimation-adas-0002.blob DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
file(COPY head-pose-estimation-adas-0001.blob DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
file(COPY landmarks-regression-retail-0009.blob DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")


# Treat missing return statements as compile errors
if(NOT MSVC)
target_compile_options(${TARGET_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Werror=return-type>)
endif()

# Set compiler features (C++17)
set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD 17)


# Windows - Add runtime dependencies
if(WIN32)
if(CMAKE_VERSION VERSION_LESS "3.21")
message(WARNING "CMake version less than 3.21 - automatic DLL handling not available. Make sure to copy required DLLs to the same folder as .exe")
else()
# TARGET_RUNTIME_DLLS generator expression available since CMake 3.21
set(depthai_dll_libraries "$<TARGET_RUNTIME_DLLS:${TARGET_NAME}>")
# Copy the required dlls
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND
${CMAKE_COMMAND} -E copy_if_different ${depthai_dll_libraries} $<TARGET_FILE_DIR:${TARGET_NAME}>
COMMAND_EXPAND_LISTS
)
endif()
endif()
1 change: 1 addition & 0 deletions gen3-gaze-estimation-cpp/depthai-core
Submodule depthai-core added at a851d0
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
152 changes: 152 additions & 0 deletions gen3-gaze-estimation-cpp/script.py
@@ -0,0 +1,152 @@
import time
sync = {} # Dict of messages

def find_in_dict(target_seq, name):
if str(target_seq) in sync:
return sync[str(target_seq)][name]

def add_to_dict(det, seq, name):
sync[str(seq)][name] = det

def correct_bb(bb):
if bb.xmin < 0: bb.xmin = 0.001
if bb.ymin < 0: bb.ymin = 0.001
if bb.xmax > 1: bb.xmax = 0.999
if bb.ymax > 1: bb.ymax = 0.999

def check_gaze_est(seq):
dict = sync[str(seq)]

if "left" in dict and "right" in dict and "angles" in dict:
# node.warn("GOT ALL 3")
# Send to gaze estimation NN
node.io['to_gaze_left'].send(dict['left'])
node.io['to_gaze_right'].send(dict['right'])
head_pose = NNData(6)
head_pose.setLayer("head_pose_angles", dict['angles'])
node.io['to_gaze_head'].send(head_pose)

        # Clear this and all older results (iterate over a copy of the keys so the dict
        # is not mutated while being iterated)
        for sq in list(sync):
            del sync[sq]
            if str(seq) == str(sq):
                return

PAD = 0.15
PAD2x = PAD * 2
def get_eye_coords(x, y, det):
xdelta = det.xmax - det.xmin
ydelta = det.ymax - det.ymin

xmin = x - PAD
xmax = xmin + PAD2x
ymin = y - PAD
ymax = ymin + PAD2x

xmin2 = det.xmin + xdelta * xmin
xmax2 = det.xmin + xdelta * xmax
ymin2 = det.ymin + ydelta * ymin
ymax2 = det.ymin + ydelta * ymax
ret = (xmin2, ymin2, xmax2, ymax2)
# node.warn(f"Eye: {x}/{y}, Crop eyes: {ret}, det {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}")
return ret

while True:
time.sleep(0.001)

preview = node.io['preview'].tryGet()
if preview is not None:
sync[str(preview.getSequenceNum())] = {
"frame": preview
}
# node.warn(f"New frame, {len(sync)}")

face_dets = node.io['face_det_in'].tryGet()
if face_dets is not None:
passthrough = node.io['face_pass'].get()
seq = passthrough.getSequenceNum()

# No detections, carry on
if len(face_dets.detections) == 0:
del sync[str(seq)]
continue

#node.warn(f"New detection {seq}")
if len(sync) == 0: continue
img = find_in_dict(seq, "frame")
if img is None: continue

add_to_dict(face_dets.detections[0], seq, "detections")

for det in face_dets.detections:
correct_bb(det)

# To head post estimation model
cfg1 = ImageManipConfig()
cfg1.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax)
cfg1.setResize(60, 60)
cfg1.setKeepAspectRatio(False)
node.io['headpose_cfg'].send(cfg1)
node.io['headpose_img'].send(img)

# To face landmark detection model
cfg2 = ImageManipConfig()
cfg2.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax)
cfg2.setResize(48, 48)
cfg2.setKeepAspectRatio(False)
node.io['landmark_cfg'].send(cfg2)
node.io['landmark_img'].send(img)
break # Only 1 face at the time currently supported

headpose = node.io['headpose_in'].tryGet()
if headpose is not None:
passthrough = node.io['headpose_pass'].get()
seq = passthrough.getSequenceNum()
# Face rotation in degrees
y = headpose.getLayerFp16('angle_y_fc')[0]
p = headpose.getLayerFp16('angle_p_fc')[0]
r = headpose.getLayerFp16('angle_r_fc')[0]
angles = [y,p,r]
# node.warn(f"angles {angles}")
add_to_dict(angles, seq, "angles")
check_gaze_est(seq)

landmark_in = node.io['landmark_in'].tryGet()
if landmark_in is not None:
passthrough = node.io['landmark_pass'].get()
seq = passthrough.getSequenceNum()

img = find_in_dict(seq, "frame")
det = find_in_dict(seq, "detections")
if img is None or det is None: continue

landmarks = landmark_in.getFirstLayerFp16()

# We need to crop left and right eye out of the face frame
left_cfg = ImageManipConfig()
left_cfg.setCropRect(*get_eye_coords(landmarks[0], landmarks[1], det))
left_cfg.setResize(60, 60)
left_cfg.setKeepAspectRatio(False)
node.io['left_manip_cfg'].send(left_cfg)
node.io['left_manip_img'].send(img)

right_cfg = ImageManipConfig()
right_cfg.setCropRect(*get_eye_coords(landmarks[2], landmarks[3], det))
right_cfg.setResize(60, 60)
right_cfg.setKeepAspectRatio(False)
node.io['right_manip_cfg'].send(right_cfg)
node.io['right_manip_img'].send(img)

left_eye = node.io['left_eye_in'].tryGet()
if left_eye is not None:
# node.warn("LEFT EYE GOT")
seq = left_eye.getSequenceNum()
add_to_dict(left_eye, seq, "left")
check_gaze_est(seq)

right_eye = node.io['right_eye_in'].tryGet()
if right_eye is not None:
# node.warn("RIGHT EYE GOT")
seq = right_eye.getSequenceNum()
add_to_dict(right_eye, seq, "right")
check_gaze_est(seq)
57 changes: 57 additions & 0 deletions gen3-gaze-estimation-cpp/src/MultiMsgSync.cpp
@@ -0,0 +1,57 @@
// Color frames (ImgFrame), object detection (ImgDetections) and gaze estimation (NNData)
// messages arrive to the host all with some additional delay.
// For each ImgFrame there's one ImgDetections msg, which has multiple detections, and for each
// detection there's an NNData msg which contains the gaze estimation result.
//
// How it works:
// Every ImgFrame, ImgDetections and NNData message has its own sequence number, by which we can sync messages.

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <depthai/depthai.hpp>

class TwoStageHostSeqSync{
public:
TwoStageHostSeqSync(){
msgs.clear();
}
    // name: color, detection, landmarks or gaze
void add_msg(std::shared_ptr<dai::MessageQueue> msg, std::string name){
int64_t f = -1;
if(name == "gaze" || name == "landmarks")
f = msg->get<dai::NNData>()->getSequenceNum();
else if(name == "color")
f = msg->get<dai::ImgFrame>()->getSequenceNum();
else f = msg->get<dai::ImgDetections>()->getSequenceNum();
auto seq = std::to_string(f);
msgs[seq][name].push_back(msg);
}

std::pair<std::map<std::string,std::vector<std::shared_ptr<dai::MessageQueue>>>,int> get_msgs(){
//std::cout<<"msgs size: "<<msgs.size()<<"\n";
std::vector<std::string> seq_remove;

for(auto it = msgs.begin(); it != msgs.end();it++){
auto seq = it->first;
auto r_msgs = it->second;

seq_remove.push_back(seq); // Will get removed from dict if we find synced msgs pairs
// Check if we have both detections and color frame with this sequence number
if(r_msgs.count("color") > 0 && r_msgs.count("detection") > 0){
                // Check if all detected objects (faces) have finished gaze inference
if(0 < r_msgs["gaze"].size()){
                    // We have synced msgs, remove old entries (memory cleaning)
                    for(auto rm : seq_remove){
                        msgs.erase(rm);
                    }
return {r_msgs,0}; // Returned synced msgs
}
}
}
return {msgs["-1"],-1}; // No synced msgs
}

private:
std::map<std::string,std::map<std::string,std::vector<std::shared_ptr<dai::MessageQueue>>>> msgs;
};
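
For context, here is a minimal sketch of how a host loop might drive TwoStageHostSeqSync. The runSyncLoop helper, the queue names and the queue setup are illustrative assumptions, not part of this PR; the actual wiring lives in src/main.cpp, which is not shown in this diff.

#include <map>
#include <memory>
#include <string>
#include <depthai/depthai.hpp>

// Hypothetical helper; assumes TwoStageHostSeqSync is visible in this translation unit
// and that the three queues carry the "color", "detection" and "gaze" streams.
void runSyncLoop(std::shared_ptr<dai::MessageQueue> colorQ,
                 std::shared_ptr<dai::MessageQueue> detQ,
                 std::shared_ptr<dai::MessageQueue> gazeQ) {
    TwoStageHostSeqSync sync;
    while(true) {
        // add_msg reads the next message's sequence number from each queue
        // and files the queue under that number.
        sync.add_msg(colorQ, "color");
        sync.add_msg(detQ, "detection");
        sync.add_msg(gazeQ, "gaze");

        auto result = sync.get_msgs();
        if(result.second == 0) {
            auto& synced = result.first;
            // synced["color"], synced["detection"] and synced["gaze"] now share
            // one sequence number and can be consumed together.
            (void)synced;
        }
    }
}
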
53 changes: 53 additions & 0 deletions gen3-gaze-estimation-cpp/src/bbox.cpp
@@ -0,0 +1,53 @@
#include <array>
#include <vector>
#include <depthai/depthai.hpp>

class Point{
//Used within the BoundingBox class when dealing with points.
public:
Point(float x,float y) : x(x),y(y){}
//Denormalize the point to pixel coordinates (0..frame width, 0..frame height)
std::array<int,2> denormalize(std::vector<int> frame_shape){
return {(int)(x * (float)frame_shape[1]), int(y * (float)frame_shape[0])};
}

private:
float x,y;
};


class BoundingBox{
//This class helps with bounding box calculations. It can be used to calculate relative bounding boxes,
//map points from relative to absolute coordinates and vice versa, crop frames, etc.
public:
BoundingBox(dai::ImgDetection bbox){
xmin = bbox.xmin,ymin = bbox.ymin,xmax = bbox.xmax,ymax = bbox.ymax;
width = xmax-xmin,height=ymax-ymin;
}


std::array<int,4> denormalize(std::vector<int> frame_shape){
/*
Denormalize the bounding box to pixel coordinates (0..frame width, 0..frame height).
Useful when you want to draw the bounding box on the frame.

*/
return {
(int)(frame_shape[1] * xmin),(int)(frame_shape[0] * ymin),
(int)(frame_shape[1] * xmax),(int)(frame_shape[0] * ymax)
};
}

Point map_point(float x,float y){
/*
Useful when you have a point inside the bounding box, and you want to map it to the frame.
Example: You run face detection, create BoundingBox from the result, and also run
facial landmarks detection on the cropped frame of the face. The landmarks are relative
to the face bounding box, but you want to draw them on the original frame.
*/
float mapped_x = xmin + width * x, mapped_y = ymin + height * y;
return Point(mapped_x, mapped_y);
}
private:
float xmin,ymin,xmax,ymax,width,height;
};
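
As a usage note, a small sketch of how the BoundingBox and Point helpers above could be used on the host side. The detection values and frame size are made up for illustration, and the sketch assumes both classes are visible in the translation unit (or moved into a header).

#include <array>
#include <vector>
#include <depthai/depthai.hpp>

int main() {
    // A face detection in normalized (0..1) coordinates; values are illustrative only.
    dai::ImgDetection det;
    det.xmin = 0.25f; det.ymin = 0.30f; det.xmax = 0.55f; det.ymax = 0.70f;

    BoundingBox faceBb(det);
    std::vector<int> frameShape = {720, 1280};  // {height, width}

    // Face rectangle in pixel coordinates, e.g. for drawing with cv::rectangle.
    std::array<int, 4> faceRect = faceBb.denormalize(frameShape);

    // A landmark at (0.3, 0.4) relative to the face crop, mapped back to the full
    // frame and then denormalized to pixel coordinates.
    Point eye = faceBb.map_point(0.3f, 0.4f);
    std::array<int, 2> eyePx = eye.denormalize(frameShape);

    (void)faceRect;
    (void)eyePx;
    return 0;
}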