forked from robmarkcole/HASS-Deepstack-object
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_processing.py
585 lines (522 loc) · 21.1 KB
/
image_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
"""
Component that will perform object detection and identification via deepstack.
For more details about this platform, please refer to the documentation at
https://home-assistant.io/components/image_processing.deepstack_object
"""
from collections import namedtuple, Counter
import datetime
import io
import logging
import os
import re
from datetime import timedelta
from pathlib import Path
from typing import Tuple, Dict, List

import deepstack.core as ds
import voluptuous as vol
from PIL import Image, ImageDraw, UnidentifiedImageError

import homeassistant.helpers.config_validation as cv
import homeassistant.util.dt as dt_util
from homeassistant.components.image_processing import (
    ATTR_CONFIDENCE,
    CONF_CONFIDENCE,
    CONF_ENTITY_ID,
    CONF_NAME,
    CONF_SOURCE,
    DEFAULT_CONFIDENCE,
    DOMAIN,
    PLATFORM_SCHEMA,
    ImageProcessingEntity,
)
from homeassistant.const import (
    ATTR_ENTITY_ID,
    ATTR_NAME,
    CONF_IP_ADDRESS,
    CONF_PORT,
)
from homeassistant.core import split_entity_id
from homeassistant.util.pil import draw_box
_LOGGER = logging.getLogger(__name__)
# Category buckets: raw deepstack labels are mapped onto one of OBJECT_TYPES
# by get_object_type(), so a target can be configured by name OR by type.
ANIMAL = "animal"
ANIMALS = [
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
]
OTHER = "other"
PERSON = "person"
VEHICLE = "vehicle"
VEHICLES = ["bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck"]
OBJECT_TYPES = [ANIMAL, OTHER, PERSON, VEHICLE]
# Configuration keys for the platform schema below.
CONF_API_KEY = "api_key"
CONF_TARGET = "target"
CONF_TARGETS = "targets"
CONF_TIMEOUT = "timeout"
CONF_SAVE_FILE_FORMAT = "save_file_format"
CONF_SAVE_FILE_FOLDER = "save_file_folder"
# NOTE(review): constant name has a typo ("TIMESTAMPTED") but is referenced
# throughout this file; renaming it would be a breaking change.
CONF_SAVE_TIMESTAMPTED_FILE = "save_timestamped_file"
CONF_ALWAYS_SAVE_LATEST_FILE = "always_save_latest_file"
CONF_SHOW_BOXES = "show_boxes"
CONF_PREV_OBJECTS_IGNORE = "prev_objects_ignore" #giddy - boolean: skip targets already seen in the previous scan
CONF_PREV_OBJECTS_PCT = "prev_objects_pct" #giddy - max per-edge difference (in 0-1 normalised box coords) for a target to count as previously seen
CONF_ROI_Y_MIN = "roi_y_min"
CONF_ROI_X_MIN = "roi_x_min"
CONF_ROI_Y_MAX = "roi_y_max"
CONF_ROI_X_MAX = "roi_x_max"
CONF_SCALE = "scale"
CONF_CUSTOM_MODEL = "custom_model"
DATETIME_FORMAT = "%Y-%m-%d_%H-%M-%S" #giddy - removed microseconds
DEFAULT_API_KEY = ""
DEFAULT_TARGETS = [{CONF_TARGET: PERSON}]
DEFAULT_TIMEOUT = 10
DEFAULT_PREV_OBJECTS_PCT = 0.02
DEFAULT_ROI_Y_MIN = 0.0
DEFAULT_ROI_Y_MAX = 1.0
DEFAULT_ROI_X_MIN = 0.0
DEFAULT_ROI_X_MAX = 1.0
# NOTE(review): misspelling of DEFAULT_SCALE, kept for compatibility with existing references.
DEAULT_SCALE = 1.0
DEFAULT_ROI = (
    DEFAULT_ROI_Y_MIN,
    DEFAULT_ROI_X_MIN,
    DEFAULT_ROI_Y_MAX,
    DEFAULT_ROI_X_MAX,
)
EVENT_OBJECT_DETECTED = "deepstack.object_detected"
BOX = "box"
FILE = "file"
OBJECT = "object"
SAVED_FILE = "saved_file"
# Floor passed to the deepstack client; per-target thresholds are applied afterwards.
MIN_CONFIDENCE = 0.1
JPG = "jpg"
PNG = "png"
# rgb(red, green, blue)
RED = (255, 0, 0)  # For persons within the ROI
GREEN = (0, 255, 0)  # For ignored objects previously detected
PURPLE = (170, 0, 255)  # Default box colour (e.g. vehicles)
SILVER = (192, 192, 192) #giddy - For ROI box
YELLOW = (255, 255, 0)  # Unused
# Per-target schema: a target name/type plus an optional confidence (10-100%).
TARGETS_SCHEMA = {
    vol.Required(CONF_TARGET): cv.string,
    vol.Optional(CONF_CONFIDENCE): vol.All(
        vol.Coerce(float), vol.Range(min=10, max=100)
    ),
}
PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
    {
        vol.Required(CONF_IP_ADDRESS): cv.string,
        vol.Required(CONF_PORT): cv.port,
        vol.Optional(CONF_API_KEY, default=DEFAULT_API_KEY): cv.string,
        vol.Optional(CONF_TIMEOUT, default=DEFAULT_TIMEOUT): cv.positive_int,
        vol.Optional(CONF_CUSTOM_MODEL, default=""): cv.string,
        vol.Optional(CONF_TARGETS, default=DEFAULT_TARGETS): vol.All(
            cv.ensure_list, [vol.Schema(TARGETS_SCHEMA)]
        ),
        vol.Optional(CONF_ROI_Y_MIN, default=DEFAULT_ROI_Y_MIN): cv.small_float,
        vol.Optional(CONF_ROI_X_MIN, default=DEFAULT_ROI_X_MIN): cv.small_float,
        vol.Optional(CONF_ROI_Y_MAX, default=DEFAULT_ROI_Y_MAX): cv.small_float,
        vol.Optional(CONF_ROI_X_MAX, default=DEFAULT_ROI_X_MAX): cv.small_float,
        # FIX: previously vol.Range was passed as the `msg` argument of
        # vol.Coerce, so the 0.1-1 bound was never actually enforced.
        vol.Optional(CONF_SCALE, default=DEAULT_SCALE): vol.All(
            vol.Coerce(float), vol.Range(min=0.1, max=1)
        ),
        vol.Optional(CONF_SAVE_FILE_FOLDER): cv.isdir,
        vol.Optional(CONF_SAVE_FILE_FORMAT, default=JPG): vol.In([JPG, PNG]),
        vol.Optional(CONF_SAVE_TIMESTAMPTED_FILE, default=False): cv.boolean,
        vol.Optional(CONF_ALWAYS_SAVE_LATEST_FILE, default=False): cv.boolean,
        vol.Optional(CONF_SHOW_BOXES, default=True): cv.boolean,
        vol.Optional(CONF_PREV_OBJECTS_IGNORE, default=True): cv.boolean,
        vol.Optional(CONF_PREV_OBJECTS_PCT, default=DEFAULT_PREV_OBJECTS_PCT): cv.small_float,
    }
)
Box = namedtuple("Box", "y_min x_min y_max x_max")
Point = namedtuple("Point", "y x")


def point_in_box(box: Box, point: Point) -> bool:
    """Return True when *point* lies inside (or on the border of) *box*."""
    within_x = box.x_min <= point.x <= box.x_max
    within_y = box.y_min <= point.y <= box.y_max
    return within_x and within_y


def object_in_roi(roi: dict, centroid: dict) -> bool:
    """Convenience wrapper: convert the roi/centroid dicts to Box and Point."""
    return point_in_box(
        Box(roi["y_min"], roi["x_min"], roi["y_max"], roi["x_max"]),
        Point(centroid["y"], centroid["x"]),
    )
def get_valid_filename(name: str) -> str:
    """Sanitise *name* for use as a filename: spaces become underscores and
    any character other than word chars, hyphen and dot is stripped."""
    cleaned = str(name).strip().replace(" ", "_")
    return re.sub(r"(?u)[^-\w.]", "", cleaned)
def get_object_type(object_name: str) -> str:
    """Map a raw deepstack label onto one of the OBJECT_TYPES categories."""
    if object_name == PERSON:
        return PERSON
    if object_name in ANIMALS:
        return ANIMAL
    if object_name in VEHICLES:
        return VEHICLE
    return OTHER
def get_objects(predictions: list, img_width: int, img_height: int) -> List[Dict]:
    """Normalise raw deepstack predictions into object dicts.

    Coordinates are scaled to the 0-1 range by the image dimensions and
    rounded to three decimal places; confidence is expressed as a percentage.
    """
    dp = 3  # decimal places used for all rounded values
    results = []
    for pred in predictions:
        bounding_box = {
            "height": round((pred["y_max"] - pred["y_min"]) / img_height, dp),
            "width": round((pred["x_max"] - pred["x_min"]) / img_width, dp),
            "y_min": round(pred["y_min"] / img_height, dp),
            "x_min": round(pred["x_min"] / img_width, dp),
            "y_max": round(pred["y_max"] / img_height, dp),
            "x_max": round(pred["x_max"] / img_width, dp),
        }
        # Centroid is computed from the already-rounded box, as before.
        centroid = {
            "x": round(bounding_box["x_min"] + (bounding_box["width"] / 2), dp),
            "y": round(bounding_box["y_min"] + (bounding_box["height"] / 2), dp),
        }
        label = pred["label"]
        results.append(
            {
                "bounding_box": bounding_box,
                "box_area": round(bounding_box["height"] * bounding_box["width"], dp),
                "centroid": centroid,
                "name": label,
                "object_type": get_object_type(label),
                "confidence": round(pred["confidence"] * 100, dp),
            }
        )
    return results
def setup_platform(hass, config, add_devices, discovery_info=None):
    """Set up the classifier: one ObjectClassifyEntity per configured camera."""
    folder = config.get(CONF_SAVE_FILE_FOLDER)
    save_file_folder = Path(folder) if folder else folder
    entities = []
    for camera in config[CONF_SOURCE]:
        entities.append(
            ObjectClassifyEntity(
                ip_address=config.get(CONF_IP_ADDRESS),
                port=config.get(CONF_PORT),
                api_key=config.get(CONF_API_KEY),
                timeout=config.get(CONF_TIMEOUT),
                custom_model=config.get(CONF_CUSTOM_MODEL),
                targets=config.get(CONF_TARGETS),
                confidence=config.get(CONF_CONFIDENCE),
                roi_y_min=config[CONF_ROI_Y_MIN],
                roi_x_min=config[CONF_ROI_X_MIN],
                roi_y_max=config[CONF_ROI_Y_MAX],
                roi_x_max=config[CONF_ROI_X_MAX],
                scale=config[CONF_SCALE],
                show_boxes=config[CONF_SHOW_BOXES],
                prev_objs_ignore=config[CONF_PREV_OBJECTS_IGNORE],
                prev_objs_pct=config[CONF_PREV_OBJECTS_PCT],
                save_file_folder=save_file_folder,
                save_file_format=config[CONF_SAVE_FILE_FORMAT],
                save_timestamped_file=config.get(CONF_SAVE_TIMESTAMPTED_FILE),
                always_save_latest_file=config.get(CONF_ALWAYS_SAVE_LATEST_FILE),
                camera_entity=camera.get(CONF_ENTITY_ID),
                name=camera.get(CONF_NAME),
            )
        )
    add_devices(entities)
class ObjectClassifyEntity(ImageProcessingEntity):
    """Perform a object classification.

    Detects objects via a deepstack server, filters them by configured
    targets / confidence / ROI, optionally ignores objects already seen in
    the previous scan, saves an annotated image, and fires a
    `deepstack.object_detected` event summarising the findings.
    """

    def __init__(
        self,
        ip_address,
        port,
        api_key,
        timeout,
        custom_model,
        targets,
        confidence,
        roi_y_min,
        roi_x_min,
        roi_y_max,
        roi_x_max,
        scale,
        show_boxes,
        prev_objs_ignore,
        prev_objs_pct,
        save_file_folder,
        save_file_format,
        save_timestamped_file,
        always_save_latest_file,
        camera_entity,
        name=None,
    ):
        """Init with the API key and model id."""
        super().__init__()
        self._dsobject = ds.DeepstackObject(
            ip=ip_address,
            port=port,
            api_key=api_key,
            timeout=timeout,
            min_confidence=MIN_CONFIDENCE,
            custom_model=custom_model,
        )
        self._custom_model = custom_model
        self._confidence = confidence
        self._summary = {}
        self._targets = targets
        # Targets without an explicit confidence inherit the platform-level one.
        for target in self._targets:
            if CONF_CONFIDENCE not in target.keys():
                target.update({CONF_CONFIDENCE: self._confidence})
        self._targets_names = [
            target[CONF_TARGET] for target in targets
        ]  # can be a name or a type
        self._camera = camera_entity
        if name:
            self._name = name
        else:
            camera_name = split_entity_id(camera_entity)[1]
            self._name = "deepstack_object_{}".format(camera_name)
        self._state = None
        self._objects = []  # The parsed raw data
        self._targets_found = []
        self._targets_last = {}  # used for storing the previous/last objects
        self._targets_last[self._camera] = []
        self._targets_latest = {}  # used for comparing latest objects
        self._targets_latest[self._camera] = []
        self._last_detection = None
        self._last_filename = None
        self._roi_dict = {
            "y_min": roi_y_min,
            "x_min": roi_x_min,
            "y_max": roi_y_max,
            "x_max": roi_x_max,
        }
        self._scale = scale
        self._show_boxes = show_boxes
        self._prev_objs_ignore = prev_objs_ignore
        self._prev_objs_pct = prev_objs_pct
        self._image_width = None
        self._image_height = None
        self._save_file_folder = save_file_folder
        self._save_file_format = save_file_format
        self._save_timestamped_file = save_timestamped_file
        # FIX: was assigned twice in the original; one assignment suffices.
        self._always_save_latest_file = always_save_latest_file
        self._image = None

    def process_image(self, image):
        """Process an image: detect, filter, save annotated file, fire event."""
        self._image = Image.open(io.BytesIO(bytearray(image)))
        self._image_width, self._image_height = self._image.size
        # resize image if different then default
        if self._scale != DEAULT_SCALE:
            # FIX: the second dimension previously also used the image WIDTH,
            # which distorted the aspect ratio; use the height, and round to
            # whole pixels for thumbnail().
            newsize = (
                int(self._image_width * self._scale),
                int(self._image_height * self._scale),
            )
            # Image.LANCZOS is the same filter as the removed ANTIALIAS alias
            # (ANTIALIAS was dropped in Pillow 10).
            self._image.thumbnail(newsize, Image.LANCZOS)
            self._image_width, self._image_height = self._image.size
            with io.BytesIO() as output:
                self._image.save(output, format="JPEG")
                image = output.getvalue()
            _LOGGER.debug(
                (
                    f"Image scaled with : {self._scale} W={self._image_width} H={self._image_height}"
                )
            )
        self._state = None
        self._objects = []  # The parsed raw data
        self._targets_found = []
        self._summary = {}
        saved_image_path = None
        try:
            predictions = self._dsobject.detect(image)
        except ds.DeepstackException as exc:
            _LOGGER.error("Deepstack error : %s", exc)
            return
        self._objects = get_objects(predictions, self._image_width, self._image_height)
        real_targets_found = []  # only real targets, excluding ignored targets
        # Reference to the objects found on the previous scan (empty on first run).
        latest_targets_cp = []
        if self._targets_latest and self._camera in self._targets_latest:
            latest_targets_cp = self._targets_latest[self._camera]
            # Promote the previous "latest" set to "last" before we overwrite it.
            self._targets_last[self._camera] = self._targets_latest[self._camera]
        ignore_count = 0
        target_count = 0
        for obj in self._objects:
            if not (
                (obj["name"] in self._targets_names)
                or (obj["object_type"] in self._targets_names)
            ):
                continue
            # Start from the platform default each iteration so one object's
            # threshold cannot leak into the next object's check.
            confidence = DEFAULT_CONFIDENCE
            # Type-level confidence first; a name-level confidence (second
            # loop) takes precedence over it.
            for target in self._targets:
                if obj["object_type"] == target[CONF_TARGET]:
                    confidence = target[CONF_CONFIDENCE]
            for target in self._targets:
                if obj["name"] == target[CONF_TARGET]:
                    confidence = target[CONF_CONFIDENCE]
            if obj["confidence"] > confidence:
                if not object_in_roi(self._roi_dict, obj["centroid"]):
                    continue
                # Ignore target if it was previously detected at (almost) the
                # same position: every box edge must differ by less than
                # _prev_objs_pct (in normalised 0-1 coordinates).
                ignore = "false"
                if self._prev_objs_ignore:
                    for last in latest_targets_cp:
                        if obj["name"] == last["name"]:
                            obj_box = obj["bounding_box"]
                            last_box = last["bounding_box"]
                            if (
                                round(abs(obj_box["x_min"] - last_box["x_min"]), 5) < self._prev_objs_pct
                                and round(abs(obj_box["x_max"] - last_box["x_max"]), 5) < self._prev_objs_pct
                                and round(abs(obj_box["y_min"] - last_box["y_min"]), 5) < self._prev_objs_pct
                                and round(abs(obj_box["y_max"] - last_box["y_max"]), 5) < self._prev_objs_pct
                            ):
                                ignore = "true"
                                ignore_count += 1
                                break
                obj["ignore"] = ignore
                obj["ignoreCount"] = ignore_count  # running count at time of append
                self._targets_found.append(obj)
                if ignore == "false":
                    target_count += 1
                    real_targets_found.append(obj)
        real_targets_found = [
            obj["name"] for obj in real_targets_found
        ]  # Just the list of target names, e.g. [car, car, person]
        self._summary = dict(Counter(real_targets_found))  # e.g. {'car':2, 'person':1}
        self._state = target_count
        if self._state > 0:
            # Set last_detection time (used in the timestamped filename below).
            self._last_detection = dt_util.now().strftime(DATETIME_FORMAT)
        # FIX: save the image BEFORE firing the event — previously the event
        # was fired first, so saved_image_path was always None and the
        # SAVED_FILE key never made it into the event payload.
        if self._save_file_folder and (self._state > 0 or self._always_save_latest_file):
            saved_image_path = self.save_image(
                self._targets_found,
                self._save_file_folder,
            )
        if self._state > 0:
            # Fire 1 event of all detected objects, with 'targets_found' and 'summary'
            target_event_data = {}
            target_event_data[ATTR_ENTITY_ID] = self.entity_id
            target_event_data["targets_found"] = real_targets_found
            target_event_data["summary"] = self._summary
            if saved_image_path:
                target_event_data[SAVED_FILE] = saved_image_path
            self.hass.bus.fire(EVENT_OBJECT_DETECTED, target_event_data)
        self._targets_latest[self._camera] = self._targets_found  # save the targets for next time

    @property
    def camera_entity(self):
        """Return camera entity id from process pictures."""
        return self._camera

    @property
    def state(self):
        """Return the state of the entity."""
        return self._state

    @property
    def name(self):
        """Return the name of the sensor."""
        return self._name

    @property
    def unit_of_measurement(self):
        """Return the unit of measurement."""
        return "targets"

    @property
    def should_poll(self):
        """Return the polling state."""
        return False

    @property
    def extra_state_attributes(self) -> Dict:
        """Return device specific state attributes."""
        attr = {}
        attr["targets"] = self._targets
        attr["targets_last"] = [
            {obj["name"]: obj["confidence"], "bounding_box": obj["bounding_box"]}
            for obj in self._targets_last[self._camera]
        ]
        attr["targets_found"] = [
            {obj["name"]: obj["confidence"], "bounding_box": obj["bounding_box"], "ignore": obj["ignore"]}
            for obj in self._targets_found
        ]
        attr["summary"] = self._summary
        if self._last_detection:
            attr["last_target_detection"] = self._last_detection
        if self._last_filename:
            # convert path to str, otherwise: "Object of type PosixPath is not JSON serializable"
            attr["last_filename"] = str(self._last_filename)
        if self._custom_model:
            attr["custom_model"] = self._custom_model
        attr["all_objects"] = [
            {obj["name"]: obj["confidence"], "bounding_box": obj["bounding_box"]}
            for obj in self._objects
        ]
        if self._save_file_folder:
            attr[CONF_SAVE_FILE_FOLDER] = str(self._save_file_folder)
            attr[CONF_SAVE_FILE_FORMAT] = self._save_file_format
            attr[CONF_SAVE_TIMESTAMPTED_FILE] = self._save_timestamped_file
            attr[CONF_ALWAYS_SAVE_LATEST_FILE] = self._always_save_latest_file
        return attr

    def save_image(self, targets, directory) -> str:
        """Draws the actual bounding box of the detected objects.

        Returns: saved_image_path, which is the path to the saved timestamped
        file if configured, else the default saved image.
        """
        try:
            img = self._image.convert("RGB")
        except UnidentifiedImageError:
            _LOGGER.warning("Deepstack unable to process image, bad data")
            return
        draw = ImageDraw.Draw(img)
        roi_tuple = tuple(self._roi_dict.values())
        if roi_tuple != DEFAULT_ROI and self._show_boxes:
            draw_box(
                draw,
                roi_tuple,
                img.width,
                img.height,
                text="ROI",
                color=SILVER,
            )
        if self._show_boxes:
            for obj in targets:
                name = obj["name"]
                confidence = obj["confidence"]
                box = obj["bounding_box"]
                centroid = obj["centroid"]
                box_label = f"{name}: {confidence:.0f}%"
                # Colour code: green = ignored (previously seen), red = person,
                # purple = everything else.
                boxColor = PURPLE
                if obj["ignore"] == "true":
                    boxColor = GREEN
                elif obj["object_type"] == PERSON:
                    boxColor = RED
                draw_box(
                    draw,
                    (box["y_min"], box["x_min"], box["y_max"], box["x_max"]),
                    img.width,
                    img.height,
                    text=box_label,
                    color=boxColor,
                )
                # draw bullseye
                draw.text(
                    (centroid["x"] * img.width, centroid["y"] * img.height),
                    text="X",
                    fill=boxColor,
                )
        # Save images, returning the path of saved image as str
        latest_save_path = (
            directory
            / f"{get_valid_filename(self._name.replace('deepstack_object_', '')).lower()}_latest.{self._save_file_format}"
        )
        img.save(latest_save_path)
        _LOGGER.info("Deepstack saved file %s", latest_save_path)
        saved_image_path = latest_save_path
        if self._save_timestamped_file:
            timestamp_save_path = (
                directory
                / f"{self._name.replace('deepstack_object_', '')}_{self._last_detection}.{self._save_file_format}"
            )
            img.save(timestamp_save_path)
            _LOGGER.info("Deepstack saved file %s", timestamp_save_path)
            saved_image_path = timestamp_save_path
        # Only update last_filename if state > 0
        if self._state > 0:
            self._last_filename = saved_image_path
        return str(saved_image_path)