forked from robmarkcole/HASS-Deepstack-object
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_processing.py
585 lines (522 loc) · 21.1 KB
/
image_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
"""
Component that will perform object detection and identification via deepstack.
For more details about this platform, please refer to the documentation at
https://home-assistant.io/components/image_processing.deepstack_object
"""
from collections import namedtuple, Counter
import datetime
import io
import logging
import os
import re
from datetime import timedelta
from pathlib import Path
from typing import Tuple, Dict, List

import deepstack.core as ds
import voluptuous as vol
from PIL import Image, ImageDraw, UnidentifiedImageError

import homeassistant.helpers.config_validation as cv
import homeassistant.util.dt as dt_util
from homeassistant.components.image_processing import (
    ATTR_CONFIDENCE,
    CONF_CONFIDENCE,
    CONF_ENTITY_ID,
    CONF_NAME,
    CONF_SOURCE,
    DEFAULT_CONFIDENCE,
    DOMAIN,
    PLATFORM_SCHEMA,
    ImageProcessingEntity,
)
from homeassistant.const import (
    ATTR_ENTITY_ID,
    ATTR_NAME,
    CONF_IP_ADDRESS,
    CONF_PORT,
)
from homeassistant.core import split_entity_id
from homeassistant.util.pil import draw_box
_LOGGER = logging.getLogger(__name__)
# Category buckets: raw deepstack labels are mapped onto one of OBJECT_TYPES
# by get_object_type(), so a target can be configured by name OR by type.
ANIMAL = "animal"
ANIMALS = [
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
]
OTHER = "other"
PERSON = "person"
VEHICLE = "vehicle"
VEHICLES = ["bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck"]
OBJECT_TYPES = [ANIMAL, OTHER, PERSON, VEHICLE]
# Configuration keys for the platform schema below.
CONF_API_KEY = "api_key"
CONF_TARGET = "target"
CONF_TARGETS = "targets"
CONF_TIMEOUT = "timeout"
CONF_SAVE_FILE_FORMAT = "save_file_format"
CONF_SAVE_FILE_FOLDER = "save_file_folder"
# NOTE(review): constant name has a typo ("TIMESTAMPTED") but is referenced
# throughout this file; renaming it would be a breaking change.
CONF_SAVE_TIMESTAMPTED_FILE = "save_timestamped_file"
CONF_ALWAYS_SAVE_LATEST_FILE = "always_save_latest_file"
CONF_SHOW_BOXES = "show_boxes"
CONF_PREV_OBJECTS_IGNORE = "prev_objects_ignore" #giddy - boolean: skip targets already seen in the previous scan
CONF_PREV_OBJECTS_PCT = "prev_objects_pct" #giddy - max per-edge difference (in 0-1 normalised box coords) for a target to count as previously seen
CONF_ROI_Y_MIN = "roi_y_min"
CONF_ROI_X_MIN = "roi_x_min"
CONF_ROI_Y_MAX = "roi_y_max"
CONF_ROI_X_MAX = "roi_x_max"
CONF_SCALE = "scale"
CONF_CUSTOM_MODEL = "custom_model"
DATETIME_FORMAT = "%Y-%m-%d_%H-%M-%S" #giddy - removed microseconds
DEFAULT_API_KEY = ""
DEFAULT_TARGETS = [{CONF_TARGET: PERSON}]
DEFAULT_TIMEOUT = 10
DEFAULT_PREV_OBJECTS_PCT = 0.02
DEFAULT_ROI_Y_MIN = 0.0
DEFAULT_ROI_Y_MAX = 1.0
DEFAULT_ROI_X_MIN = 0.0
DEFAULT_ROI_X_MAX = 1.0
# NOTE(review): misspelling of DEFAULT_SCALE, kept for compatibility with existing references.
DEAULT_SCALE = 1.0
DEFAULT_ROI = (
    DEFAULT_ROI_Y_MIN,
    DEFAULT_ROI_X_MIN,
    DEFAULT_ROI_Y_MAX,
    DEFAULT_ROI_X_MAX,
)
EVENT_OBJECT_DETECTED = "deepstack.object_detected"
BOX = "box"
FILE = "file"
OBJECT = "object"
SAVED_FILE = "saved_file"
# Floor passed to the deepstack client; per-target thresholds are applied afterwards.
MIN_CONFIDENCE = 0.1
JPG = "jpg"
PNG = "png"
# rgb(red, green, blue)
RED = (255, 0, 0)  # For persons within the ROI
GREEN = (0, 255, 0)  # For ignored objects previously detected
PURPLE = (170, 0, 255)  # Default box colour (e.g. vehicles)
SILVER = (192, 192, 192) #giddy - For ROI box
YELLOW = (255, 255, 0)  # Unused
# Per-target schema: a target name/type plus an optional confidence (10-100%).
TARGETS_SCHEMA = {
    vol.Required(CONF_TARGET): cv.string,
    vol.Optional(CONF_CONFIDENCE): vol.All(
        vol.Coerce(float), vol.Range(min=10, max=100)
    ),
}
PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
    {
        vol.Required(CONF_IP_ADDRESS): cv.string,
        vol.Required(CONF_PORT): cv.port,
        vol.Optional(CONF_API_KEY, default=DEFAULT_API_KEY): cv.string,
        vol.Optional(CONF_TIMEOUT, default=DEFAULT_TIMEOUT): cv.positive_int,
        vol.Optional(CONF_CUSTOM_MODEL, default=""): cv.string,
        vol.Optional(CONF_TARGETS, default=DEFAULT_TARGETS): vol.All(
            cv.ensure_list, [vol.Schema(TARGETS_SCHEMA)]
        ),
        vol.Optional(CONF_ROI_Y_MIN, default=DEFAULT_ROI_Y_MIN): cv.small_float,
        vol.Optional(CONF_ROI_X_MIN, default=DEFAULT_ROI_X_MIN): cv.small_float,
        vol.Optional(CONF_ROI_Y_MAX, default=DEFAULT_ROI_Y_MAX): cv.small_float,
        vol.Optional(CONF_ROI_X_MAX, default=DEFAULT_ROI_X_MAX): cv.small_float,
        # FIX: previously vol.Range was passed as the `msg` argument of
        # vol.Coerce, so the 0.1-1 bound was never actually enforced.
        vol.Optional(CONF_SCALE, default=DEAULT_SCALE): vol.All(
            vol.Coerce(float), vol.Range(min=0.1, max=1)
        ),
        vol.Optional(CONF_SAVE_FILE_FOLDER): cv.isdir,
        vol.Optional(CONF_SAVE_FILE_FORMAT, default=JPG): vol.In([JPG, PNG]),
        vol.Optional(CONF_SAVE_TIMESTAMPTED_FILE, default=False): cv.boolean,
        vol.Optional(CONF_ALWAYS_SAVE_LATEST_FILE, default=False): cv.boolean,
        vol.Optional(CONF_SHOW_BOXES, default=True): cv.boolean,
        vol.Optional(CONF_PREV_OBJECTS_IGNORE, default=True): cv.boolean,
        vol.Optional(CONF_PREV_OBJECTS_PCT, default=DEFAULT_PREV_OBJECTS_PCT): cv.small_float,
    }
)
Box = namedtuple("Box", "y_min x_min y_max x_max")
Point = namedtuple("Point", "y x")


def point_in_box(box: Box, point: Point) -> bool:
    """Return True when *point* lies inside (or on the border of) *box*."""
    within_x = box.x_min <= point.x <= box.x_max
    within_y = box.y_min <= point.y <= box.y_max
    return within_x and within_y


def object_in_roi(roi: dict, centroid: dict) -> bool:
    """Convenience wrapper: convert the roi/centroid dicts to Box and Point."""
    return point_in_box(
        Box(roi["y_min"], roi["x_min"], roi["y_max"], roi["x_max"]),
        Point(centroid["y"], centroid["x"]),
    )
def get_valid_filename(name: str) -> str:
    """Sanitise *name* for use as a filename: spaces become underscores and
    any character other than word chars, hyphen and dot is stripped."""
    cleaned = str(name).strip().replace(" ", "_")
    return re.sub(r"(?u)[^-\w.]", "", cleaned)
def get_object_type(object_name: str) -> str:
    """Map a raw deepstack label onto one of the OBJECT_TYPES categories."""
    if object_name == PERSON:
        return PERSON
    if object_name in ANIMALS:
        return ANIMAL
    if object_name in VEHICLES:
        return VEHICLE
    return OTHER
def get_objects(predictions: list, img_width: int, img_height: int) -> List[Dict]:
    """Normalise raw deepstack predictions into object dicts.

    Coordinates are scaled to the 0-1 range by the image dimensions and
    rounded to three decimal places; confidence is expressed as a percentage.
    """
    dp = 3  # decimal places used for all rounded values
    results = []
    for pred in predictions:
        bounding_box = {
            "height": round((pred["y_max"] - pred["y_min"]) / img_height, dp),
            "width": round((pred["x_max"] - pred["x_min"]) / img_width, dp),
            "y_min": round(pred["y_min"] / img_height, dp),
            "x_min": round(pred["x_min"] / img_width, dp),
            "y_max": round(pred["y_max"] / img_height, dp),
            "x_max": round(pred["x_max"] / img_width, dp),
        }
        # Centroid is computed from the already-rounded box, as before.
        centroid = {
            "x": round(bounding_box["x_min"] + (bounding_box["width"] / 2), dp),
            "y": round(bounding_box["y_min"] + (bounding_box["height"] / 2), dp),
        }
        label = pred["label"]
        results.append(
            {
                "bounding_box": bounding_box,
                "box_area": round(bounding_box["height"] * bounding_box["width"], dp),
                "centroid": centroid,
                "name": label,
                "object_type": get_object_type(label),
                "confidence": round(pred["confidence"] * 100, dp),
            }
        )
    return results
def setup_platform(hass, config, add_devices, discovery_info=None):
    """Set up the classifier: one ObjectClassifyEntity per configured camera."""
    folder = config.get(CONF_SAVE_FILE_FOLDER)
    save_file_folder = Path(folder) if folder else folder
    entities = []
    for camera in config[CONF_SOURCE]:
        entities.append(
            ObjectClassifyEntity(
                ip_address=config.get(CONF_IP_ADDRESS),
                port=config.get(CONF_PORT),
                api_key=config.get(CONF_API_KEY),
                timeout=config.get(CONF_TIMEOUT),
                custom_model=config.get(CONF_CUSTOM_MODEL),
                targets=config.get(CONF_TARGETS),
                confidence=config.get(CONF_CONFIDENCE),
                roi_y_min=config[CONF_ROI_Y_MIN],
                roi_x_min=config[CONF_ROI_X_MIN],
                roi_y_max=config[CONF_ROI_Y_MAX],
                roi_x_max=config[CONF_ROI_X_MAX],
                scale=config[CONF_SCALE],
                show_boxes=config[CONF_SHOW_BOXES],
                prev_objs_ignore=config[CONF_PREV_OBJECTS_IGNORE],
                prev_objs_pct=config[CONF_PREV_OBJECTS_PCT],
                save_file_folder=save_file_folder,
                save_file_format=config[CONF_SAVE_FILE_FORMAT],
                save_timestamped_file=config.get(CONF_SAVE_TIMESTAMPTED_FILE),
                always_save_latest_file=config.get(CONF_ALWAYS_SAVE_LATEST_FILE),
                camera_entity=camera.get(CONF_ENTITY_ID),
                name=camera.get(CONF_NAME),
            )
        )
    add_devices(entities)
class ObjectClassifyEntity(ImageProcessingEntity):
    """Perform a object classification.

    Detects objects via a deepstack server, filters them by configured
    targets / confidence / ROI, optionally ignores objects already seen in
    the previous scan, saves an annotated image, and fires a
    `deepstack.object_detected` event summarising the findings.
    """

    def __init__(
        self,
        ip_address,
        port,
        api_key,
        timeout,
        custom_model,
        targets,
        confidence,
        roi_y_min,
        roi_x_min,
        roi_y_max,
        roi_x_max,
        scale,
        show_boxes,
        prev_objs_ignore,
        prev_objs_pct,
        save_file_folder,
        save_file_format,
        save_timestamped_file,
        always_save_latest_file,
        camera_entity,
        name=None,
    ):
        """Init with the API key and model id."""
        super().__init__()
        self._dsobject = ds.DeepstackObject(
            ip=ip_address,
            port=port,
            api_key=api_key,
            timeout=timeout,
            min_confidence=MIN_CONFIDENCE,
            custom_model=custom_model,
        )
        self._custom_model = custom_model
        self._confidence = confidence
        self._summary = {}
        self._targets = targets
        # Targets without an explicit confidence inherit the platform-level one.
        for target in self._targets:
            if CONF_CONFIDENCE not in target.keys():
                target.update({CONF_CONFIDENCE: self._confidence})
        self._targets_names = [
            target[CONF_TARGET] for target in targets
        ]  # can be a name or a type
        self._camera = camera_entity
        if name:
            self._name = name
        else:
            camera_name = split_entity_id(camera_entity)[1]
            self._name = "deepstack_object_{}".format(camera_name)
        self._state = None
        self._objects = []  # The parsed raw data
        self._targets_found = []
        self._targets_last = {}  # used for storing the previous/last objects
        self._targets_last[self._camera] = []
        self._targets_latest = {}  # used for comparing latest objects
        self._targets_latest[self._camera] = []
        self._last_detection = None
        self._last_filename = None
        self._roi_dict = {
            "y_min": roi_y_min,
            "x_min": roi_x_min,
            "y_max": roi_y_max,
            "x_max": roi_x_max,
        }
        self._scale = scale
        self._show_boxes = show_boxes
        self._prev_objs_ignore = prev_objs_ignore
        self._prev_objs_pct = prev_objs_pct
        self._image_width = None
        self._image_height = None
        self._save_file_folder = save_file_folder
        self._save_file_format = save_file_format
        self._save_timestamped_file = save_timestamped_file
        # FIX: was assigned twice in the original; one assignment suffices.
        self._always_save_latest_file = always_save_latest_file
        self._image = None

    def process_image(self, image):
        """Process an image: detect, filter, save annotated file, fire event."""
        self._image = Image.open(io.BytesIO(bytearray(image)))
        self._image_width, self._image_height = self._image.size
        # resize image if different then default
        if self._scale != DEAULT_SCALE:
            # FIX: the second dimension previously also used the image WIDTH,
            # which distorted the aspect ratio; use the height, and round to
            # whole pixels for thumbnail().
            newsize = (
                int(self._image_width * self._scale),
                int(self._image_height * self._scale),
            )
            # Image.LANCZOS is the same filter as the removed ANTIALIAS alias
            # (ANTIALIAS was dropped in Pillow 10).
            self._image.thumbnail(newsize, Image.LANCZOS)
            self._image_width, self._image_height = self._image.size
            with io.BytesIO() as output:
                self._image.save(output, format="JPEG")
                image = output.getvalue()
            _LOGGER.debug(
                (
                    f"Image scaled with : {self._scale} W={self._image_width} H={self._image_height}"
                )
            )
        self._state = None
        self._objects = []  # The parsed raw data
        self._targets_found = []
        self._summary = {}
        saved_image_path = None
        try:
            predictions = self._dsobject.detect(image)
        except ds.DeepstackException as exc:
            _LOGGER.error("Deepstack error : %s", exc)
            return
        self._objects = get_objects(predictions, self._image_width, self._image_height)
        real_targets_found = []  # only real targets, excluding ignored targets
        # Reference to the objects found on the previous scan (empty on first run).
        latest_targets_cp = []
        if self._targets_latest and self._camera in self._targets_latest:
            latest_targets_cp = self._targets_latest[self._camera]
            # Promote the previous "latest" set to "last" before we overwrite it.
            self._targets_last[self._camera] = self._targets_latest[self._camera]
        ignore_count = 0
        target_count = 0
        for obj in self._objects:
            if not (
                (obj["name"] in self._targets_names)
                or (obj["object_type"] in self._targets_names)
            ):
                continue
            # Start from the platform default each iteration so one object's
            # threshold cannot leak into the next object's check.
            confidence = DEFAULT_CONFIDENCE
            # Type-level confidence first; a name-level confidence (second
            # loop) takes precedence over it.
            for target in self._targets:
                if obj["object_type"] == target[CONF_TARGET]:
                    confidence = target[CONF_CONFIDENCE]
            for target in self._targets:
                if obj["name"] == target[CONF_TARGET]:
                    confidence = target[CONF_CONFIDENCE]
            if obj["confidence"] > confidence:
                if not object_in_roi(self._roi_dict, obj["centroid"]):
                    continue
                # Ignore target if it was previously detected at (almost) the
                # same position: every box edge must differ by less than
                # _prev_objs_pct (in normalised 0-1 coordinates).
                ignore = "false"
                if self._prev_objs_ignore:
                    for last in latest_targets_cp:
                        if obj["name"] == last["name"]:
                            obj_box = obj["bounding_box"]
                            last_box = last["bounding_box"]
                            if (
                                round(abs(obj_box["x_min"] - last_box["x_min"]), 5) < self._prev_objs_pct
                                and round(abs(obj_box["x_max"] - last_box["x_max"]), 5) < self._prev_objs_pct
                                and round(abs(obj_box["y_min"] - last_box["y_min"]), 5) < self._prev_objs_pct
                                and round(abs(obj_box["y_max"] - last_box["y_max"]), 5) < self._prev_objs_pct
                            ):
                                ignore = "true"
                                ignore_count += 1
                                break
                obj["ignore"] = ignore
                obj["ignoreCount"] = ignore_count  # running count at time of append
                self._targets_found.append(obj)
                if ignore == "false":
                    target_count += 1
                    real_targets_found.append(obj)
        real_targets_found = [
            obj["name"] for obj in real_targets_found
        ]  # Just the list of target names, e.g. [car, car, person]
        self._summary = dict(Counter(real_targets_found))  # e.g. {'car':2, 'person':1}
        self._state = target_count
        if self._state > 0:
            # Set last_detection time (used in the timestamped filename below).
            self._last_detection = dt_util.now().strftime(DATETIME_FORMAT)
        # FIX: save the image BEFORE firing the event — previously the event
        # was fired first, so saved_image_path was always None and the
        # SAVED_FILE key never made it into the event payload.
        if self._save_file_folder and (self._state > 0 or self._always_save_latest_file):
            saved_image_path = self.save_image(
                self._targets_found,
                self._save_file_folder,
            )
        if self._state > 0:
            # Fire 1 event of all detected objects, with 'targets_found' and 'summary'
            target_event_data = {}
            target_event_data[ATTR_ENTITY_ID] = self.entity_id
            target_event_data["targets_found"] = real_targets_found
            target_event_data["summary"] = self._summary
            if saved_image_path:
                target_event_data[SAVED_FILE] = saved_image_path
            self.hass.bus.fire(EVENT_OBJECT_DETECTED, target_event_data)
        self._targets_latest[self._camera] = self._targets_found  # save the targets for next time

    @property
    def camera_entity(self):
        """Return camera entity id from process pictures."""
        return self._camera

    @property
    def state(self):
        """Return the state of the entity."""
        return self._state

    @property
    def name(self):
        """Return the name of the sensor."""
        return self._name

    @property
    def unit_of_measurement(self):
        """Return the unit of measurement."""
        return "targets"

    @property
    def should_poll(self):
        """Return the polling state."""
        return False

    @property
    def extra_state_attributes(self) -> Dict:
        """Return device specific state attributes."""
        attr = {}
        attr["targets"] = self._targets
        attr["targets_last"] = [
            {obj["name"]: obj["confidence"], "bounding_box": obj["bounding_box"]}
            for obj in self._targets_last[self._camera]
        ]
        attr["targets_found"] = [
            {obj["name"]: obj["confidence"], "bounding_box": obj["bounding_box"], "ignore": obj["ignore"]}
            for obj in self._targets_found
        ]
        attr["summary"] = self._summary
        if self._last_detection:
            attr["last_target_detection"] = self._last_detection
        if self._last_filename:
            # convert path to str, otherwise: "Object of type PosixPath is not JSON serializable"
            attr["last_filename"] = str(self._last_filename)
        if self._custom_model:
            attr["custom_model"] = self._custom_model
        attr["all_objects"] = [
            {obj["name"]: obj["confidence"], "bounding_box": obj["bounding_box"]}
            for obj in self._objects
        ]
        if self._save_file_folder:
            attr[CONF_SAVE_FILE_FOLDER] = str(self._save_file_folder)
            attr[CONF_SAVE_FILE_FORMAT] = self._save_file_format
            attr[CONF_SAVE_TIMESTAMPTED_FILE] = self._save_timestamped_file
            attr[CONF_ALWAYS_SAVE_LATEST_FILE] = self._always_save_latest_file
        return attr

    def save_image(self, targets, directory) -> str:
        """Draws the actual bounding box of the detected objects.

        Returns: saved_image_path, which is the path to the saved timestamped
        file if configured, else the default saved image.
        """
        try:
            img = self._image.convert("RGB")
        except UnidentifiedImageError:
            _LOGGER.warning("Deepstack unable to process image, bad data")
            return
        draw = ImageDraw.Draw(img)
        roi_tuple = tuple(self._roi_dict.values())
        if roi_tuple != DEFAULT_ROI and self._show_boxes:
            draw_box(
                draw,
                roi_tuple,
                img.width,
                img.height,
                text="ROI",
                color=SILVER,
            )
        if self._show_boxes:
            for obj in targets:
                name = obj["name"]
                confidence = obj["confidence"]
                box = obj["bounding_box"]
                centroid = obj["centroid"]
                box_label = f"{name}: {confidence:.0f}%"
                # Colour code: green = ignored (previously seen), red = person,
                # purple = everything else.
                boxColor = PURPLE
                if obj["ignore"] == "true":
                    boxColor = GREEN
                elif obj["object_type"] == PERSON:
                    boxColor = RED
                draw_box(
                    draw,
                    (box["y_min"], box["x_min"], box["y_max"], box["x_max"]),
                    img.width,
                    img.height,
                    text=box_label,
                    color=boxColor,
                )
                # draw bullseye
                draw.text(
                    (centroid["x"] * img.width, centroid["y"] * img.height),
                    text="X",
                    fill=boxColor,
                )
        # Save images, returning the path of saved image as str
        latest_save_path = (
            directory
            / f"{get_valid_filename(self._name.replace('deepstack_object_', '')).lower()}_latest.{self._save_file_format}"
        )
        img.save(latest_save_path)
        _LOGGER.info("Deepstack saved file %s", latest_save_path)
        saved_image_path = latest_save_path
        if self._save_timestamped_file:
            timestamp_save_path = (
                directory
                / f"{self._name.replace('deepstack_object_', '')}_{self._last_detection}.{self._save_file_format}"
            )
            img.save(timestamp_save_path)
            _LOGGER.info("Deepstack saved file %s", timestamp_save_path)
            saved_image_path = timestamp_save_path
        # Only update last_filename if state > 0
        if self._state > 0:
            self._last_filename = saved_image_path
        return str(saved_image_path)