Commit

Closes "Calendar is deleted every update" #95, genAI title now used for events
valentinfrlch committed Dec 26, 2024
1 parent 38973c8 commit 1ab09f6
Showing 5 changed files with 80 additions and 41 deletions.
67 changes: 42 additions & 25 deletions custom_components/llmvision/__init__.py
@@ -43,7 +43,7 @@
 from .calendar import SemanticIndex
 from .providers import Request
 from .media_handlers import MediaProcessor
-import os
+import os, re
 from datetime import timedelta
 from homeassistant.util import dt as dt_util
 from homeassistant.config_entries import ConfigEntry
@@ -162,28 +162,40 @@ async def _remember(hass, call, start, response) -> None:

     if config_entry is None:
         raise ServiceValidationError(
-            f"'Event Calendar' config not found")
+            f"Config entry not found. Please create the 'Event Calendar' config entry first.")

     semantic_index = SemanticIndex(hass, config_entry)

-    title = response.get("title", "Unknown object seen")
-
-    if call.image_entities and len(call.image_entities) > 0:
-        camera_name = call.image_entities[0]
-    elif call.video_paths and len(call.video_paths) > 0:
-        camera_name = call.video_paths[0].split(
-            "/")[-1].replace(".mp4", "")
-    else:
-        camera_name = "Unknown"
-
-    camera_name = camera_name.replace(
-        "camera.", "").replace("image.", "").capitalize()
+    if "title" in response:
+        title = response.get("title", "Unknown object seen")
+        if call.image_entities and len(call.image_entities) > 0:
+            camera_name = call.image_entities[0]
+        elif call.video_paths and len(call.video_paths) > 0:
+            camera_name = call.video_paths[0].split(
+                "/")[-1].replace(".mp4", "")
+        else:
+            camera_name = "File Input"
+
+    if "title" not in response:
+        if call.image_entities and len(call.image_entities) > 0:
+            camera_name = call.image_entities[0]
+            title = "Motion detected near " + camera_name
+        elif call.video_paths and len(call.video_paths) > 0:
+            camera_name = call.video_paths[0].split(
+                "/")[-1].replace(".mp4", "")
+            title = "Motion detected in " + camera_name
+        else:
+            camera_name = "File Input"
+            title = "Motion detected"
+
+    if "response_text" not in response:
+        raise ValueError("response_text is missing in the response")

     await semantic_index.remember(
         start=start,
         end=dt_util.now() + timedelta(minutes=1),
-        label=title + " near " + camera_name if camera_name != "Unknown" else title,
-        camera_name=camera_name if camera_name != "Unknown" else "Image Input",
+        label=title,
+        camera_name=camera_name,
         summary=response["response_text"]
     )

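Note on the new _remember flow: when a genAI title is present in the response, it is used directly as the calendar event label; when it is absent, the fallback now builds a "Motion detected ..." label from the camera entity or video filename instead of the old "Unknown" handling. A hedged sketch of how an automation might exercise this path (service fields other than generate_title are assumptions for illustration, not confirmed by this commit):

    # Illustrative only: entity and field names are assumed, not taken from this diff.
    result = await hass.services.async_call(
        "llmvision", "image_analyzer",
        {
            "image_entity": ["camera.front_door"],  # assumed field name
            "message": "Describe what the camera sees",
            "remember": True,          # routes the response through _remember()
            "generate_title": True,    # response["title"] becomes the event label
        },
        blocking=True,
        return_response=True,
    )
    # With generate_title off, the event label falls back to
    # "Motion detected near camera.front_door".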
@@ -192,11 +204,12 @@ async def _update_sensor(hass, sensor_entity: str, new_value: str | int, type: s
     """Update the value of a sensor entity."""
     # Attempt to parse the response
     if type == "boolean" and new_value.lower() not in ["on", "off"]:
-        if new_value.lower() in ["true", "false"]:
-            new_value = "on" if new_value.lower() == "true" else "off"
-        elif new_value.split(" ")[0].replace(",", "").lower() == "yes":
+        new_value_lower = new_value.lower()
+        if new_value_lower in ["true", "false"]:
+            new_value = "on" if new_value_lower == "true" else "off"
+        elif re.match(r"^\s*yes\s*[,]*", new_value_lower):
             new_value = "on"
-        elif new_value.split(" ")[0].replace(",", "").lower() == "no":
+        elif re.match(r"^\s*no\s*[,]*", new_value_lower):
             new_value = "off"
         else:
             raise ServiceValidationError(
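The regex-based boolean parsing above is more forgiving than the old first-word comparison: it tolerates leading whitespace and a trailing comma around "yes"/"no". A minimal sketch of the matching behavior (not part of the diff):

    import re

    for answer in ["yes", "Yes, the door is open", "  no,", "maybe"]:
        lowered = answer.lower()
        if re.match(r"^\s*yes\s*[,]*", lowered):
            print(answer, "-> on")
        elif re.match(r"^\s*no\s*[,]*", lowered):
            print(answer, "-> off")
        else:
            print(answer, "-> ServiceValidationError")  # unparseable answer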
@@ -217,7 +230,8 @@ async def _update_sensor(hass, sensor_entity: str, new_value: str | int, type: s
         _LOGGER.info(
             f"Updating sensor {sensor_entity} with new value: {new_value}")
         try:
-            current_attributes = hass.states.get(sensor_entity).attributes.copy()
+            current_attributes = hass.states.get(
+                sensor_entity).attributes.copy()
             hass.states.async_set(sensor_entity, new_value, current_attributes)
         except Exception as e:
             _LOGGER.error(f"Failed to update sensor {sensor_entity}: {e}")
@@ -244,15 +258,18 @@ def __init__(self, data_call):
             "\n") if data_call.data.get(EVENT_ID) else None
         self.interval = int(data_call.data.get(INTERVAL, 2))
         self.duration = int(data_call.data.get(DURATION, 10))
-        self.frigate_retry_attempts = int(data_call.data.get(FRIGATE_RETRY_ATTEMPTS, 2))
-        self.frigate_retry_seconds = int(data_call.data.get(FRIGATE_RETRY_SECONDS, 1))
+        self.frigate_retry_attempts = int(
+            data_call.data.get(FRIGATE_RETRY_ATTEMPTS, 2))
+        self.frigate_retry_seconds = int(
+            data_call.data.get(FRIGATE_RETRY_SECONDS, 1))
         self.max_frames = int(data_call.data.get(MAX_FRAMES, 3))
         self.target_width = data_call.data.get(TARGET_WIDTH, 3840)
         self.temperature = float(data_call.data.get(TEMPERATURE, 0.3))
         self.max_tokens = int(data_call.data.get(MAXTOKENS, 100))
         self.include_filename = data_call.data.get(INCLUDE_FILENAME, False)
         self.expose_images = data_call.data.get(EXPOSE_IMAGES, False)
-        self.expose_images_persist = data_call.data.get(EXPOSE_IMAGES_PERSIST, False)
+        self.expose_images_persist = data_call.data.get(
+            EXPOSE_IMAGES_PERSIST, False)
         self.generate_title = data_call.data.get(GENERATE_TITLE, False)
         self.sensor_entity = data_call.data.get(SENSOR_ENTITY)
         # ------------ Added during call ------------
@@ -297,7 +314,7 @@ async def video_analyzer(data_call):
     start = dt_util.now()
     call = ServiceCallData(data_call).get_service_call_data()
     call.message = "The attached images are frames from a video. " + call.message
-
+
     request = Request(hass,
                       message=call.message,
                       max_tokens=call.max_tokens,
2 changes: 2 additions & 0 deletions custom_components/llmvision/calendar.py
@@ -39,6 +39,8 @@ def __init__(self, hass: HomeAssistant, config_entry: ConfigEntry):
         self._file_path = os.path.join(
             self.hass.config.path("llmvision"), "events.json"
         )
+        # Ensure the directory exists
+        os.makedirs(os.path.dirname(self._file_path), exist_ok=True)
         self.hass.loop.create_task(self.async_update())

     def _ensure_datetime(self, dt):
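This makedirs call appears to be the fix for #95: if the llmvision storage directory goes missing after an update, writing events.json fails and the calendar comes up empty. With exist_ok=True the call is idempotent, so it is safe on every startup (a minimal sketch; the path is for illustration):

    import os

    file_path = "/config/llmvision/events.json"  # illustrative path
    os.makedirs(os.path.dirname(file_path), exist_ok=True)  # creates the folder only if missing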
2 changes: 1 addition & 1 deletion custom_components/llmvision/media_handlers.py
@@ -422,7 +422,7 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
             sorted_frames.append(frames[0])

         # Add frames to client
-        for counter, frame_path, _ in enumerate(sorted_frames, start=1):
+        for counter, (frame_path, _) in enumerate(sorted_frames, start=1):
             resized_image = await self.resize_image(image_path=frame_path, target_width=target_width)
             if expose_images:
                 persist_filename = f"/config/www/llmvision/" + frame_path.split("/")[-1]
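The one-line change above fixes a real unpacking bug: enumerate() yields (index, item) pairs, so unpacking three names from a list of 2-tuples raises ValueError; parenthesizing nests the unpacking. A quick sketch with illustrative data:

    frames = [("/tmp/frame1.jpg", 0.9), ("/tmp/frame2.jpg", 0.7)]  # (path, score) pairs

    # Old form raised: ValueError: not enough values to unpack (expected 3, got 2)
    # for counter, frame_path, _ in enumerate(frames, start=1): ...

    # Fixed form: enumerate yields (counter, (path, score))
    for counter, (frame_path, _) in enumerate(frames, start=1):
        print(counter, frame_path)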
13 changes: 5 additions & 8 deletions custom_components/llmvision/providers.py
@@ -3,6 +3,7 @@
 from homeassistant.helpers.aiohttp_client import async_get_clientsession
 import logging
 import inspect
+import re
 from .const import (
     DOMAIN,
     CONF_OPENAI_API_KEY,
@@ -145,19 +146,15 @@ async def call(self, call):

         elif provider == 'Anthropic':
             api_key = config.get(CONF_ANTHROPIC_API_KEY)
-
             provider_instance = Anthropic(self.hass, api_key=api_key)

         elif provider == 'Google':
             api_key = config.get(CONF_GOOGLE_API_KEY)
-
-            provider_instance = Google(self.hass, api_key=api_key, endpoint={
-                'base_url': ENDPOINT_GOOGLE, 'model': call.model
-            })
+            model = call.model if call.model and call.model != "None" else "gemini-1.5-flash-latest"
+            provider_instance = Google(self.hass, api_key=api_key, endpoint={'base_url': ENDPOINT_GOOGLE, 'model': model})

         elif provider == 'Groq':
             api_key = config.get(CONF_GROQ_API_KEY)
-
             provider_instance = Groq(self.hass, api_key=api_key)

         elif provider == 'LocalAI':
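The Google branch now guards against the model arriving as the literal string "None", which is truthy, so a plain "call.model or default" would not catch it. A minimal sketch of the fallback (function name illustrative):

    def pick_model(model):
        # Explicit comparison is required: "None" (the string) is truthy.
        return model if model and model != "None" else "gemini-1.5-flash-latest"

    assert pick_model(None) == "gemini-1.5-flash-latest"
    assert pick_model("None") == "gemini-1.5-flash-latest"
    assert pick_model("gemini-1.5-pro") == "gemini-1.5-pro"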
@@ -204,7 +201,7 @@ async def call(self, call):
             call.message = gen_title_prompt.format(response=response_text)
             gen_title = await provider_instance.title_request(call)

-            return {"title": gen_title.replace(".", "").replace("'", ""), "response_text": response_text}
+            return {"title": re.sub(r'[^a-zA-Z0-9\s]', '', gen_title), "response_text": response_text}
         else:
             return {"response_text": response_text}

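The single re.sub above strips all punctuation from the generated title, not just the periods and apostrophes the old chained replaces handled, so quotes, commas, and other symbols no longer leak into event labels. A quick comparison (the example title is illustrative):

    import re

    gen_title = '"Package" delivered, at front-door!'
    old = gen_title.replace(".", "").replace("'", "")  # leaves quotes, commas, hyphens, "!" behind
    new = re.sub(r'[^a-zA-Z0-9\s]', '', gen_title)     # keeps only letters, digits, whitespace
    print(old)  # "Package" delivered, at front-door!
    print(new)  # Package delivered at frontdoor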
@@ -283,7 +280,7 @@ async def vision_request(self, call) -> str:

     async def title_request(self, call) -> str:
         call.temperature = 0.1
-        call.max_tokens = 3
+        call.max_tokens = 5
         data = self._prepare_text_data(call)
         return await self._make_request(data)

37 changes: 30 additions & 7 deletions custom_components/llmvision/services.yaml
@@ -92,7 +92,7 @@ image_analyzer:
   generate_title:
     name: Generate Title
     required: false
-    description: Generate a title. (Used for notifications)
+    description: Generate a title. (Used for notifications and remembered events)
     default: false
     selector:
       boolean:
@@ -104,6 +104,15 @@ image_analyzer:
       default: false
     selector:
       boolean:
+  expose_images_persist:
+    name: Persist Exposed Images
+    description: (Experimental) Normally exposed images are re-written with each new event. Setting this to true will include the Frigate eventID, if available, as part of the filename. If there is no
+      Frigate eventID, a uid will be used instead.
+    required: false
+    example: false
+    default: false
+    selector:
+      boolean:

 video_analyzer:
   name: Video Analyzer
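The description above implies a naming scheme keyed by the Frigate event ID, with a uid fallback when no event ID is available. A hypothetical sketch of such naming (not the component's actual code):

    import uuid

    def persist_filename(frigate_event_id=None):
        # Hypothetical: stable name per Frigate event, random uid otherwise.
        key = frigate_event_id if frigate_event_id else uuid.uuid4().hex
        return f"/config/www/llmvision/{key}.jpg"

    print(persist_filename("1703577600.123456-abc"))  # keyed by Frigate event ID
    print(persist_filename())                          # falls back to a uid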
@@ -158,8 +167,8 @@ video_analyzer:
         multiline: true
   frigate_retry_attempts:
     name: Frigate Retry Attempts
-    description: How many times to retry fetching the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended.
-      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces.
+    description: How many times to retry fetching the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended.
+      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces.
     required: false
     example: 2
     default: 2
@@ -170,8 +179,8 @@ video_analyzer:
         step: 1
   frigate_retry_seconds:
     name: Frigate Retry Seconds
-    description: How long to wait between retries to fetch the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended.
-      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces.
+    description: How long to wait between retries to fetch the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended.
+      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces.
     required: false
     example: 1
     default: 1
@@ -230,6 +239,13 @@ video_analyzer:
         min: 0.0
         max: 1.0
         step: 0.1
+  generate_title:
+    name: Generate Title
+    required: false
+    description: Generate a title. (Used for notifications and remembered events)
+    default: false
+    selector:
+      boolean:
   expose_images:
     name: Expose Images
     description: (Experimental) Expose analyzed frames after processing. This will save analyzed frames in /www/llmvision so they can be used for notifications. (Only works for entity input, include camera name should be enabled). Existing files will be overwritten.
@@ -240,8 +256,8 @@ video_analyzer:
       boolean:
   expose_images_persist:
     name: Persist Exposed Images
-    description: Normally exposed images are re-written with each new event. Setting this to true will include the Frigate eventID, if available, as part of the filename. If there is no
-      Frigate eventID, a guid will be used instead.
+    description: (Experimental) Normally exposed images are re-written with each new event. Setting this to true will include the Frigate eventID, if available, as part of the filename. If there is no
+      Frigate eventID, a uid will be used instead.
     required: false
     example: false
     default: false
@@ -352,6 +368,13 @@ stream_analyzer:
         min: 0.1
         max: 1.0
         step: 0.1
+  generate_title:
+    name: Generate Title
+    required: false
+    description: Generate a title. (Used for notifications and remembered events)
+    default: false
+    selector:
+      boolean:
   expose_images:
     name: Expose Images
     description: (Experimental) Expose analyzed frames after processing. This will save analyzed frames in /www/llmvision so they can be used for notifications. (Only works for entity input, include camera name should be enabled). Existing files will be overwritten.
