diff --git a/custom_components/llmvision/__init__.py b/custom_components/llmvision/__init__.py
index 07aece5..d0b7d3f 100644
--- a/custom_components/llmvision/__init__.py
+++ b/custom_components/llmvision/__init__.py
@@ -43,7 +43,7 @@ from .calendar import SemanticIndex
 from .providers import Request
 from .media_handlers import MediaProcessor
-import os
+import os, re
 from datetime import timedelta
 from homeassistant.util import dt as dt_util
 from homeassistant.config_entries import ConfigEntry
@@ -162,28 +162,40 @@ async def _remember(hass, call, start, response) -> None:
     if config_entry is None:
         raise ServiceValidationError(
-            f"'Event Calendar' config not found")
+            f"Config entry not found. Please create the 'Event Calendar' config entry first.")
 
     semantic_index = SemanticIndex(hass, config_entry)
 
-    title = response.get("title", "Unknown object seen")
-
-    if call.image_entities and len(call.image_entities) > 0:
-        camera_name = call.image_entities[0]
-    elif call.video_paths and len(call.video_paths) > 0:
-        camera_name = call.video_paths[0].split(
-            "/")[-1].replace(".mp4", "")
-    else:
-        camera_name = "Unknown"
-
-    camera_name = camera_name.replace(
-        "camera.", "").replace("image.", "").capitalize()
+    if "title" in response:
+        title = response.get("title", "Unknown object seen")
+        if call.image_entities and len(call.image_entities) > 0:
+            camera_name = call.image_entities[0]
+        elif call.video_paths and len(call.video_paths) > 0:
+            camera_name = call.video_paths[0].split(
+                "/")[-1].replace(".mp4", "")
+        else:
+            camera_name = "File Input"
+
+    if "title" not in response:
+        if call.image_entities and len(call.image_entities) > 0:
+            camera_name = call.image_entities[0]
+            title = "Motion detected near " + camera_name
+        elif call.video_paths and len(call.video_paths) > 0:
+            camera_name = call.video_paths[0].split(
+                "/")[-1].replace(".mp4", "")
+            title = "Motion detected in " + camera_name
+        else:
+            camera_name = "File Input"
+            title = "Motion detected"
+
+    if "response_text" not in response:
+        raise ValueError("response_text is missing in the response")
 
     await semantic_index.remember(
         start=start,
         end=dt_util.now() + timedelta(minutes=1),
-        label=title + " near " + camera_name if camera_name != "Unknown" else title,
-        camera_name=camera_name if camera_name != "Unknown" else "Image Input",
+        label=title,
+        camera_name=camera_name,
         summary=response["response_text"]
     )
 
@@ -192,11 +204,12 @@ async def _update_sensor(hass, sensor_entity: str, new_value: str | int, type: s
     """Update the value of a sensor entity."""
     # Attempt to parse the response
     if type == "boolean" and new_value.lower() not in ["on", "off"]:
-        if new_value.lower() in ["true", "false"]:
-            new_value = "on" if new_value.lower() == "true" else "off"
-        elif new_value.split(" ")[0].replace(",", "").lower() == "yes":
+        new_value_lower = new_value.lower()
+        if new_value_lower in ["true", "false"]:
+            new_value = "on" if new_value_lower == "true" else "off"
+        elif re.match(r"^\s*yes\s*[,]*", new_value_lower):
             new_value = "on"
-        elif new_value.split(" ")[0].replace(",", "").lower() == "no":
+        elif re.match(r"^\s*no\s*[,]*", new_value_lower):
             new_value = "off"
         else:
             raise ServiceValidationError(
@@ -217,7 +230,8 @@ async def _update_sensor(hass, sensor_entity: str, new_value: str | int, type: s
     _LOGGER.info(
         f"Updating sensor {sensor_entity} with new value: {new_value}")
     try:
-        current_attributes = hass.states.get(sensor_entity).attributes.copy()
+        current_attributes = hass.states.get(
+            sensor_entity).attributes.copy()
         hass.states.async_set(sensor_entity, new_value, current_attributes)
     except Exception as e:
         _LOGGER.error(f"Failed to update sensor {sensor_entity}: {e}")
@@ -244,15 +258,18 @@ def __init__(self, data_call):
             "\n") if data_call.data.get(EVENT_ID) else None
         self.interval = int(data_call.data.get(INTERVAL, 2))
         self.duration = int(data_call.data.get(DURATION, 10))
-        self.frigate_retry_attempts = int(data_call.data.get(FRIGATE_RETRY_ATTEMPTS, 2))
-        self.frigate_retry_seconds = int(data_call.data.get(FRIGATE_RETRY_SECONDS, 1))
+        self.frigate_retry_attempts = int(
+            data_call.data.get(FRIGATE_RETRY_ATTEMPTS, 2))
+        self.frigate_retry_seconds = int(
+            data_call.data.get(FRIGATE_RETRY_SECONDS, 1))
         self.max_frames = int(data_call.data.get(MAX_FRAMES, 3))
         self.target_width = data_call.data.get(TARGET_WIDTH, 3840)
         self.temperature = float(data_call.data.get(TEMPERATURE, 0.3))
         self.max_tokens = int(data_call.data.get(MAXTOKENS, 100))
         self.include_filename = data_call.data.get(INCLUDE_FILENAME, False)
         self.expose_images = data_call.data.get(EXPOSE_IMAGES, False)
-        self.expose_images_persist = data_call.data.get(EXPOSE_IMAGES_PERSIST, False)
+        self.expose_images_persist = data_call.data.get(
+            EXPOSE_IMAGES_PERSIST, False)
         self.generate_title = data_call.data.get(GENERATE_TITLE, False)
         self.sensor_entity = data_call.data.get(SENSOR_ENTITY)
         # ------------ Added during call ------------
@@ -297,7 +314,7 @@ async def video_analyzer(data_call):
     start = dt_util.now()
     call = ServiceCallData(data_call).get_service_call_data()
     call.message = "The attached images are frames from a video. " + call.message
-    
+
     request = Request(hass,
                       message=call.message,
                       max_tokens=call.max_tokens,
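Note on the reworked _remember() above: when the provider returned a "title" (generate_title enabled), it is used as the calendar label; otherwise a generic "Motion detected ..." label is synthesized from the input source, and camera_name falls back to "File Input". A condensed, standalone sketch of that fallback, equivalent in behavior (function name and sample paths are illustrative, not part of the patch):

    def derive_label(response, image_entities, video_paths):
        # Mirrors the patched fallback in _remember: prefer the generated
        # title, otherwise synthesize one from the input source.
        if image_entities:
            camera_name = image_entities[0]
            title = response.get("title", "Motion detected near " + camera_name)
        elif video_paths:
            # e.g. "/media/clip.mp4" -> "clip"
            camera_name = video_paths[0].split("/")[-1].replace(".mp4", "")
            title = response.get("title", "Motion detected in " + camera_name)
        else:
            camera_name = "File Input"
            title = response.get("title", "Motion detected")
        return title, camera_name

    assert derive_label({}, ["camera.backyard"], []) == (
        "Motion detected near camera.backyard", "camera.backyard")
    assert derive_label({"title": "Fox in the yard"}, [], ["/media/clip.mp4"]) == (
        "Fox in the yard", "clip")

Note that the old "camera."/"image." prefix stripping was dropped, so labels now keep the raw entity id.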
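On the boolean parsing in _update_sensor: matching switched from comparing the first word to a prefix regex, so answers such as "Yes, a person is visible." parse cleanly. A standalone sketch of the patched behavior (the helper name is illustrative); note one caveat, that without a word boundary the "no" pattern also matches words that merely start with "no":

    import re

    def parse_boolean(new_value: str) -> str:
        # Mirrors the patched branch in _update_sensor.
        new_value_lower = new_value.lower()
        if new_value_lower in ["true", "false"]:
            return "on" if new_value_lower == "true" else "off"
        elif re.match(r"^\s*yes\s*[,]*", new_value_lower):
            return "on"
        elif re.match(r"^\s*no\s*[,]*", new_value_lower):
            return "off"
        raise ValueError(f"Response {new_value} could not be parsed as boolean")

    assert parse_boolean("Yes, a person is visible.") == "on"
    assert parse_boolean("No people in frame") == "off"
    # Caveat: "Nothing detected" also maps to "off", because "no" is a
    # prefix of "nothing"; r"^\s*no\b" would be stricter.
    assert parse_boolean("Nothing detected") == "off"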
diff --git a/custom_components/llmvision/calendar.py b/custom_components/llmvision/calendar.py
index 70a2eaa..4268df6 100644
--- a/custom_components/llmvision/calendar.py
+++ b/custom_components/llmvision/calendar.py
@@ -39,6 +39,8 @@ def __init__(self, hass: HomeAssistant, config_entry: ConfigEntry):
         self._file_path = os.path.join(
             self.hass.config.path("llmvision"), "events.json"
         )
+        # Ensure the directory exists
+        os.makedirs(os.path.dirname(self._file_path), exist_ok=True)
         self.hass.loop.create_task(self.async_update())
 
     def _ensure_datetime(self, dt):
diff --git a/custom_components/llmvision/media_handlers.py b/custom_components/llmvision/media_handlers.py
index bf43f53..5084d4f 100644
--- a/custom_components/llmvision/media_handlers.py
+++ b/custom_components/llmvision/media_handlers.py
@@ -422,7 +422,7 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
                     sorted_frames.append(frames[0])
 
         # Add frames to client
-        for counter, frame_path, _ in enumerate(sorted_frames, start=1):
+        for counter, (frame_path, _) in enumerate(sorted_frames, start=1):
             resized_image = await self.resize_image(image_path=frame_path, target_width=target_width)
             if expose_images:
                 persist_filename = f"/config/www/llmvision/" + frame_path.split("/")[-1]
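The media_handlers change fixes a real crash: sorted_frames holds pairs, and enumerate() yields (counter, item) 2-tuples, so the old three-name unpacking raised "ValueError: not enough values to unpack (expected 3, got 2)". A minimal repro/fix sketch (the frame data is illustrative):

    sorted_frames = [("/tmp/frame_0.jpg", 0.42), ("/tmp/frame_1.jpg", 0.87)]

    # Old: for counter, frame_path, _ in enumerate(sorted_frames, start=1)
    #      -> ValueError, since each item is already a (path, score) pair.
    # Fixed: parenthesize to unpack the pair nested inside enumerate's tuple.
    for counter, (frame_path, _) in enumerate(sorted_frames, start=1):
        print(counter, frame_path)  # 1 /tmp/frame_0.jpg, 2 /tmp/frame_1.jpg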
diff --git a/custom_components/llmvision/providers.py b/custom_components/llmvision/providers.py
index 3e939eb..71a381d 100644
--- a/custom_components/llmvision/providers.py
+++ b/custom_components/llmvision/providers.py
@@ -3,6 +3,7 @@ from homeassistant.helpers.aiohttp_client import async_get_clientsession
 import logging
 import inspect
+import re
 from .const import (
     DOMAIN,
     CONF_OPENAI_API_KEY,
@@ -145,19 +146,15 @@ async def call(self, call):
 
         elif provider == 'Anthropic':
             api_key = config.get(CONF_ANTHROPIC_API_KEY)
-
             provider_instance = Anthropic(self.hass, api_key=api_key)
 
         elif provider == 'Google':
             api_key = config.get(CONF_GOOGLE_API_KEY)
-
-            provider_instance = Google(self.hass, api_key=api_key, endpoint={
-                'base_url': ENDPOINT_GOOGLE, 'model': call.model
-            })
+            model = call.model if call.model and call.model != "None" else "gemini-1.5-flash-latest"
+            provider_instance = Google(self.hass, api_key=api_key, endpoint={'base_url': ENDPOINT_GOOGLE, 'model': model})
 
         elif provider == 'Groq':
             api_key = config.get(CONF_GROQ_API_KEY)
-
             provider_instance = Groq(self.hass, api_key=api_key)
 
         elif provider == 'LocalAI':
@@ -204,7 +201,7 @@ async def call(self, call):
             call.message = gen_title_prompt.format(response=response_text)
             gen_title = await provider_instance.title_request(call)
 
-            return {"title": gen_title.replace(".", "").replace("'", ""), "response_text": response_text}
+            return {"title": re.sub(r'[^a-zA-Z0-9\s]', '', gen_title), "response_text": response_text}
         else:
             return {"response_text": response_text}
 
@@ -283,7 +280,7 @@ async def vision_request(self, call) -> str:
 
     async def title_request(self, call) -> str:
         call.temperature = 0.1
-        call.max_tokens = 3
+        call.max_tokens = 5
         data = self._prepare_text_data(call)
         return await self._make_request(data)
 
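In the Google branch above, call.model can apparently arrive as the literal string "None" (not just Python None), so a plain truthiness check is not enough; the patch falls back to "gemini-1.5-flash-latest" in both cases. A standalone sketch (the helper name is illustrative):

    def resolve_model(call_model):
        # Mirrors the patched fallback: treat None, "" and the literal
        # string "None" all as "no model selected".
        return call_model if call_model and call_model != "None" else "gemini-1.5-flash-latest"

    assert resolve_model(None) == "gemini-1.5-flash-latest"
    assert resolve_model("None") == "gemini-1.5-flash-latest"
    assert resolve_model("gemini-1.5-pro") == "gemini-1.5-pro"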
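The title cleanup also broadens: instead of stripping only "." and "'", the patched return removes every character outside letters, digits and whitespace, which catches the quotes, commas and exclamation marks models often wrap titles in (at the cost of also dropping hyphens and accented letters). Together with raising title_request's max_tokens from 3 to 5, this yields usable multi-word titles. A sketch of the sanitizer (the helper name is illustrative):

    import re

    def sanitize_title(gen_title: str) -> str:
        # Mirrors the patched return value in Request.call.
        return re.sub(r'[^a-zA-Z0-9\s]', '', gen_title)

    assert sanitize_title('"Person at front door!"') == 'Person at front door'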
diff --git a/custom_components/llmvision/services.yaml b/custom_components/llmvision/services.yaml
index f430a87..136fe50 100644
--- a/custom_components/llmvision/services.yaml
+++ b/custom_components/llmvision/services.yaml
@@ -92,7 +92,7 @@ image_analyzer:
   generate_title:
     name: Generate Title
     required: false
-    description: Generate a title. (Used for notifications)
+    description: Generate a title. (Used for notifications and remembered events)
    default: false
    selector:
      boolean:
@@ -104,6 +104,15 @@ image_analyzer:
    default: false
    selector:
      boolean:
+  expose_images_persist:
+    name: Persist Exposed Images
+    description: (Experimental) Normally exposed images are re-written with each new event. Setting this to true will include the Frigate eventID, if available, as part of the filename. If there is no
+      Frigate eventID, a uid will be used instead.
+    required: false
+    example: false
+    default: false
+    selector:
+      boolean:
 
 video_analyzer:
   name: Video Analyzer
@@ -158,8 +167,8 @@ video_analyzer:
      multiline: true
  frigate_retry_attempts:
    name: Frigate Retry Attempts
-    description: How many times to retry fetching the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended.
-      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces.
+    description: How many times to retry fetching the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended. 
+      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces. 
    required: false
    example: 2
    default: 2
    selector:
      number:
        min: 1
        max: 10
        step: 1
@@ -170,8 +179,8 @@ video_analyzer:
  frigate_retry_seconds:
    name: Frigate Retry Seconds
-    description: How long to wait between retries to fetch the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended.
-      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces.
+    description: How long to wait between retries to fetch the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended. 
+      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces. 
    required: false
    example: 1
    default: 1
@@ -230,6 +239,13 @@ video_analyzer:
        min: 0.0
        max: 1.0
        step: 0.1
+  generate_title:
+    name: Generate Title
+    required: false
+    description: Generate a title. (Used for notifications and remembered events)
+    default: false
+    selector:
+      boolean:
  expose_images:
    name: Expose Images
    description: (Experimental) Expose analyzed frames after processing. This will save analyzed frames in /www/llmvision so they can be used for notifications. (Only works for entity input, include camera name should be enabled). Existing files will be overwritten.
@@ -240,8 +256,8 @@ video_analyzer:
    required: false
    example: false
    default: false
    selector:
      boolean:
  expose_images_persist:
    name: Persist Exposed Images
-    description: Normally exposed images are re-written with each new event. Setting this to true will include the Frigate eventID, if available, as part of the filename. If there is no
-      Frigate eventID, a guid will be used instead.
+    description: (Experimental) Normally exposed images are re-written with each new event. Setting this to true will include the Frigate eventID, if available, as part of the filename. If there is no
+      Frigate eventID, a uid will be used instead.
    required: false
    example: false
    default: false
@@ -352,6 +368,13 @@ stream_analyzer:
        min: 0.1
        max: 1.0
        step: 0.1
+  generate_title:
+    name: Generate Title
+    required: false
+    description: Generate a title. (Used for notifications and remembered events)
+    default: false
+    selector:
+      boolean:
  expose_images:
    name: Expose Images
    description: (Experimental) Expose analyzed frames after processing. This will save analyzed frames in /www/llmvision so they can be used for notifications. (Only works for entity input, include camera name should be enabled). Existing files will be overwritten.
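With the new service fields in place, a video_analyzer call exercising them looks roughly like the sketch below. This is a hypothetical call from test or integration code: the event_id value and message are invented, and only the option keys shown in the selectors above are taken from the patch.

    # Hedged usage sketch, not part of the patch.
    await hass.services.async_call(
        "llmvision",
        "video_analyzer",
        {
            "event_id": "1712345678.123456-abcdef",  # hypothetical Frigate event id
            "message": "Describe what happens in this clip.",
            "frigate_retry_attempts": 2,   # retry clip fetch on slow machines
            "frigate_retry_seconds": 1,
            "generate_title": True,        # titles now also label remembered events
            "expose_images": True,
            "expose_images_persist": True, # keep frames, named by event id / uid
        },
        blocking=True,
    )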