Commit

Closes "Calendar is deleted every update" #95, genAI title now used for events
valentinfrlch committed Dec 26, 2024
1 parent 38973c8 commit 1ab09f6
Showing 5 changed files with 80 additions and 41 deletions.
67 changes: 42 additions & 25 deletions custom_components/llmvision/__init__.py
@@ -43,7 +43,7 @@
 from .calendar import SemanticIndex
 from .providers import Request
 from .media_handlers import MediaProcessor
-import os
+import os, re
 from datetime import timedelta
 from homeassistant.util import dt as dt_util
 from homeassistant.config_entries import ConfigEntry
@@ -162,28 +162,40 @@ async def _remember(hass, call, start, response) -> None:

     if config_entry is None:
         raise ServiceValidationError(
-            f"'Event Calendar' config not found")
+            f"Config entry not found. Please create the 'Event Calendar' config entry first.")

     semantic_index = SemanticIndex(hass, config_entry)

-    title = response.get("title", "Unknown object seen")
-
-    if call.image_entities and len(call.image_entities) > 0:
-        camera_name = call.image_entities[0]
-    elif call.video_paths and len(call.video_paths) > 0:
-        camera_name = call.video_paths[0].split(
-            "/")[-1].replace(".mp4", "")
-    else:
-        camera_name = "Unknown"
-
-    camera_name = camera_name.replace(
-        "camera.", "").replace("image.", "").capitalize()
+    if "title" in response:
+        title = response.get("title", "Unknown object seen")
+        if call.image_entities and len(call.image_entities) > 0:
+            camera_name = call.image_entities[0]
+        elif call.video_paths and len(call.video_paths) > 0:
+            camera_name = call.video_paths[0].split(
+                "/")[-1].replace(".mp4", "")
+        else:
+            camera_name = "File Input"
+
+    if "title" not in response:
+        if call.image_entities and len(call.image_entities) > 0:
+            camera_name = call.image_entities[0]
+            title = "Motion detected near " + camera_name
+        elif call.video_paths and len(call.video_paths) > 0:
+            camera_name = call.video_paths[0].split(
+                "/")[-1].replace(".mp4", "")
+            title = "Motion detected in " + camera_name
+        else:
+            camera_name = "File Input"
+            title = "Motion detected"
+
+    if "response_text" not in response:
+        raise ValueError("response_text is missing in the response")

     await semantic_index.remember(
         start=start,
         end=dt_util.now() + timedelta(minutes=1),
-        label=title + " near " + camera_name if camera_name != "Unknown" else title,
-        camera_name=camera_name if camera_name != "Unknown" else "Image Input",
+        label=title,
+        camera_name=camera_name,
         summary=response["response_text"]
     )

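Note on the new _remember flow: when a genAI title is present in the response, it is used directly as the calendar event label; when it is absent, the fallback now builds a "Motion detected ..." label from the camera entity or video filename instead of the old "Unknown" handling. A hedged sketch of how an automation might exercise this path (service fields other than generate_title are assumptions for illustration, not confirmed by this commit):

    # Illustrative only: entity and field names are assumed, not taken from this diff.
    result = await hass.services.async_call(
        "llmvision", "image_analyzer",
        {
            "image_entity": ["camera.front_door"],  # assumed field name
            "message": "Describe what the camera sees",
            "remember": True,          # routes the response through _remember()
            "generate_title": True,    # response["title"] becomes the event label
        },
        blocking=True,
        return_response=True,
    )
    # With generate_title off, the event label falls back to
    # "Motion detected near camera.front_door".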
@@ -192,11 +204,12 @@ async def _update_sensor(hass, sensor_entity: str, new_value: str | int, type: s
     """Update the value of a sensor entity."""
     # Attempt to parse the response
     if type == "boolean" and new_value.lower() not in ["on", "off"]:
-        if new_value.lower() in ["true", "false"]:
-            new_value = "on" if new_value.lower() == "true" else "off"
-        elif new_value.split(" ")[0].replace(",", "").lower() == "yes":
+        new_value_lower = new_value.lower()
+        if new_value_lower in ["true", "false"]:
+            new_value = "on" if new_value_lower == "true" else "off"
+        elif re.match(r"^\s*yes\s*[,]*", new_value_lower):
             new_value = "on"
-        elif new_value.split(" ")[0].replace(",", "").lower() == "no":
+        elif re.match(r"^\s*no\s*[,]*", new_value_lower):
             new_value = "off"
         else:
             raise ServiceValidationError(
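The regex-based boolean parsing above is more forgiving than the old first-word comparison: it tolerates leading whitespace and a trailing comma around "yes"/"no". A minimal sketch of the matching behavior (not part of the diff):

    import re

    for answer in ["yes", "Yes, the door is open", "  no,", "maybe"]:
        lowered = answer.lower()
        if re.match(r"^\s*yes\s*[,]*", lowered):
            print(answer, "-> on")
        elif re.match(r"^\s*no\s*[,]*", lowered):
            print(answer, "-> off")
        else:
            print(answer, "-> ServiceValidationError")  # unparseable answer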
@@ -217,7 +230,8 @@ async def _update_sensor(hass, sensor_entity: str, new_value: str | int, type: s
         _LOGGER.info(
             f"Updating sensor {sensor_entity} with new value: {new_value}")
         try:
-            current_attributes = hass.states.get(sensor_entity).attributes.copy()
+            current_attributes = hass.states.get(
+                sensor_entity).attributes.copy()
             hass.states.async_set(sensor_entity, new_value, current_attributes)
         except Exception as e:
             _LOGGER.error(f"Failed to update sensor {sensor_entity}: {e}")
@@ -244,15 +258,18 @@ def __init__(self, data_call):
             "\n") if data_call.data.get(EVENT_ID) else None
         self.interval = int(data_call.data.get(INTERVAL, 2))
         self.duration = int(data_call.data.get(DURATION, 10))
-        self.frigate_retry_attempts = int(data_call.data.get(FRIGATE_RETRY_ATTEMPTS, 2))
-        self.frigate_retry_seconds = int(data_call.data.get(FRIGATE_RETRY_SECONDS, 1))
+        self.frigate_retry_attempts = int(
+            data_call.data.get(FRIGATE_RETRY_ATTEMPTS, 2))
+        self.frigate_retry_seconds = int(
+            data_call.data.get(FRIGATE_RETRY_SECONDS, 1))
         self.max_frames = int(data_call.data.get(MAX_FRAMES, 3))
         self.target_width = data_call.data.get(TARGET_WIDTH, 3840)
         self.temperature = float(data_call.data.get(TEMPERATURE, 0.3))
         self.max_tokens = int(data_call.data.get(MAXTOKENS, 100))
         self.include_filename = data_call.data.get(INCLUDE_FILENAME, False)
         self.expose_images = data_call.data.get(EXPOSE_IMAGES, False)
-        self.expose_images_persist = data_call.data.get(EXPOSE_IMAGES_PERSIST, False)
+        self.expose_images_persist = data_call.data.get(
+            EXPOSE_IMAGES_PERSIST, False)
         self.generate_title = data_call.data.get(GENERATE_TITLE, False)
         self.sensor_entity = data_call.data.get(SENSOR_ENTITY)
         # ------------ Added during call ------------
@@ -297,7 +314,7 @@ async def video_analyzer(data_call):
     start = dt_util.now()
     call = ServiceCallData(data_call).get_service_call_data()
     call.message = "The attached images are frames from a video. " + call.message
-
+
     request = Request(hass,
                       message=call.message,
                       max_tokens=call.max_tokens,
2 changes: 2 additions & 0 deletions custom_components/llmvision/calendar.py
@@ -39,6 +39,8 @@ def __init__(self, hass: HomeAssistant, config_entry: ConfigEntry):
         self._file_path = os.path.join(
             self.hass.config.path("llmvision"), "events.json"
         )
+        # Ensure the directory exists
+        os.makedirs(os.path.dirname(self._file_path), exist_ok=True)
         self.hass.loop.create_task(self.async_update())

     def _ensure_datetime(self, dt):
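This makedirs call appears to be the fix for #95: if the llmvision storage directory goes missing after an update, writing events.json fails and the calendar comes up empty. With exist_ok=True the call is idempotent, so it is safe on every startup (a minimal sketch; the path is for illustration):

    import os

    file_path = "/config/llmvision/events.json"  # illustrative path
    os.makedirs(os.path.dirname(file_path), exist_ok=True)  # creates the folder only if missing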
2 changes: 1 addition & 1 deletion custom_components/llmvision/media_handlers.py
@@ -422,7 +422,7 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
             sorted_frames.append(frames[0])

         # Add frames to client
-        for counter, frame_path, _ in enumerate(sorted_frames, start=1):
+        for counter, (frame_path, _) in enumerate(sorted_frames, start=1):
             resized_image = await self.resize_image(image_path=frame_path, target_width=target_width)
             if expose_images:
                 persist_filename = f"/config/www/llmvision/" + frame_path.split("/")[-1]
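The one-line change above fixes a real unpacking bug: enumerate() yields (index, item) pairs, so unpacking three names from a list of 2-tuples raises ValueError; parenthesizing nests the unpacking. A quick sketch with illustrative data:

    frames = [("/tmp/frame1.jpg", 0.9), ("/tmp/frame2.jpg", 0.7)]  # (path, score) pairs

    # Old form raised: ValueError: not enough values to unpack (expected 3, got 2)
    # for counter, frame_path, _ in enumerate(frames, start=1): ...

    # Fixed form: enumerate yields (counter, (path, score))
    for counter, (frame_path, _) in enumerate(frames, start=1):
        print(counter, frame_path)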
13 changes: 5 additions & 8 deletions custom_components/llmvision/providers.py
@@ -3,6 +3,7 @@
 from homeassistant.helpers.aiohttp_client import async_get_clientsession
 import logging
 import inspect
+import re
 from .const import (
     DOMAIN,
     CONF_OPENAI_API_KEY,
@@ -145,19 +146,15 @@ async def call(self, call):

         elif provider == 'Anthropic':
             api_key = config.get(CONF_ANTHROPIC_API_KEY)
-
             provider_instance = Anthropic(self.hass, api_key=api_key)

         elif provider == 'Google':
             api_key = config.get(CONF_GOOGLE_API_KEY)
-
-            provider_instance = Google(self.hass, api_key=api_key, endpoint={
-                'base_url': ENDPOINT_GOOGLE, 'model': call.model
-            })
+            model = call.model if call.model and call.model != "None" else "gemini-1.5-flash-latest"
+            provider_instance = Google(self.hass, api_key=api_key, endpoint={'base_url': ENDPOINT_GOOGLE, 'model': model})

         elif provider == 'Groq':
             api_key = config.get(CONF_GROQ_API_KEY)
-
             provider_instance = Groq(self.hass, api_key=api_key)

         elif provider == 'LocalAI':
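The Google branch now guards against the model arriving as the literal string "None", which is truthy, so a plain "call.model or default" would not catch it. A minimal sketch of the fallback (function name illustrative):

    def pick_model(model):
        # Explicit comparison is required: "None" (the string) is truthy.
        return model if model and model != "None" else "gemini-1.5-flash-latest"

    assert pick_model(None) == "gemini-1.5-flash-latest"
    assert pick_model("None") == "gemini-1.5-flash-latest"
    assert pick_model("gemini-1.5-pro") == "gemini-1.5-pro"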
@@ -204,7 +201,7 @@ async def call(self, call):
             call.message = gen_title_prompt.format(response=response_text)
             gen_title = await provider_instance.title_request(call)

-            return {"title": gen_title.replace(".", "").replace("'", ""), "response_text": response_text}
+            return {"title": re.sub(r'[^a-zA-Z0-9\s]', '', gen_title), "response_text": response_text}
         else:
             return {"response_text": response_text}

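The single re.sub above strips all punctuation from the generated title, not just the periods and apostrophes the old chained replaces handled, so quotes, commas, and other symbols no longer leak into event labels. A quick comparison (the example title is illustrative):

    import re

    gen_title = '"Package" delivered, at front-door!'
    old = gen_title.replace(".", "").replace("'", "")  # leaves quotes, commas, hyphens, "!" behind
    new = re.sub(r'[^a-zA-Z0-9\s]', '', gen_title)     # keeps only letters, digits, whitespace
    print(old)  # "Package" delivered, at front-door!
    print(new)  # Package delivered at frontdoor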
@@ -283,7 +280,7 @@ async def vision_request(self, call) -> str:

     async def title_request(self, call) -> str:
         call.temperature = 0.1
-        call.max_tokens = 3
+        call.max_tokens = 5
         data = self._prepare_text_data(call)
         return await self._make_request(data)

37 changes: 30 additions & 7 deletions custom_components/llmvision/services.yaml
@@ -92,7 +92,7 @@ image_analyzer:
   generate_title:
     name: Generate Title
     required: false
-    description: Generate a title. (Used for notifications)
+    description: Generate a title. (Used for notifications and remembered events)
     default: false
     selector:
       boolean:
@@ -104,6 +104,15 @@ image_analyzer:
       default: false
     selector:
       boolean:
+  expose_images_persist:
+    name: Persist Exposed Images
+    description: (Experimental) Normally exposed images are re-written with each new event. Setting this to true will include the Frigate eventID, if available, as part of the filename. If there is no
+      Frigate eventID, a uid will be used instead.
+    required: false
+    example: false
+    default: false
+    selector:
+      boolean:

 video_analyzer:
   name: Video Analyzer
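The description above implies a naming scheme keyed by the Frigate event ID, with a uid fallback when no event ID is available. A hypothetical sketch of such naming (not the component's actual code):

    import uuid

    def persist_filename(frigate_event_id=None):
        # Hypothetical: stable name per Frigate event, random uid otherwise.
        key = frigate_event_id if frigate_event_id else uuid.uuid4().hex
        return f"/config/www/llmvision/{key}.jpg"

    print(persist_filename("1703577600.123456-abc"))  # keyed by Frigate event ID
    print(persist_filename())                          # falls back to a uid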
@@ -158,8 +167,8 @@ video_analyzer:
         multiline: true
   frigate_retry_attempts:
     name: Frigate Retry Attempts
-    description: How many times to retry fetching the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended.
-      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces.
+    description: How many times to retry fetching the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended.
+      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces.
     required: false
     example: 2
     default: 2
@@ -170,8 +179,8 @@ video_analyzer:
         step: 1
   frigate_retry_seconds:
     name: Frigate Retry Seconds
-    description: How long to wait between retries to fetch the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended.
-      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces.
+    description: How long to wait between retries to fetch the video clip from Frigate. Clips are not always available from Frigate as soon as the event has ended.
+      Slower machines or longer clips may need additional attempts. Increase this if you see errors fetching the clips from Frigate in your automation traces.
     required: false
     example: 1
     default: 1
@@ -230,6 +239,13 @@ video_analyzer:
         min: 0.0
         max: 1.0
         step: 0.1
+  generate_title:
+    name: Generate Title
+    required: false
+    description: Generate a title. (Used for notifications and remembered events)
+    default: false
+    selector:
+      boolean:
   expose_images:
     name: Expose Images
     description: (Experimental) Expose analyzed frames after processing. This will save analyzed frames in /www/llmvision so they can be used for notifications. (Only works for entity input, include camera name should be enabled). Existing files will be overwritten.
@@ -240,8 +256,8 @@ video_analyzer:
       boolean:
   expose_images_persist:
     name: Persist Exposed Images
-    description: Normally exposed images are re-written with each new event. Setting this to true will include the Frigate eventID, if available, as part of the filename. If there is no
-      Frigate eventID, a guid will be used instead.
+    description: (Experimental) Normally exposed images are re-written with each new event. Setting this to true will include the Frigate eventID, if available, as part of the filename. If there is no
+      Frigate eventID, a uid will be used instead.
     required: false
     example: false
     default: false
@@ -352,6 +368,13 @@ stream_analyzer:
         min: 0.1
         max: 1.0
         step: 0.1
+  generate_title:
+    name: Generate Title
+    required: false
+    description: Generate a title. (Used for notifications and remembered events)
+    default: false
+    selector:
+      boolean:
   expose_images:
     name: Expose Images
     description: (Experimental) Expose analyzed frames after processing. This will save analyzed frames in /www/llmvision so they can be used for notifications. (Only works for entity input, include camera name should be enabled). Existing files will be overwritten.
