diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a05a5ee
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+playground.py
+.gitignore
diff --git a/README.md b/README.md
index d1658b4..a9bf30b 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
-
+
diff --git a/custom_components/llmvision/__init__.py b/custom_components/llmvision/__init__.py
index f4857ff..57e420e 100644
--- a/custom_components/llmvision/__init__.py
+++ b/custom_components/llmvision/__init__.py
@@ -23,16 +23,25 @@
VIDEO_FILE,
EVENT_ID,
INTERVAL,
+ DURATION,
TEMPERATURE,
DETAIL,
INCLUDE_FILENAME
)
+from homeassistant.config_entries import ConfigEntry
from .request_handlers import RequestHandler
from .media_handlers import MediaProcessor
from homeassistant.core import SupportsResponse
+import logging
+
+_LOGGER = logging.getLogger(__name__)
+
async def async_setup_entry(hass, entry):
- """Save config entry to hass.data"""
+ """Save config entry to hass.data with the same unique identifier as the config entry"""
+ # Use the entry_id from the config entry as the UID
+ entry_uid = entry.entry_id
+
# Get all entries from config flow
openai_api_key = entry.data.get(CONF_OPENAI_API_KEY)
anthropic_api_key = entry.data.get(CONF_ANTHROPIC_API_KEY)
@@ -51,34 +60,65 @@ async def async_setup_entry(hass, entry):
if DOMAIN not in hass.data:
hass.data[DOMAIN] = {}
- # Merge the new data with the existing data
- hass.data[DOMAIN].update({
- key: value
- for key, value in {
- CONF_OPENAI_API_KEY: openai_api_key,
- CONF_ANTHROPIC_API_KEY: anthropic_api_key,
- CONF_GOOGLE_API_KEY: google_api_key,
- CONF_GROQ_API_KEY: groq_api_key,
- CONF_LOCALAI_IP_ADDRESS: localai_ip_address,
- CONF_LOCALAI_PORT: localai_port,
- CONF_LOCALAI_HTTPS: localai_https,
- CONF_OLLAMA_IP_ADDRESS: ollama_ip_address,
- CONF_OLLAMA_PORT: ollama_port,
- CONF_OLLAMA_HTTPS: ollama_https,
- CONF_CUSTOM_OPENAI_ENDPOINT: custom_openai_endpoint,
- CONF_CUSTOM_OPENAI_API_KEY: custom_openai_api_key
- }.items()
- if value is not None
- })
+ # Create a dictionary for the entry data
+ entry_data = {
+ CONF_OPENAI_API_KEY: openai_api_key,
+ CONF_ANTHROPIC_API_KEY: anthropic_api_key,
+ CONF_GOOGLE_API_KEY: google_api_key,
+ CONF_GROQ_API_KEY: groq_api_key,
+ CONF_LOCALAI_IP_ADDRESS: localai_ip_address,
+ CONF_LOCALAI_PORT: localai_port,
+ CONF_LOCALAI_HTTPS: localai_https,
+ CONF_OLLAMA_IP_ADDRESS: ollama_ip_address,
+ CONF_OLLAMA_PORT: ollama_port,
+ CONF_OLLAMA_HTTPS: ollama_https,
+ CONF_CUSTOM_OPENAI_ENDPOINT: custom_openai_endpoint,
+ CONF_CUSTOM_OPENAI_API_KEY: custom_openai_api_key
+ }
+
+ # Filter out None values
+ filtered_entry_data = {key: value for key,
+ value in entry_data.items() if value is not None}
+
+ # Store the filtered entry data under the entry_id
+ hass.data[DOMAIN][entry_uid] = filtered_entry_data
return True
+async def async_remove_entry(hass, entry):
+ """Remove config entry from hass.data"""
+ # Use the entry_id from the config entry as the UID
+ entry_uid = entry.entry_id
+
+ if entry_uid in hass.data[DOMAIN]:
+ # Remove the entry from hass.data
+ _LOGGER.info(f"Removing {entry.title} from hass.data")
+ hass.data[DOMAIN].pop(entry_uid)
+ else:
+ _LOGGER.warning(
+ f"Entry {entry.title} not found but was requested to be removed")
+
+ return True
+
+
+async def async_unload_entry(hass, entry) -> bool: return True
+
+
+async def async_migrate_entry(hass, config_entry: ConfigEntry) -> bool:
+ if DOMAIN not in hass.data:
+ return True
+ else:
+ return False
+
+
class ServiceCallData:
"""Store service call data and set default values"""
+
def __init__(self, data_call):
self.provider = str(data_call.data.get(PROVIDER))
- self.model = str(data_call.data.get(MODEL, self._default_model(self.provider)))
+ self.model = str(data_call.data.get(
+ MODEL))
self.message = str(data_call.data.get(MESSAGE)[0:2000])
self.image_paths = data_call.data.get(IMAGE_FILE, "").split(
"\n") if data_call.data.get(IMAGE_FILE) else None
@@ -88,37 +128,22 @@ def __init__(self, data_call):
self.event_id = data_call.data.get(EVENT_ID, "").split(
"\n") if data_call.data.get(EVENT_ID) else None
self.interval = int(data_call.data.get(INTERVAL, 3))
+ self.duration = int(data_call.data.get(DURATION, 10))
self.target_width = data_call.data.get(TARGET_WIDTH, 1280)
- self.temperature = float(data_call.data.get(TEMPERATURE, 0.5))
+ self.temperature = float(data_call.data.get(TEMPERATURE, 0.3))
self.max_tokens = int(data_call.data.get(MAXTOKENS, 100))
self.detail = str(data_call.data.get(DETAIL, "auto"))
self.include_filename = data_call.data.get(INCLUDE_FILENAME, False)
def get_service_call_data(self):
return self
-
- def _default_model(self, provider):
- if provider == "OpenAI":
- return "gpt-4o-mini"
- elif provider == "Anthropic":
- return "claude-3-5-sonnet-20240620"
- elif provider == "Google":
- return "gemini-1.5-flash-latest"
- elif provider == "Groq":
- return "llava-v1.5-7b-4096-preview"
- elif provider == "LocalAI":
- return "gpt-4-vision-preview"
- elif provider == "Ollama":
- return "llava-phi3:latest"
- elif provider == "Custom OpenAI":
- return "gpt-4o-mini"
def setup(hass, config):
async def image_analyzer(data_call):
"""Handle the service call to analyze an image with LLM Vision"""
-
- # Initialize call objecto with service call data
+
+ # Initialize call object with service call data
call = ServiceCallData(data_call).get_service_call_data()
# Initialize the RequestHandler client
client = RequestHandler(hass,
@@ -130,7 +155,11 @@ async def image_analyzer(data_call):
# Fetch and preprocess images
processor = MediaProcessor(hass, client)
# Send images to RequestHandler client
- client = await processor.add_images(call.image_entities, call.image_paths, call.target_width, call.include_filename)
+ client = await processor.add_images(image_entities=call.image_entities,
+ image_paths=call.image_paths,
+ target_width=call.target_width,
+ include_filename=call.include_filename
+ )
# Validate configuration, input data and make the call
response = await client.make_request(call)
@@ -146,10 +175,35 @@ async def video_analyzer(data_call):
temperature=call.temperature,
detail=call.detail)
processor = MediaProcessor(hass, client)
- client = await processor.add_videos(call.video_paths, call.event_id, call.interval, call.target_width, call.include_filename)
+ client = await processor.add_videos(video_paths=call.video_paths,
+ event_ids=call.event_id,
+ interval=call.interval,
+ target_width=call.target_width,
+ include_filename=call.include_filename
+ )
response = await client.make_request(call)
return response
+ async def stream_analyzer(data_call):
+        """Handle the service call to analyze a live camera stream"""
+ call = ServiceCallData(data_call).get_service_call_data()
+ call.message = "The attached images are frames from a live camera feed. " + call.message
+ client = RequestHandler(hass,
+ message=call.message,
+ max_tokens=call.max_tokens,
+ temperature=call.temperature,
+ detail=call.detail)
+ processor = MediaProcessor(hass, client)
+ client = await processor.add_streams(image_entities=call.image_entities,
+ duration=call.duration,
+ interval=call.interval,
+ target_width=call.target_width,
+ include_filename=call.include_filename
+ )
+ response = await client.make_request(call)
+ return response
+
+ # Register services
hass.services.register(
DOMAIN, "image_analyzer", image_analyzer,
supports_response=SupportsResponse.ONLY
@@ -158,6 +212,9 @@ async def video_analyzer(data_call):
DOMAIN, "video_analyzer", video_analyzer,
supports_response=SupportsResponse.ONLY
)
+ hass.services.register(
+ DOMAIN, "stream_analyzer", stream_analyzer,
+ supports_response=SupportsResponse.ONLY
+ )
return True
-
\ No newline at end of file
diff --git a/custom_components/llmvision/config_flow.py b/custom_components/llmvision/config_flow.py
index 75ad814..ecca4e3 100644
--- a/custom_components/llmvision/config_flow.py
+++ b/custom_components/llmvision/config_flow.py
@@ -202,7 +202,7 @@ def get_configured_providers(self):
class llmvisionConfigFlow(config_entries.ConfigFlow, domain=DOMAIN):
- VERSION = 1
+ VERSION = 2
async def handle_provider(self, provider, configured_providers):
if provider in configured_providers:
@@ -267,7 +267,7 @@ async def async_step_localai(self, user_input=None):
try:
await validator.localai()
# add the mode to user_input
- return self.async_create_entry(title="LLM Vision LocalAI", data=user_input)
+ return self.async_create_entry(title=f"LocalAI ({user_input[CONF_LOCALAI_IP_ADDRESS]})", data=user_input)
except ServiceValidationError as e:
_LOGGER.error(f"Validation failed: {e}")
return self.async_show_form(
@@ -295,7 +295,7 @@ async def async_step_ollama(self, user_input=None):
try:
await validator.ollama()
# add the mode to user_input
- return self.async_create_entry(title="LLM Vision Ollama", data=user_input)
+ return self.async_create_entry(title=f"Ollama ({user_input[CONF_OLLAMA_IP_ADDRESS]})", data=user_input)
except ServiceValidationError as e:
_LOGGER.error(f"Validation failed: {e}")
return self.async_show_form(
@@ -322,7 +322,7 @@ async def async_step_openai(self, user_input=None):
await validator.openai()
# add the mode to user_input
user_input["provider"] = self.init_info["provider"]
- return self.async_create_entry(title="LLM Vision OpenAI", data=user_input)
+ return self.async_create_entry(title="OpenAI", data=user_input)
except ServiceValidationError as e:
_LOGGER.error(f"Validation failed: {e}")
return self.async_show_form(
@@ -349,7 +349,7 @@ async def async_step_anthropic(self, user_input=None):
await validator.anthropic()
# add the mode to user_input
user_input["provider"] = self.init_info["provider"]
- return self.async_create_entry(title="LLM Vision Anthropic", data=user_input)
+ return self.async_create_entry(title="Anthropic Claude", data=user_input)
except ServiceValidationError as e:
_LOGGER.error(f"Validation failed: {e}")
return self.async_show_form(
@@ -376,7 +376,7 @@ async def async_step_google(self, user_input=None):
await validator.google()
# add the mode to user_input
user_input["provider"] = self.init_info["provider"]
- return self.async_create_entry(title="LLM Vision Google", data=user_input)
+ return self.async_create_entry(title="Google Gemini", data=user_input)
except ServiceValidationError as e:
_LOGGER.error(f"Validation failed: {e}")
return self.async_show_form(
@@ -403,7 +403,7 @@ async def async_step_groq(self, user_input=None):
await validator.groq()
# add the mode to user_input
user_input["provider"] = self.init_info["provider"]
- return self.async_create_entry(title="LLM Vision Groq", data=user_input)
+ return self.async_create_entry(title="Groq", data=user_input)
except ServiceValidationError as e:
_LOGGER.error(f"Validation failed: {e}")
return self.async_show_form(
@@ -431,7 +431,7 @@ async def async_step_custom_openai(self, user_input=None):
await validator.custom_openai()
# add the mode to user_input
user_input["provider"] = self.init_info["provider"]
- return self.async_create_entry(title="LLM Vision Custom OpenAI", data=user_input)
+ return self.async_create_entry(title="Custom OpenAI compatible Provider", data=user_input)
except ServiceValidationError as e:
_LOGGER.error(f"Validation failed: {e}")
return self.async_show_form(
diff --git a/custom_components/llmvision/const.py b/custom_components/llmvision/const.py
index 16fa5a5..012b33f 100644
--- a/custom_components/llmvision/const.py
+++ b/custom_components/llmvision/const.py
@@ -28,20 +28,22 @@
VIDEO_FILE = 'video_file'
EVENT_ID = 'event_id'
INTERVAL = 'interval'
+DURATION = 'duration'
DETAIL = 'detail'
TEMPERATURE = 'temperature'
INCLUDE_FILENAME = 'include_filename'
# Error messages
-ERROR_OPENAI_NOT_CONFIGURED = "OpenAI provider is not configured"
-ERROR_ANTHROPIC_NOT_CONFIGURED = "Anthropic provider is not configured"
-ERROR_GOOGLE_NOT_CONFIGURED = "Google provider is not configured"
-ERROR_GROQ_NOT_CONFIGURED = "Groq provider is not configured"
-ERROR_LOCALAI_NOT_CONFIGURED = "LocalAI provider is not configured"
-ERROR_OLLAMA_NOT_CONFIGURED = "Ollama provider is not configured"
+ERROR_OPENAI_NOT_CONFIGURED = "OpenAI is not configured"
+ERROR_ANTHROPIC_NOT_CONFIGURED = "Anthropic is not configured"
+ERROR_GOOGLE_NOT_CONFIGURED = "Google is not configured"
+ERROR_GROQ_NOT_CONFIGURED = "Groq is not configured"
+ERROR_GROQ_MULTIPLE_IMAGES = "Groq does not support videos or streams"
+ERROR_LOCALAI_NOT_CONFIGURED = "LocalAI is not configured"
+ERROR_OLLAMA_NOT_CONFIGURED = "Ollama is not configured"
ERROR_CUSTOM_OPENAI_NOT_CONFIGURED = "Custom OpenAI provider is not configured"
ERROR_NO_IMAGE_INPUT = "No image input provided"
-ERROR_HANDSHAKE_FAILED = "Handshake with LocalAI server failed"
+ERROR_HANDSHAKE_FAILED = "Connection could not be established"
# Versions
# https://docs.anthropic.com/en/api/versioning
diff --git a/custom_components/llmvision/icons.json b/custom_components/llmvision/icons.json
index 161a690..25b446c 100644
--- a/custom_components/llmvision/icons.json
+++ b/custom_components/llmvision/icons.json
@@ -1,6 +1,7 @@
{
"services": {
- "image_analyzer": "mdi:cube-scan",
- "video_analyzer": "mdi:cube-scan"
+ "image_analyzer": "mdi:image-search",
+ "video_analyzer": "mdi:movie-search",
+ "stream_analyzer": "mdi:video-wireless"
}
}
\ No newline at end of file
diff --git a/custom_components/llmvision/manifest.json b/custom_components/llmvision/manifest.json
index a86500b..638fe84 100644
--- a/custom_components/llmvision/manifest.json
+++ b/custom_components/llmvision/manifest.json
@@ -6,5 +6,5 @@
"documentation": "https://github.com/valentinfrlch/ha-llmvision",
"iot_class": "cloud_polling",
"issue_tracker": "https://github.com/valentinfrlch/ha-llmvision/issues",
- "version": "1.1.3"
+ "version": "1.2.0"
}
\ No newline at end of file
diff --git a/custom_components/llmvision/media_handlers.py b/custom_components/llmvision/media_handlers.py
index 5faff5c..80ddd93 100644
--- a/custom_components/llmvision/media_handlers.py
+++ b/custom_components/llmvision/media_handlers.py
@@ -30,6 +30,11 @@ def _save_clip(self, clip_data, clip_path):
with open(clip_path, "wb") as f:
f.write(clip_data)
+ def _convert_to_rgb(self, img):
+ if img.mode == 'RGBA' or img.format == 'GIF':
+ img = img.convert('RGB')
+ return img
+
async def resize_image(self, target_width, image_path=None, image_data=None, img=None):
"""Resize image to target_width"""
if image_path:
@@ -38,9 +43,7 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img
with img:
# Check if the image is a GIF and convert if necessary
_LOGGER.debug(f"Image format: {img.format}")
- if img.format == 'GIF':
- # Convert GIF to RGB
- img = img.convert('RGB')
+ img = self._convert_to_rgb(img)
# calculate new height based on aspect ratio
width, height = img.size
aspect_ratio = width / height
@@ -60,9 +63,7 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img
img = await self.hass.loop.run_in_executor(None, Image.open, img_byte_arr)
with img:
_LOGGER.debug(f"Image format: {img.format}")
- if img.format == 'GIF':
- # Convert GIF to RGB
- img = img.convert('RGB')
+ img = self._convert_to_rgb(img)
# calculate new height based on aspect ratio
width, height = img.size
aspect_ratio = width / height
@@ -74,6 +75,7 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img
base64_image = await self._encode_image(img)
elif img:
with img:
+ img = self._convert_to_rgb(img)
# calculate new height based on aspect ratio
width, height = img.size
aspect_ratio = width / height
@@ -86,6 +88,54 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img
return base64_image
+ async def record(self, image_entities, duration, interval, target_width, include_filename):
+ """Wrapper for client.add_frame with integrated recorder
+
+ Args:
+ image_entities (list[string]): List of camera entities to record
+ duration (float): Duration in seconds to record
+ target_width (int): Target width for the images in pixels
+ """
+ import time
+ import asyncio
+
+ camera_frames = {}
+
+ # Record on a separate thread for each camera
+ async def record_camera(image_entity, camera_number):
+ start = time.time()
+ frame_counter = 0
+ frames = {}
+ while time.time() - start < duration:
+ base_url = get_url(self.hass)
+ frame_url = base_url + \
+ self.hass.states.get(image_entity).attributes.get(
+ 'entity_picture')
+ frame_data = await self.client._fetch(frame_url)
+
+ # use either entity name or assign number to each camera
+ frames.update({image_entity.replace(
+ "camera.", "") + " frame " + str(frame_counter) if include_filename else "camera " + str(camera_number) + " frame " + str(frame_counter): frame_data})
+
+ frame_counter += 1
+
+ await asyncio.sleep(interval)
+ camera_frames.update({image_entity: frames})
+
+    entity_names = ', '.join([entity.replace('camera.', '') for entity in image_entities])
+    _LOGGER.info(f"Recording {entity_names} for {duration} seconds")
+
+ # start threads for each camera
+ await asyncio.gather(*(record_camera(image_entity, image_entities.index(image_entity)) for image_entity in image_entities))
+
+ # add frames to client
+ for frame in camera_frames:
+ for frame_name in camera_frames[frame]:
+ self.client.add_frame(
+ base64_image=await self.resize_image(target_width=target_width, image_data=camera_frames[frame][frame_name]),
+ filename=frame_name
+ )
+
async def add_images(self, image_entities, image_paths, target_width, include_filename):
"""Wrapper for client.add_frame for images"""
if image_entities:
@@ -98,19 +148,12 @@ async def add_images(self, image_entities, image_paths, target_width, include_fi
image_data = await self.client._fetch(image_url)
# If entity snapshot requested, use entity name as 'filename'
- if include_filename:
- entity_name = self.hass.states.get(
- image_entity).attributes.get('friendly_name')
+ self.client.add_frame(
+ base64_image=await self.resize_image(target_width=target_width, image_data=image_data),
+ filename=self.hass.states.get(
+ image_entity).attributes.get('friendly_name') if include_filename else ""
+ )
- self.client.add_frame(
- base64_image=await self.resize_image(target_width=target_width, image_data=image_data),
- filename=entity_name
- )
- else:
- self.client.add_frame(
- base64_image=await self.resize_image(target_width=target_width, image_data=image_data),
- filename=""
- )
except AttributeError as e:
raise ServiceValidationError(
f"Entity {image_entity} does not exist")
@@ -136,11 +179,11 @@ async def add_images(self, image_entities, image_paths, target_width, include_fi
return self.client
async def add_videos(self, video_paths, event_ids, interval, target_width, include_filename):
+ """Wrapper for client.add_frame for videos"""
tmp_clips_dir = f"/config/custom_components/{DOMAIN}/tmp_clips"
tmp_frames_dir = f"/config/custom_components/{DOMAIN}/tmp_frames"
if not video_paths:
video_paths = []
- """Wrapper for client.add_frame for videos"""
if event_ids:
for event_id in event_ids:
try:
@@ -151,14 +194,17 @@ async def add_videos(self, video_paths, event_ids, interval, target_width, inclu
os.makedirs(tmp_clips_dir, exist_ok=True)
_LOGGER.info(f"Created {tmp_clips_dir}")
# save clip to file with event_id as filename
- clip_path = os.path.join(tmp_clips_dir, event_id.split("-")[-1] + ".mp4")
+ clip_path = os.path.join(
+ tmp_clips_dir, event_id.split("-")[-1] + ".mp4")
await self.hass.loop.run_in_executor(None, self._save_clip, clip_data, clip_path)
- _LOGGER.info(f"Saved frigate clip to {clip_path} (temporarily)")
+ _LOGGER.info(
+ f"Saved frigate clip to {clip_path} (temporarily)")
# append to video_paths
video_paths.append(clip_path)
except AttributeError as e:
- raise ServiceValidationError(f"Failed to fetch frigate clip {event_id}: {e}")
+ raise ServiceValidationError(
+ f"Failed to fetch frigate clip {event_id}: {e}")
if video_paths:
_LOGGER.debug(f"Processing videos: {video_paths}")
for video_path in video_paths:
@@ -170,7 +216,8 @@ async def add_videos(self, video_paths, event_ids, interval, target_width, inclu
if os.path.exists(tmp_frames_dir):
_LOGGER.debug(f"Created {tmp_frames_dir}")
else:
- _LOGGER.error(f"Failed to create temp directory {tmp_frames_dir}")
+ _LOGGER.error(
+ f"Failed to create temp directory {tmp_frames_dir}")
ffmpeg_cmd = [
"ffmpeg",
@@ -185,7 +232,8 @@ async def add_videos(self, video_paths, event_ids, interval, target_width, inclu
for frame_file in await self.hass.loop.run_in_executor(None, os.listdir, tmp_frames_dir):
_LOGGER.debug(f"Adding frame {frame_file}")
frame_counter = 0
- frame_path = os.path.join(tmp_frames_dir, frame_file)
+ frame_path = os.path.join(
+ tmp_frames_dir, frame_file)
# Remove transparency for compatibility
with Image.open(frame_path) as img:
@@ -218,4 +266,9 @@ async def add_videos(self, video_paths, event_ids, interval, target_width, inclu
f"Deleted tmp folder: {tmp_frames_dir}")
except FileNotFoundError as e:
_LOGGER.error(f"Failed to delete tmp folders: {e}")
- return self.client
\ No newline at end of file
+ return self.client
+
+ async def add_streams(self, image_entities, duration, interval, target_width, include_filename):
+ if image_entities:
+ await self.record(image_entities, duration, interval, target_width, include_filename)
+ return self.client
diff --git a/custom_components/llmvision/request_handlers.py b/custom_components/llmvision/request_handlers.py
index c9a40e2..9418980 100644
--- a/custom_components/llmvision/request_handlers.py
+++ b/custom_components/llmvision/request_handlers.py
@@ -23,6 +23,7 @@
ERROR_ANTHROPIC_NOT_CONFIGURED,
ERROR_GOOGLE_NOT_CONFIGURED,
ERROR_GROQ_NOT_CONFIGURED,
+ ERROR_GROQ_MULTIPLE_IMAGES,
ERROR_LOCALAI_NOT_CONFIGURED,
ERROR_OLLAMA_NOT_CONFIGURED,
ERROR_NO_IMAGE_INPUT
@@ -30,6 +31,7 @@
_LOGGER = logging.getLogger(__name__)
+
def sanitize_data(data):
"""Remove long string data from request data to reduce log size"""
if isinstance(data, dict):
@@ -42,6 +44,45 @@ def sanitize_data(data):
return data
+def get_provider(hass, provider_uid):
+ """Translate the UID of the config entry into the provider name."""
+ _LOGGER.info(f"llmvision storage: {hass.data[DOMAIN]}")
+ if DOMAIN not in hass.data:
+ return None
+
+ entry_data = hass.data[DOMAIN].get(provider_uid)
+ if not entry_data:
+ return None
+
+ if CONF_OPENAI_API_KEY in entry_data:
+ return "OpenAI"
+ elif CONF_ANTHROPIC_API_KEY in entry_data:
+ return "Anthropic"
+ elif CONF_GOOGLE_API_KEY in entry_data:
+ return "Google"
+ elif CONF_GROQ_API_KEY in entry_data:
+ return "Groq"
+ elif CONF_LOCALAI_IP_ADDRESS in entry_data:
+ return "LocalAI"
+ elif CONF_OLLAMA_IP_ADDRESS in entry_data:
+ return "Ollama"
+ elif CONF_CUSTOM_OPENAI_API_KEY in entry_data:
+ return "Custom OpenAI"
+
+ return None
+
+
+default_model = lambda provider: {
+ "OpenAI": "gpt-4o-mini",
+ "Anthropic": "claude-3-5-sonnet-20240620",
+ "Google": "gemini-1.5-flash-latest",
+ "Groq": "llava-v1.5-7b-4096-preview",
+ "LocalAI": "gpt-4-vision-preview",
+ "Ollama": "llava-phi3:latest",
+ "Custom OpenAI": "gpt-4o-mini"
+}.get(provider, "gpt-4o-mini") # Default value if provider is not found
+
+
class RequestHandler:
def __init__(self, hass, message, max_tokens, temperature, detail):
self.session = async_get_clientsession(hass)
@@ -54,43 +95,52 @@ def __init__(self, hass, message, max_tokens, temperature, detail):
self.filenames = []
async def make_request(self, call):
- if call.provider == 'OpenAI':
- api_key = self.hass.data.get(DOMAIN).get(CONF_OPENAI_API_KEY)
- model = call.model
- self._validate_call(provider=call.provider,
+ entry_id = call.provider
+ provider = get_provider(self.hass, entry_id)
+ model = call.model if call.model != "None" else default_model(provider)
+ _LOGGER.info(f"Provider: {provider}")
+ _LOGGER.info(f"Model Default: {model}")
+    _LOGGER.info(f"Model: {call.model} type: {type(call.model)}")
+
+ if provider == 'OpenAI':
+ api_key = self.hass.data.get(DOMAIN).get(
+ entry_id).get(CONF_OPENAI_API_KEY)
+ self._validate_call(provider=provider,
api_key=api_key,
base64_images=self.base64_images)
response_text = await self.openai(model=model, api_key=api_key)
- elif call.provider == 'Anthropic':
- api_key = self.hass.data.get(DOMAIN).get(CONF_ANTHROPIC_API_KEY)
- model = call.model
- self._validate_call(provider=call.provider,
+ elif provider == 'Anthropic':
+ api_key = self.hass.data.get(DOMAIN).get(
+ entry_id).get(CONF_ANTHROPIC_API_KEY)
+ self._validate_call(provider=provider,
api_key=api_key,
base64_images=self.base64_images)
response_text = await self.anthropic(model=model, api_key=api_key)
- elif call.provider == 'Google':
- api_key = self.hass.data.get(DOMAIN).get(CONF_GOOGLE_API_KEY)
- model = call.model
- self._validate_call(provider=call.provider,
+ elif provider == 'Google':
+ api_key = self.hass.data.get(DOMAIN).get(
+ entry_id).get(CONF_GOOGLE_API_KEY)
+ self._validate_call(provider=provider,
api_key=api_key,
base64_images=self.base64_images)
response_text = await self.google(model=model, api_key=api_key)
- elif call.provider == 'Groq':
- api_key = self.hass.data.get(DOMAIN).get(CONF_GROQ_API_KEY)
- model = call.model
- self._validate_call(provider=call.provider,
+ elif provider == 'Groq':
+ api_key = self.hass.data.get(DOMAIN).get(
+ entry_id).get(CONF_GROQ_API_KEY)
+ self._validate_call(provider=provider,
api_key=api_key,
base64_images=self.base64_images)
response_text = await self.groq(model=model, api_key=api_key)
- elif call.provider == 'LocalAI':
+ elif provider == 'LocalAI':
ip_address = self.hass.data.get(
- DOMAIN, {}).get(CONF_LOCALAI_IP_ADDRESS)
+ DOMAIN).get(
+ entry_id).get(CONF_LOCALAI_IP_ADDRESS)
port = self.hass.data.get(
- DOMAIN, {}).get(CONF_LOCALAI_PORT)
+ DOMAIN).get(
+ entry_id).get(CONF_LOCALAI_PORT)
https = self.hass.data.get(
- DOMAIN, {}).get(CONF_LOCALAI_HTTPS, False)
- model = call.model
- self._validate_call(provider=call.provider,
+ DOMAIN).get(
+ entry_id).get(CONF_LOCALAI_HTTPS, False)
+ self._validate_call(provider=provider,
api_key=None,
base64_images=self.base64_images,
ip_address=ip_address,
@@ -99,14 +149,16 @@ async def make_request(self, call):
ip_address=ip_address,
port=port,
https=https)
- elif call.provider == 'Ollama':
+ elif provider == 'Ollama':
ip_address = self.hass.data.get(
- DOMAIN, {}).get(CONF_OLLAMA_IP_ADDRESS)
- port = self.hass.data.get(DOMAIN, {}).get(CONF_OLLAMA_PORT)
- https = self.hass.data.get(DOMAIN, {}).get(
+ DOMAIN).get(
+ entry_id).get(CONF_OLLAMA_IP_ADDRESS)
+ port = self.hass.data.get(DOMAIN).get(
+ entry_id).get(CONF_OLLAMA_PORT)
+ https = self.hass.data.get(DOMAIN).get(
+ entry_id).get(
CONF_OLLAMA_HTTPS, False)
- model = call.model
- self._validate_call(provider=call.provider,
+ self._validate_call(provider=provider,
api_key=None,
base64_images=self.base64_images,
ip_address=ip_address,
@@ -115,14 +167,14 @@ async def make_request(self, call):
ip_address=ip_address,
port=port,
https=https)
- elif call.provider == 'Custom OpenAI':
+ elif provider == 'Custom OpenAI':
api_key = self.hass.data.get(DOMAIN).get(
+ entry_id).get(
CONF_CUSTOM_OPENAI_API_KEY, "")
endpoint = self.hass.data.get(DOMAIN).get(
+ entry_id).get(
CONF_CUSTOM_OPENAI_ENDPOINT)
-
- model = call.model
- self._validate_call(provider=call.provider,
+ self._validate_call(provider=provider,
api_key=api_key,
base64_images=self.base64_images)
response_text = await self.openai(model=model, api_key=api_key, endpoint=endpoint)
@@ -399,6 +451,8 @@ def _validate_call(self, provider, api_key, base64_images, ip_address=None, port
elif provider == 'Groq':
if not api_key:
raise ServiceValidationError(ERROR_GROQ_NOT_CONFIGURED)
+ if len(base64_images) > 1:
+ raise ServiceValidationError(ERROR_GROQ_MULTIPLE_IMAGES)
# Checks for LocalAI
elif provider == 'LocalAI':
if not ip_address or not port:
diff --git a/custom_components/llmvision/services.yaml b/custom_components/llmvision/services.yaml
index f0e5eed..1f80b2d 100644
--- a/custom_components/llmvision/services.yaml
+++ b/custom_components/llmvision/services.yaml
@@ -4,33 +4,24 @@ image_analyzer:
fields:
provider:
name: Provider
- description: 'Provider to use'
+ description: 'Configuration to use'
required: true
- default: 'OpenAI'
selector:
- select:
- options:
- - 'OpenAI'
- - 'Anthropic'
- - 'Google'
- - 'Groq'
- - 'Ollama'
- - 'LocalAI'
- - 'Custom OpenAI'
+ config_entry:
+ integration: llmvision
model:
name: Model
required: false
- description: 'Model to use'
+ description: 'Model to use. Uncheck for default.'
example: "gpt-4o-mini"
- default: "gpt-4o-mini"
selector:
text:
multiline: false
message:
name: Prompt
required: true
- description: 'Prompt'
- example: "Describe the person present in the photo"
+ description: 'Model prompt'
+ example: "Describe the image"
selector:
text:
multiline: true
@@ -38,7 +29,7 @@ image_analyzer:
name: Image File
required: false
description: 'Local path to image'
- example: "/config/www/images/img.jpg"
+ example: "/config/www/tmp/front_door.jpg"
selector:
text:
multiline: true
@@ -53,25 +44,26 @@ image_analyzer:
multiple: true
include_filename:
name: Include Filename
- required: false
+ required: true
description: 'Include filename in the request'
+ example: false
default: false
selector:
boolean:
target_width:
name: Target Width
required: false
- description: 'Width in pixels to downscale to'
+ description: 'Width in pixels to downscale'
example: 1280
default: 1280
selector:
number:
min: 512
- max: 3840
+ max: 1920
detail:
name: Detail
required: false
- description: "Detail parameter, leave empty for 'auto'"
+ description: "Detail parameter. Leave empty for 'auto'"
default: 'high'
selector:
select:
@@ -86,17 +78,17 @@ image_analyzer:
default: 100
selector:
number:
- min: 10
- max: 1000
+ min: 1
+ max: 300
temperature:
name: Temperature
required: true
description: 'Randomness. Lower is more accurate, higher is more creative'
- example: 0.5
- default: 0.5
+ example: 0.2
+ default: 0.2
selector:
number:
- min: 0.0
+ min: 0.1
max: 1.0
step: 0.1
@@ -106,31 +98,24 @@ video_analyzer:
fields:
provider:
name: Provider
- description: 'Provider to use'
+ description: 'Configuration to use'
required: true
- default: 'OpenAI'
selector:
- select:
- options:
- - 'OpenAI'
- - 'Anthropic'
- - 'Google'
- - 'Ollama'
- - 'LocalAI'
+ config_entry:
+ integration: llmvision
model:
name: Model
required: false
- description: 'Model to use'
+ description: 'Model to use. Uncheck for default.'
example: "gpt-4o-mini"
- default: "gpt-4o-mini"
selector:
text:
multiline: false
message:
name: Prompt
required: true
- description: 'Prompt'
- example: "Describe what's going on in the video"
+ description: 'Model prompt'
+ example: "Describe what happens in the video"
selector:
text:
multiline: true
@@ -162,21 +147,22 @@ video_analyzer:
max: 60
include_filename:
name: Include Filename
- required: false
+ required: true
description: 'Include filename in the request'
+ example: false
default: false
selector:
boolean:
target_width:
name: Target Width
required: false
- description: 'Width in pixels to downscale to'
+ description: 'Width in pixels to downscale'
example: 1280
default: 1280
selector:
number:
min: 512
- max: 3840
+ max: 1920
detail:
name: Detail
required: false
@@ -195,16 +181,122 @@ video_analyzer:
default: 100
selector:
number:
- min: 10
- max: 1000
+ min: 1
+ max: 300
temperature:
name: Temperature
required: true
description: 'Randomness. Lower is more accurate, higher is more creative'
- example: 0.5
- default: 0.5
+ example: 0.2
+ default: 0.2
selector:
number:
min: 0.0
max: 1.0
+ step: 0.1
+
+stream_analyzer:
+ name: Stream Analyzer
+ description: Analyze a live camera stream with AI
+ fields:
+ provider:
+ name: Provider
+ description: 'Configuration to use'
+ required: true
+ selector:
+ config_entry:
+ integration: llmvision
+ model:
+ name: Model
+ required: false
+ description: 'Model to use. Uncheck for default.'
+ example: "gpt-4o-mini"
+ selector:
+ text:
+ multiline: false
+ message:
+ name: Prompt
+ required: true
+ description: 'Model prompt'
+ example: "Describe what happens in the camera feed"
+ selector:
+ text:
+ multiline: true
+ image_entity:
+ name: Camera Entity
+ required: true
+ description: 'Camera entity to stream'
+ example: 'camera.front_door'
+ selector:
+ entity:
+ domain: ["camera"]
+ multiple: true
+ interval:
+ name: Interval
+      description: Interval in seconds between analyzed frames
+ required: true
+ example: 2
+ default: 2
+ selector:
+ number:
+ min: 1
+ max: 10
+ duration:
+ name: Recording Duration
+ required: true
+ description: 'How long to record in seconds'
+ example: 5
+ default: 5
+ selector:
+ number:
+ min: 0
+ max: 60
+ include_filename:
+ name: Include camera name
+ required: true
+ description: 'Include camera name in request'
+ example: false
+ default: false
+ selector:
+ boolean:
+ target_width:
+ name: Target Width
+ required: false
+ description: 'Width in pixels to downscale'
+ example: 1280
+ default: 1280
+ selector:
+ number:
+ min: 512
+ max: 1920
+ detail:
+ name: Detail
+ required: false
+ description: "Detail parameter, leave empty for 'auto'"
+ default: 'high'
+ selector:
+ select:
+ options:
+ - 'high'
+ - 'low'
+ max_tokens:
+ name: Maximum Tokens
+ description: 'Maximum number of tokens to generate'
+ required: true
+ example: 100
+ default: 100
+ selector:
+ number:
+ min: 1
+ max: 300
+ temperature:
+ name: Temperature
+ required: true
+ description: 'Randomness. Lower is more accurate, higher is more creative'
+ example: 0.2
+ default: 0.2
+ selector:
+ number:
+ min: 0.1
+ max: 1.0
step: 0.1
\ No newline at end of file