diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a05a5ee --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Store +playground.py +.gitignore diff --git a/README.md b/README.md index d1658b4..a9bf30b 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@

- + Issues diff --git a/custom_components/llmvision/__init__.py b/custom_components/llmvision/__init__.py index f4857ff..57e420e 100644 --- a/custom_components/llmvision/__init__.py +++ b/custom_components/llmvision/__init__.py @@ -23,16 +23,25 @@ VIDEO_FILE, EVENT_ID, INTERVAL, + DURATION, TEMPERATURE, DETAIL, INCLUDE_FILENAME ) +from homeassistant.config_entries import ConfigEntry from .request_handlers import RequestHandler from .media_handlers import MediaProcessor from homeassistant.core import SupportsResponse +import logging + +_LOGGER = logging.getLogger(__name__) + async def async_setup_entry(hass, entry): - """Save config entry to hass.data""" + """Save config entry to hass.data with the same unique identifier as the config entry""" + # Use the entry_id from the config entry as the UID + entry_uid = entry.entry_id + # Get all entries from config flow openai_api_key = entry.data.get(CONF_OPENAI_API_KEY) anthropic_api_key = entry.data.get(CONF_ANTHROPIC_API_KEY) @@ -51,34 +60,65 @@ async def async_setup_entry(hass, entry): if DOMAIN not in hass.data: hass.data[DOMAIN] = {} - # Merge the new data with the existing data - hass.data[DOMAIN].update({ - key: value - for key, value in { - CONF_OPENAI_API_KEY: openai_api_key, - CONF_ANTHROPIC_API_KEY: anthropic_api_key, - CONF_GOOGLE_API_KEY: google_api_key, - CONF_GROQ_API_KEY: groq_api_key, - CONF_LOCALAI_IP_ADDRESS: localai_ip_address, - CONF_LOCALAI_PORT: localai_port, - CONF_LOCALAI_HTTPS: localai_https, - CONF_OLLAMA_IP_ADDRESS: ollama_ip_address, - CONF_OLLAMA_PORT: ollama_port, - CONF_OLLAMA_HTTPS: ollama_https, - CONF_CUSTOM_OPENAI_ENDPOINT: custom_openai_endpoint, - CONF_CUSTOM_OPENAI_API_KEY: custom_openai_api_key - }.items() - if value is not None - }) + # Create a dictionary for the entry data + entry_data = { + CONF_OPENAI_API_KEY: openai_api_key, + CONF_ANTHROPIC_API_KEY: anthropic_api_key, + CONF_GOOGLE_API_KEY: google_api_key, + CONF_GROQ_API_KEY: groq_api_key, + CONF_LOCALAI_IP_ADDRESS: localai_ip_address, + CONF_LOCALAI_PORT: localai_port, + CONF_LOCALAI_HTTPS: localai_https, + CONF_OLLAMA_IP_ADDRESS: ollama_ip_address, + CONF_OLLAMA_PORT: ollama_port, + CONF_OLLAMA_HTTPS: ollama_https, + CONF_CUSTOM_OPENAI_ENDPOINT: custom_openai_endpoint, + CONF_CUSTOM_OPENAI_API_KEY: custom_openai_api_key + } + + # Filter out None values + filtered_entry_data = {key: value for key, + value in entry_data.items() if value is not None} + + # Store the filtered entry data under the entry_id + hass.data[DOMAIN][entry_uid] = filtered_entry_data return True +async def async_remove_entry(hass, entry): + """Remove config entry from hass.data""" + # Use the entry_id from the config entry as the UID + entry_uid = entry.entry_id + + if entry_uid in hass.data[DOMAIN]: + # Remove the entry from hass.data + _LOGGER.info(f"Removing {entry.title} from hass.data") + hass.data[DOMAIN].pop(entry_uid) + else: + _LOGGER.warning( + f"Entry {entry.title} not found but was requested to be removed") + + return True + + +async def async_unload_entry(hass, entry) -> bool: return True + + +async def async_migrate_entry(hass, config_entry: ConfigEntry) -> bool: + if DOMAIN not in hass.data: + return True + else: + return False + + class ServiceCallData: """Store service call data and set default values""" + def __init__(self, data_call): self.provider = str(data_call.data.get(PROVIDER)) - self.model = str(data_call.data.get(MODEL, self._default_model(self.provider))) + self.model = str(data_call.data.get( + MODEL)) self.message = str(data_call.data.get(MESSAGE)[0:2000]) self.image_paths = data_call.data.get(IMAGE_FILE, "").split( "\n") if data_call.data.get(IMAGE_FILE) else None @@ -88,37 +128,22 @@ def __init__(self, data_call): self.event_id = data_call.data.get(EVENT_ID, "").split( "\n") if data_call.data.get(EVENT_ID) else None self.interval = int(data_call.data.get(INTERVAL, 3)) + self.duration = int(data_call.data.get(DURATION, 10)) self.target_width = data_call.data.get(TARGET_WIDTH, 1280) - self.temperature = float(data_call.data.get(TEMPERATURE, 0.5)) + self.temperature = float(data_call.data.get(TEMPERATURE, 0.3)) self.max_tokens = int(data_call.data.get(MAXTOKENS, 100)) self.detail = str(data_call.data.get(DETAIL, "auto")) self.include_filename = data_call.data.get(INCLUDE_FILENAME, False) def get_service_call_data(self): return self - - def _default_model(self, provider): - if provider == "OpenAI": - return "gpt-4o-mini" - elif provider == "Anthropic": - return "claude-3-5-sonnet-20240620" - elif provider == "Google": - return "gemini-1.5-flash-latest" - elif provider == "Groq": - return "llava-v1.5-7b-4096-preview" - elif provider == "LocalAI": - return "gpt-4-vision-preview" - elif provider == "Ollama": - return "llava-phi3:latest" - elif provider == "Custom OpenAI": - return "gpt-4o-mini" def setup(hass, config): async def image_analyzer(data_call): """Handle the service call to analyze an image with LLM Vision""" - - # Initialize call objecto with service call data + + # Initialize call object with service call data call = ServiceCallData(data_call).get_service_call_data() # Initialize the RequestHandler client client = RequestHandler(hass, @@ -130,7 +155,11 @@ async def image_analyzer(data_call): # Fetch and preprocess images processor = MediaProcessor(hass, client) # Send images to RequestHandler client - client = await processor.add_images(call.image_entities, call.image_paths, call.target_width, call.include_filename) + client = await processor.add_images(image_entities=call.image_entities, + image_paths=call.image_paths, + target_width=call.target_width, + include_filename=call.include_filename + ) # Validate configuration, input data and make the call response = await client.make_request(call) @@ -146,10 +175,35 @@ async def video_analyzer(data_call): temperature=call.temperature, detail=call.detail) processor = MediaProcessor(hass, client) - client = await processor.add_videos(call.video_paths, call.event_id, call.interval, call.target_width, call.include_filename) + client = await processor.add_videos(video_paths=call.video_paths, + event_ids=call.event_id, + interval=call.interval, + target_width=call.target_width, + include_filename=call.include_filename + ) response = await client.make_request(call) return response + async def stream_analyzer(data_call): + """Handle the service call to analyze a stream (future implementation)""" + call = ServiceCallData(data_call).get_service_call_data() + call.message = "The attached images are frames from a live camera feed. " + call.message + client = RequestHandler(hass, + message=call.message, + max_tokens=call.max_tokens, + temperature=call.temperature, + detail=call.detail) + processor = MediaProcessor(hass, client) + client = await processor.add_streams(image_entities=call.image_entities, + duration=call.duration, + interval=call.interval, + target_width=call.target_width, + include_filename=call.include_filename + ) + response = await client.make_request(call) + return response + + # Register services hass.services.register( DOMAIN, "image_analyzer", image_analyzer, supports_response=SupportsResponse.ONLY @@ -158,6 +212,9 @@ async def video_analyzer(data_call): DOMAIN, "video_analyzer", video_analyzer, supports_response=SupportsResponse.ONLY ) + hass.services.register( + DOMAIN, "stream_analyzer", stream_analyzer, + supports_response=SupportsResponse.ONLY + ) return True - \ No newline at end of file diff --git a/custom_components/llmvision/config_flow.py b/custom_components/llmvision/config_flow.py index 75ad814..ecca4e3 100644 --- a/custom_components/llmvision/config_flow.py +++ b/custom_components/llmvision/config_flow.py @@ -202,7 +202,7 @@ def get_configured_providers(self): class llmvisionConfigFlow(config_entries.ConfigFlow, domain=DOMAIN): - VERSION = 1 + VERSION = 2 async def handle_provider(self, provider, configured_providers): if provider in configured_providers: @@ -267,7 +267,7 @@ async def async_step_localai(self, user_input=None): try: await validator.localai() # add the mode to user_input - return self.async_create_entry(title="LLM Vision LocalAI", data=user_input) + return self.async_create_entry(title=f"LocalAI ({user_input[CONF_LOCALAI_IP_ADDRESS]})", data=user_input) except ServiceValidationError as e: _LOGGER.error(f"Validation failed: {e}") return self.async_show_form( @@ -295,7 +295,7 @@ async def async_step_ollama(self, user_input=None): try: await validator.ollama() # add the mode to user_input - return self.async_create_entry(title="LLM Vision Ollama", data=user_input) + return self.async_create_entry(title=f"Ollama ({user_input[CONF_OLLAMA_IP_ADDRESS]})", data=user_input) except ServiceValidationError as e: _LOGGER.error(f"Validation failed: {e}") return self.async_show_form( @@ -322,7 +322,7 @@ async def async_step_openai(self, user_input=None): await validator.openai() # add the mode to user_input user_input["provider"] = self.init_info["provider"] - return self.async_create_entry(title="LLM Vision OpenAI", data=user_input) + return self.async_create_entry(title="OpenAI", data=user_input) except ServiceValidationError as e: _LOGGER.error(f"Validation failed: {e}") return self.async_show_form( @@ -349,7 +349,7 @@ async def async_step_anthropic(self, user_input=None): await validator.anthropic() # add the mode to user_input user_input["provider"] = self.init_info["provider"] - return self.async_create_entry(title="LLM Vision Anthropic", data=user_input) + return self.async_create_entry(title="Anthropic Claude", data=user_input) except ServiceValidationError as e: _LOGGER.error(f"Validation failed: {e}") return self.async_show_form( @@ -376,7 +376,7 @@ async def async_step_google(self, user_input=None): await validator.google() # add the mode to user_input user_input["provider"] = self.init_info["provider"] - return self.async_create_entry(title="LLM Vision Google", data=user_input) + return self.async_create_entry(title="Google Gemini", data=user_input) except ServiceValidationError as e: _LOGGER.error(f"Validation failed: {e}") return self.async_show_form( @@ -403,7 +403,7 @@ async def async_step_groq(self, user_input=None): await validator.groq() # add the mode to user_input user_input["provider"] = self.init_info["provider"] - return self.async_create_entry(title="LLM Vision Groq", data=user_input) + return self.async_create_entry(title="Groq", data=user_input) except ServiceValidationError as e: _LOGGER.error(f"Validation failed: {e}") return self.async_show_form( @@ -431,7 +431,7 @@ async def async_step_custom_openai(self, user_input=None): await validator.custom_openai() # add the mode to user_input user_input["provider"] = self.init_info["provider"] - return self.async_create_entry(title="LLM Vision Custom OpenAI", data=user_input) + return self.async_create_entry(title="Custom OpenAI compatible Provider", data=user_input) except ServiceValidationError as e: _LOGGER.error(f"Validation failed: {e}") return self.async_show_form( diff --git a/custom_components/llmvision/const.py b/custom_components/llmvision/const.py index 16fa5a5..012b33f 100644 --- a/custom_components/llmvision/const.py +++ b/custom_components/llmvision/const.py @@ -28,20 +28,22 @@ VIDEO_FILE = 'video_file' EVENT_ID = 'event_id' INTERVAL = 'interval' +DURATION = 'duration' DETAIL = 'detail' TEMPERATURE = 'temperature' INCLUDE_FILENAME = 'include_filename' # Error messages -ERROR_OPENAI_NOT_CONFIGURED = "OpenAI provider is not configured" -ERROR_ANTHROPIC_NOT_CONFIGURED = "Anthropic provider is not configured" -ERROR_GOOGLE_NOT_CONFIGURED = "Google provider is not configured" -ERROR_GROQ_NOT_CONFIGURED = "Groq provider is not configured" -ERROR_LOCALAI_NOT_CONFIGURED = "LocalAI provider is not configured" -ERROR_OLLAMA_NOT_CONFIGURED = "Ollama provider is not configured" +ERROR_OPENAI_NOT_CONFIGURED = "OpenAI is not configured" +ERROR_ANTHROPIC_NOT_CONFIGURED = "Anthropic is not configured" +ERROR_GOOGLE_NOT_CONFIGURED = "Google is not configured" +ERROR_GROQ_NOT_CONFIGURED = "Groq is not configured" +ERROR_GROQ_MULTIPLE_IMAGES = "Groq does not support videos or streams" +ERROR_LOCALAI_NOT_CONFIGURED = "LocalAI is not configured" +ERROR_OLLAMA_NOT_CONFIGURED = "Ollama is not configured" ERROR_CUSTOM_OPENAI_NOT_CONFIGURED = "Custom OpenAI provider is not configured" ERROR_NO_IMAGE_INPUT = "No image input provided" -ERROR_HANDSHAKE_FAILED = "Handshake with LocalAI server failed" +ERROR_HANDSHAKE_FAILED = "Connection could not be established" # Versions # https://docs.anthropic.com/en/api/versioning diff --git a/custom_components/llmvision/icons.json b/custom_components/llmvision/icons.json index 161a690..25b446c 100644 --- a/custom_components/llmvision/icons.json +++ b/custom_components/llmvision/icons.json @@ -1,6 +1,7 @@ { "services": { - "image_analyzer": "mdi:cube-scan", - "video_analyzer": "mdi:cube-scan" + "image_analyzer": "mdi:image-search", + "video_analyzer": "mdi:movie-search", + "stream_analyzer": "mdi:video-wireless" } } \ No newline at end of file diff --git a/custom_components/llmvision/manifest.json b/custom_components/llmvision/manifest.json index a86500b..638fe84 100644 --- a/custom_components/llmvision/manifest.json +++ b/custom_components/llmvision/manifest.json @@ -6,5 +6,5 @@ "documentation": "https://github.com/valentinfrlch/ha-llmvision", "iot_class": "cloud_polling", "issue_tracker": "https://github.com/valentinfrlch/ha-llmvision/issues", - "version": "1.1.3" + "version": "1.2.0" } \ No newline at end of file diff --git a/custom_components/llmvision/media_handlers.py b/custom_components/llmvision/media_handlers.py index 5faff5c..80ddd93 100644 --- a/custom_components/llmvision/media_handlers.py +++ b/custom_components/llmvision/media_handlers.py @@ -30,6 +30,11 @@ def _save_clip(self, clip_data, clip_path): with open(clip_path, "wb") as f: f.write(clip_data) + def _convert_to_rgb(self, img): + if img.mode == 'RGBA' or img.format == 'GIF': + img = img.convert('RGB') + return img + async def resize_image(self, target_width, image_path=None, image_data=None, img=None): """Resize image to target_width""" if image_path: @@ -38,9 +43,7 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img with img: # Check if the image is a GIF and convert if necessary _LOGGER.debug(f"Image format: {img.format}") - if img.format == 'GIF': - # Convert GIF to RGB - img = img.convert('RGB') + img = self._convert_to_rgb(img) # calculate new height based on aspect ratio width, height = img.size aspect_ratio = width / height @@ -60,9 +63,7 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img img = await self.hass.loop.run_in_executor(None, Image.open, img_byte_arr) with img: _LOGGER.debug(f"Image format: {img.format}") - if img.format == 'GIF': - # Convert GIF to RGB - img = img.convert('RGB') + img = self._convert_to_rgb(img) # calculate new height based on aspect ratio width, height = img.size aspect_ratio = width / height @@ -74,6 +75,7 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img base64_image = await self._encode_image(img) elif img: with img: + img = self._convert_to_rgb(img) # calculate new height based on aspect ratio width, height = img.size aspect_ratio = width / height @@ -86,6 +88,54 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img return base64_image + async def record(self, image_entities, duration, interval, target_width, include_filename): + """Wrapper for client.add_frame with integrated recorder + + Args: + image_entities (list[string]): List of camera entities to record + duration (float): Duration in seconds to record + target_width (int): Target width for the images in pixels + """ + import time + import asyncio + + camera_frames = {} + + # Record on a separate thread for each camera + async def record_camera(image_entity, camera_number): + start = time.time() + frame_counter = 0 + frames = {} + while time.time() - start < duration: + base_url = get_url(self.hass) + frame_url = base_url + \ + self.hass.states.get(image_entity).attributes.get( + 'entity_picture') + frame_data = await self.client._fetch(frame_url) + + # use either entity name or assign number to each camera + frames.update({image_entity.replace( + "camera.", "") + " frame " + str(frame_counter) if include_filename else "camera " + str(camera_number) + " frame " + str(frame_counter): frame_data}) + + frame_counter += 1 + + await asyncio.sleep(interval) + camera_frames.update({image_entity: frames}) + + _LOGGER.info(f"Recording {', '.join([entity.replace( + 'camera.', '') for entity in image_entities])} for {duration} seconds") + + # start threads for each camera + await asyncio.gather(*(record_camera(image_entity, image_entities.index(image_entity)) for image_entity in image_entities)) + + # add frames to client + for frame in camera_frames: + for frame_name in camera_frames[frame]: + self.client.add_frame( + base64_image=await self.resize_image(target_width=target_width, image_data=camera_frames[frame][frame_name]), + filename=frame_name + ) + async def add_images(self, image_entities, image_paths, target_width, include_filename): """Wrapper for client.add_frame for images""" if image_entities: @@ -98,19 +148,12 @@ async def add_images(self, image_entities, image_paths, target_width, include_fi image_data = await self.client._fetch(image_url) # If entity snapshot requested, use entity name as 'filename' - if include_filename: - entity_name = self.hass.states.get( - image_entity).attributes.get('friendly_name') + self.client.add_frame( + base64_image=await self.resize_image(target_width=target_width, image_data=image_data), + filename=self.hass.states.get( + image_entity).attributes.get('friendly_name') if include_filename else "" + ) - self.client.add_frame( - base64_image=await self.resize_image(target_width=target_width, image_data=image_data), - filename=entity_name - ) - else: - self.client.add_frame( - base64_image=await self.resize_image(target_width=target_width, image_data=image_data), - filename="" - ) except AttributeError as e: raise ServiceValidationError( f"Entity {image_entity} does not exist") @@ -136,11 +179,11 @@ async def add_images(self, image_entities, image_paths, target_width, include_fi return self.client async def add_videos(self, video_paths, event_ids, interval, target_width, include_filename): + """Wrapper for client.add_frame for videos""" tmp_clips_dir = f"/config/custom_components/{DOMAIN}/tmp_clips" tmp_frames_dir = f"/config/custom_components/{DOMAIN}/tmp_frames" if not video_paths: video_paths = [] - """Wrapper for client.add_frame for videos""" if event_ids: for event_id in event_ids: try: @@ -151,14 +194,17 @@ async def add_videos(self, video_paths, event_ids, interval, target_width, inclu os.makedirs(tmp_clips_dir, exist_ok=True) _LOGGER.info(f"Created {tmp_clips_dir}") # save clip to file with event_id as filename - clip_path = os.path.join(tmp_clips_dir, event_id.split("-")[-1] + ".mp4") + clip_path = os.path.join( + tmp_clips_dir, event_id.split("-")[-1] + ".mp4") await self.hass.loop.run_in_executor(None, self._save_clip, clip_data, clip_path) - _LOGGER.info(f"Saved frigate clip to {clip_path} (temporarily)") + _LOGGER.info( + f"Saved frigate clip to {clip_path} (temporarily)") # append to video_paths video_paths.append(clip_path) except AttributeError as e: - raise ServiceValidationError(f"Failed to fetch frigate clip {event_id}: {e}") + raise ServiceValidationError( + f"Failed to fetch frigate clip {event_id}: {e}") if video_paths: _LOGGER.debug(f"Processing videos: {video_paths}") for video_path in video_paths: @@ -170,7 +216,8 @@ async def add_videos(self, video_paths, event_ids, interval, target_width, inclu if os.path.exists(tmp_frames_dir): _LOGGER.debug(f"Created {tmp_frames_dir}") else: - _LOGGER.error(f"Failed to create temp directory {tmp_frames_dir}") + _LOGGER.error( + f"Failed to create temp directory {tmp_frames_dir}") ffmpeg_cmd = [ "ffmpeg", @@ -185,7 +232,8 @@ async def add_videos(self, video_paths, event_ids, interval, target_width, inclu for frame_file in await self.hass.loop.run_in_executor(None, os.listdir, tmp_frames_dir): _LOGGER.debug(f"Adding frame {frame_file}") frame_counter = 0 - frame_path = os.path.join(tmp_frames_dir, frame_file) + frame_path = os.path.join( + tmp_frames_dir, frame_file) # Remove transparency for compatibility with Image.open(frame_path) as img: @@ -218,4 +266,9 @@ async def add_videos(self, video_paths, event_ids, interval, target_width, inclu f"Deleted tmp folder: {tmp_frames_dir}") except FileNotFoundError as e: _LOGGER.error(f"Failed to delete tmp folders: {e}") - return self.client \ No newline at end of file + return self.client + + async def add_streams(self, image_entities, duration, interval, target_width, include_filename): + if image_entities: + await self.record(image_entities, duration, interval, target_width, include_filename) + return self.client diff --git a/custom_components/llmvision/request_handlers.py b/custom_components/llmvision/request_handlers.py index c9a40e2..9418980 100644 --- a/custom_components/llmvision/request_handlers.py +++ b/custom_components/llmvision/request_handlers.py @@ -23,6 +23,7 @@ ERROR_ANTHROPIC_NOT_CONFIGURED, ERROR_GOOGLE_NOT_CONFIGURED, ERROR_GROQ_NOT_CONFIGURED, + ERROR_GROQ_MULTIPLE_IMAGES, ERROR_LOCALAI_NOT_CONFIGURED, ERROR_OLLAMA_NOT_CONFIGURED, ERROR_NO_IMAGE_INPUT @@ -30,6 +31,7 @@ _LOGGER = logging.getLogger(__name__) + def sanitize_data(data): """Remove long string data from request data to reduce log size""" if isinstance(data, dict): @@ -42,6 +44,45 @@ def sanitize_data(data): return data +def get_provider(hass, provider_uid): + """Translate the UID of the config entry into the provider name.""" + _LOGGER.info(f"llmvision storage: {hass.data[DOMAIN]}") + if DOMAIN not in hass.data: + return None + + entry_data = hass.data[DOMAIN].get(provider_uid) + if not entry_data: + return None + + if CONF_OPENAI_API_KEY in entry_data: + return "OpenAI" + elif CONF_ANTHROPIC_API_KEY in entry_data: + return "Anthropic" + elif CONF_GOOGLE_API_KEY in entry_data: + return "Google" + elif CONF_GROQ_API_KEY in entry_data: + return "Groq" + elif CONF_LOCALAI_IP_ADDRESS in entry_data: + return "LocalAI" + elif CONF_OLLAMA_IP_ADDRESS in entry_data: + return "Ollama" + elif CONF_CUSTOM_OPENAI_API_KEY in entry_data: + return "Custom OpenAI" + + return None + + +default_model = lambda provider: { + "OpenAI": "gpt-4o-mini", + "Anthropic": "claude-3-5-sonnet-20240620", + "Google": "gemini-1.5-flash-latest", + "Groq": "llava-v1.5-7b-4096-preview", + "LocalAI": "gpt-4-vision-preview", + "Ollama": "llava-phi3:latest", + "Custom OpenAI": "gpt-4o-mini" +}.get(provider, "gpt-4o-mini") # Default value if provider is not found + + class RequestHandler: def __init__(self, hass, message, max_tokens, temperature, detail): self.session = async_get_clientsession(hass) @@ -54,43 +95,52 @@ def __init__(self, hass, message, max_tokens, temperature, detail): self.filenames = [] async def make_request(self, call): - if call.provider == 'OpenAI': - api_key = self.hass.data.get(DOMAIN).get(CONF_OPENAI_API_KEY) - model = call.model - self._validate_call(provider=call.provider, + entry_id = call.provider + provider = get_provider(self.hass, entry_id) + model = call.model if call.model != "None" else default_model(provider) + _LOGGER.info(f"Provider: {provider}") + _LOGGER.info(f"Model Default: {model}") + _LOGGER.info(f"Model: {call.model} tyle: {type(call.model)}") + + if provider == 'OpenAI': + api_key = self.hass.data.get(DOMAIN).get( + entry_id).get(CONF_OPENAI_API_KEY) + self._validate_call(provider=provider, api_key=api_key, base64_images=self.base64_images) response_text = await self.openai(model=model, api_key=api_key) - elif call.provider == 'Anthropic': - api_key = self.hass.data.get(DOMAIN).get(CONF_ANTHROPIC_API_KEY) - model = call.model - self._validate_call(provider=call.provider, + elif provider == 'Anthropic': + api_key = self.hass.data.get(DOMAIN).get( + entry_id).get(CONF_ANTHROPIC_API_KEY) + self._validate_call(provider=provider, api_key=api_key, base64_images=self.base64_images) response_text = await self.anthropic(model=model, api_key=api_key) - elif call.provider == 'Google': - api_key = self.hass.data.get(DOMAIN).get(CONF_GOOGLE_API_KEY) - model = call.model - self._validate_call(provider=call.provider, + elif provider == 'Google': + api_key = self.hass.data.get(DOMAIN).get( + entry_id).get(CONF_GOOGLE_API_KEY) + self._validate_call(provider=provider, api_key=api_key, base64_images=self.base64_images) response_text = await self.google(model=model, api_key=api_key) - elif call.provider == 'Groq': - api_key = self.hass.data.get(DOMAIN).get(CONF_GROQ_API_KEY) - model = call.model - self._validate_call(provider=call.provider, + elif provider == 'Groq': + api_key = self.hass.data.get(DOMAIN).get( + entry_id).get(CONF_GROQ_API_KEY) + self._validate_call(provider=provider, api_key=api_key, base64_images=self.base64_images) response_text = await self.groq(model=model, api_key=api_key) - elif call.provider == 'LocalAI': + elif provider == 'LocalAI': ip_address = self.hass.data.get( - DOMAIN, {}).get(CONF_LOCALAI_IP_ADDRESS) + DOMAIN).get( + entry_id).get(CONF_LOCALAI_IP_ADDRESS) port = self.hass.data.get( - DOMAIN, {}).get(CONF_LOCALAI_PORT) + DOMAIN).get( + entry_id).get(CONF_LOCALAI_PORT) https = self.hass.data.get( - DOMAIN, {}).get(CONF_LOCALAI_HTTPS, False) - model = call.model - self._validate_call(provider=call.provider, + DOMAIN).get( + entry_id).get(CONF_LOCALAI_HTTPS, False) + self._validate_call(provider=provider, api_key=None, base64_images=self.base64_images, ip_address=ip_address, @@ -99,14 +149,16 @@ async def make_request(self, call): ip_address=ip_address, port=port, https=https) - elif call.provider == 'Ollama': + elif provider == 'Ollama': ip_address = self.hass.data.get( - DOMAIN, {}).get(CONF_OLLAMA_IP_ADDRESS) - port = self.hass.data.get(DOMAIN, {}).get(CONF_OLLAMA_PORT) - https = self.hass.data.get(DOMAIN, {}).get( + DOMAIN).get( + entry_id).get(CONF_OLLAMA_IP_ADDRESS) + port = self.hass.data.get(DOMAIN).get( + entry_id).get(CONF_OLLAMA_PORT) + https = self.hass.data.get(DOMAIN).get( + entry_id).get( CONF_OLLAMA_HTTPS, False) - model = call.model - self._validate_call(provider=call.provider, + self._validate_call(provider=provider, api_key=None, base64_images=self.base64_images, ip_address=ip_address, @@ -115,14 +167,14 @@ async def make_request(self, call): ip_address=ip_address, port=port, https=https) - elif call.provider == 'Custom OpenAI': + elif provider == 'Custom OpenAI': api_key = self.hass.data.get(DOMAIN).get( + entry_id).get( CONF_CUSTOM_OPENAI_API_KEY, "") endpoint = self.hass.data.get(DOMAIN).get( + entry_id).get( CONF_CUSTOM_OPENAI_ENDPOINT) - - model = call.model - self._validate_call(provider=call.provider, + self._validate_call(provider=provider, api_key=api_key, base64_images=self.base64_images) response_text = await self.openai(model=model, api_key=api_key, endpoint=endpoint) @@ -399,6 +451,8 @@ def _validate_call(self, provider, api_key, base64_images, ip_address=None, port elif provider == 'Groq': if not api_key: raise ServiceValidationError(ERROR_GROQ_NOT_CONFIGURED) + if len(base64_images) > 1: + raise ServiceValidationError(ERROR_GROQ_MULTIPLE_IMAGES) # Checks for LocalAI elif provider == 'LocalAI': if not ip_address or not port: diff --git a/custom_components/llmvision/services.yaml b/custom_components/llmvision/services.yaml index f0e5eed..1f80b2d 100644 --- a/custom_components/llmvision/services.yaml +++ b/custom_components/llmvision/services.yaml @@ -4,33 +4,24 @@ image_analyzer: fields: provider: name: Provider - description: 'Provider to use' + description: 'Configuration to use' required: true - default: 'OpenAI' selector: - select: - options: - - 'OpenAI' - - 'Anthropic' - - 'Google' - - 'Groq' - - 'Ollama' - - 'LocalAI' - - 'Custom OpenAI' + config_entry: + integration: llmvision model: name: Model required: false - description: 'Model to use' + description: 'Model to use. Uncheck for default.' example: "gpt-4o-mini" - default: "gpt-4o-mini" selector: text: multiline: false message: name: Prompt required: true - description: 'Prompt' - example: "Describe the person present in the photo" + description: 'Model prompt' + example: "Describe the image" selector: text: multiline: true @@ -38,7 +29,7 @@ image_analyzer: name: Image File required: false description: 'Local path to image' - example: "/config/www/images/img.jpg" + example: "/config/www/tmp/front_door.jpg" selector: text: multiline: true @@ -53,25 +44,26 @@ image_analyzer: multiple: true include_filename: name: Include Filename - required: false + required: true description: 'Include filename in the request' + example: false default: false selector: boolean: target_width: name: Target Width required: false - description: 'Width in pixels to downscale to' + description: 'Width in pixels to downscale' example: 1280 default: 1280 selector: number: min: 512 - max: 3840 + max: 1920 detail: name: Detail required: false - description: "Detail parameter, leave empty for 'auto'" + description: "Detail parameter. Leave empty for 'auto'" default: 'high' selector: select: @@ -86,17 +78,17 @@ image_analyzer: default: 100 selector: number: - min: 10 - max: 1000 + min: 1 + max: 300 temperature: name: Temperature required: true description: 'Randomness. Lower is more accurate, higher is more creative' - example: 0.5 - default: 0.5 + example: 0.2 + default: 0.2 selector: number: - min: 0.0 + min: 0.1 max: 1.0 step: 0.1 @@ -106,31 +98,24 @@ video_analyzer: fields: provider: name: Provider - description: 'Provider to use' + description: 'Configuration to use' required: true - default: 'OpenAI' selector: - select: - options: - - 'OpenAI' - - 'Anthropic' - - 'Google' - - 'Ollama' - - 'LocalAI' + config_entry: + integration: llmvision model: name: Model required: false - description: 'Model to use' + description: 'Model to use. Uncheck for default.' example: "gpt-4o-mini" - default: "gpt-4o-mini" selector: text: multiline: false message: name: Prompt required: true - description: 'Prompt' - example: "Describe what's going on in the video" + description: 'Model prompt' + example: "Describe what happens in the video" selector: text: multiline: true @@ -162,21 +147,22 @@ video_analyzer: max: 60 include_filename: name: Include Filename - required: false + required: true description: 'Include filename in the request' + example: false default: false selector: boolean: target_width: name: Target Width required: false - description: 'Width in pixels to downscale to' + description: 'Width in pixels to downscale' example: 1280 default: 1280 selector: number: min: 512 - max: 3840 + max: 1920 detail: name: Detail required: false @@ -195,16 +181,122 @@ video_analyzer: default: 100 selector: number: - min: 10 - max: 1000 + min: 1 + max: 300 temperature: name: Temperature required: true description: 'Randomness. Lower is more accurate, higher is more creative' - example: 0.5 - default: 0.5 + example: 0.2 + default: 0.2 selector: number: min: 0.0 max: 1.0 + step: 0.1 + +stream_analyzer: + name: Stream Analyzer + description: Analyze a live camera stream with AI + fields: + provider: + name: Provider + description: 'Configuration to use' + required: true + selector: + config_entry: + integration: llmvision + model: + name: Model + required: false + description: 'Model to use. Uncheck for default.' + example: "gpt-4o-mini" + selector: + text: + multiline: false + message: + name: Prompt + required: true + description: 'Model prompt' + example: "Describe what happens in the camera feed" + selector: + text: + multiline: true + image_entity: + name: Camera Entity + required: true + description: 'Camera entity to stream' + example: 'camera.front_door' + selector: + entity: + domain: ["camera"] + multiple: true + interval: + name: Interval + description: Analyze frames every seconds + required: true + example: 2 + default: 2 + selector: + number: + min: 1 + max: 10 + duration: + name: Recording Duration + required: true + description: 'How long to record in seconds' + example: 5 + default: 5 + selector: + number: + min: 0 + max: 60 + include_filename: + name: Include camera name + required: true + description: 'Include camera name in request' + example: false + default: false + selector: + boolean: + target_width: + name: Target Width + required: false + description: 'Width in pixels to downscale' + example: 1280 + default: 1280 + selector: + number: + min: 512 + max: 1920 + detail: + name: Detail + required: false + description: "Detail parameter, leave empty for 'auto'" + default: 'high' + selector: + select: + options: + - 'high' + - 'low' + max_tokens: + name: Maximum Tokens + description: 'Maximum number of tokens to generate' + required: true + example: 100 + default: 100 + selector: + number: + min: 1 + max: 300 + temperature: + name: Temperature + required: true + description: 'Randomness. Lower is more accurate, higher is more creative' + example: 0.2 + default: 0.2 + selector: + number: + min: 0.1 + max: 1.0 step: 0.1 \ No newline at end of file