From 06287f7bd73c91217f7ebbc2b4438fd79e2db33c Mon Sep 17 00:00:00 2001
From: valentinfrlch
Date: Fri, 16 Aug 2024 08:37:09 +0200
Subject: [PATCH 01/12] WIP: Custom OpenAI compatible provider
---
custom_components/llmvision/config_flow.py | 44 ++++++++++++++++++-
.../llmvision/request_handlers.py | 17 +++++--
2 files changed, 56 insertions(+), 5 deletions(-)
diff --git a/custom_components/llmvision/config_flow.py b/custom_components/llmvision/config_flow.py
index e41df68..9263e40 100644
--- a/custom_components/llmvision/config_flow.py
+++ b/custom_components/llmvision/config_flow.py
@@ -13,6 +13,8 @@
CONF_OLLAMA_IP_ADDRESS,
CONF_OLLAMA_PORT,
CONF_OLLAMA_HTTPS,
+ CONF_CUSTOM_OPENAI_API_KEY,
+ CONF_CUSTOM_OPENAI_ENDPOINT,
VERSION_ANTHROPIC,
)
import voluptuous as vol
@@ -131,6 +133,15 @@ async def google(self):
if not await self._validate_api_key(self.user_input[CONF_GOOGLE_API_KEY]):
_LOGGER.error("Could not connect to Google server.")
raise ServiceValidationError("handshake_failed")
+ async def custom_openai(self):
+ self._validate_provider()
+ protocol = self.user_input[CONF_CUSTOM_OPENAI_ENDPOINT].split("://")[0]
+ base_url = self.user_input[CONF_CUSTOM_OPENAI_ENDPOINT].split("://")[1]
+ port = self.user_input[CONF_CUSTOM_OPENAI_ENDPOINT].split(":")[2].split("/")[0]
+ if not await self._handshake(base_url=base_url, port=":"+port, protocol=protocol, endpoint="/v1/models"):
+ _LOGGER.error("Could not connect to Custom OpenAI server.")
+ raise ServiceValidationError("handshake_failed")
+
def get_configured_providers(self):
providers = []
@@ -149,6 +160,8 @@ def get_configured_providers(self):
providers.append("LocalAI")
if CONF_OLLAMA_IP_ADDRESS in self.hass.data[DOMAIN] and CONF_OLLAMA_PORT in self.hass.data[DOMAIN]:
providers.append("Ollama")
+ if CONF_CUSTOM_OPENAI_API_KEY in self.hass.data[DOMAIN] and CONF_CUSTOM_OPENAI_ENDPOINT in self.hass.data[DOMAIN]:
+ providers.append("Custom OpenAI")
return providers
@@ -167,6 +180,7 @@ async def handle_provider(self, provider, configured_providers):
"Google": self.async_step_google,
"Ollama": self.async_step_ollama,
"LocalAI": self.async_step_localai,
+ "Custom OpenAI": self.async_step_custom_openai
}
step_method = provider_steps.get(provider)
@@ -180,7 +194,7 @@ async def async_step_user(self, user_input=None):
data_schema = vol.Schema({
vol.Required("provider", default="OpenAI"): selector({
"select": {
- "options": ["OpenAI", "Anthropic", "Google", "Ollama", "LocalAI"],
+ "options": ["OpenAI", "Anthropic", "Google", "Ollama", "LocalAI", "Custom (OpenAI compatible)"],
"mode": "dropdown",
"sort": False,
"custom_value": False
@@ -339,3 +353,31 @@ async def async_step_google(self, user_input=None):
step_id="google",
data_schema=data_schema,
)
+
+ async def async_step_custom_openai(self, user_input=None):
+ data_schema = vol.Schema({
+ vol.Required(CONF_CUSTOM_OPENAI_ENDPOINT): str,
+ vol.Required(CONF_CUSTOM_OPENAI_API_KEY): str,
+ })
+
+ if user_input is not None:
+ # save provider to user_input
+ user_input["provider"] = self.init_info["provider"]
+ validator = Validator(self.hass, user_input)
+ try:
+ await validator.openai()
+ # add the mode to user_input
+ user_input["provider"] = self.init_info["provider"]
+ return self.async_create_entry(title="LLM Vision Custom OpenAI", data=user_input)
+ except ServiceValidationError as e:
+ _LOGGER.error(f"Validation failed: {e}")
+ return self.async_show_form(
+ step_id="custom_openai",
+ data_schema=data_schema,
+ errors={"base": "handshake_failed"}
+ )
+
+ return self.async_show_form(
+ step_id="custom_openai",
+ data_schema=data_schema,
+ )
\ No newline at end of file
diff --git a/custom_components/llmvision/request_handlers.py b/custom_components/llmvision/request_handlers.py
index bf14fdc..2b117c9 100644
--- a/custom_components/llmvision/request_handlers.py
+++ b/custom_components/llmvision/request_handlers.py
@@ -13,7 +13,10 @@
CONF_OLLAMA_IP_ADDRESS,
CONF_OLLAMA_PORT,
CONF_OLLAMA_HTTPS,
+ CONF_CUSTOM_OPENAI_ENDPOINT,
+ CONF_CUSTOM_OPENAI_API_KEY,
VERSION_ANTHROPIC,
+ ENDPOINT_OPENAI,
ERROR_OPENAI_NOT_CONFIGURED,
ERROR_ANTHROPIC_NOT_CONFIGURED,
ERROR_GOOGLE_NOT_CONFIGURED,
@@ -103,7 +106,14 @@ async def make_request(self, call):
ip_address=ip_address,
port=port,
https=https)
-
+ elif call.provider == 'Custom OpenAI':
+ api_key = self.hass.data.get(DOMAIN).get(CONF_CUSTOM_OPENAI_API_KEY)
+ endpoint = self.hass.data.get(DOMAIN).get(CONF_CUSTOM_OPENAI_ENDPOINT)
+ model = call.model
+ self._validate_call(provider=call.provider,
+ api_key=api_key,
+ base64_images=self.base64_images)
+ response_text = await self.openai(model=model, api_key=api_key, endpoint=endpoint)
return {"response_text": response_text}
def add_frame(self, base64_image, filename):
@@ -111,8 +121,7 @@ def add_frame(self, base64_image, filename):
self.filenames.append(filename)
# Request Handlers
- async def openai(self, model, api_key):
- from .const import ENDPOINT_OPENAI
+ async def openai(self, model, api_key, endpoint=ENDPOINT_OPENAI):
# Set headers and payload
headers = {'Content-type': 'application/json',
'Authorization': 'Bearer ' + api_key}
@@ -138,7 +147,7 @@ async def openai(self, model, api_key):
)
response = await self._post(
- url=ENDPOINT_OPENAI, headers=headers, data=data)
+ url=endpoint, headers=headers, data=data)
response_text = response.get(
"choices")[0].get("message").get("content")
From 7a56b23abbfc9891fd5dc137ed8122a3740b2868 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Fr=C3=B6hlich?=
<85313672+valentinfrlch@users.noreply.github.com>
Date: Mon, 19 Aug 2024 17:54:27 +0200
Subject: [PATCH 02/12] Update README.md
---
README.md | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index b0f8ebd..26dd660 100644
--- a/README.md
+++ b/README.md
@@ -70,10 +70,11 @@ logger:
1. **New Provider**: NVIDIA ChatRTX
2. **New Provider**: Custom (OpenAI API compatible) Providers
-3. **HACS**: Include in HACS default
-4. [x] ~~**Feature**: HTTPS support for LocalAI and Ollama~~
-5. [x] ~~**Feature**: Support for video files~~
-6. [x] ~~**Feature**: Analyze Frigate Recordings using frigate's `event_id`~~
+3. **Animation Support**: Support for animated GIFs
+4. **HACS**: Include in HACS default
+5. [x] ~~**Feature**: HTTPS support for LocalAI and Ollama~~
+6. [x] ~~**Feature**: Support for video files~~
+7. [x] ~~**Feature**: Analyze Frigate Recordings using frigate's `event_id`~~
## How to report a bug or request a feature
From 743e818aabdfe3e13b1ab07cb34445396a175574 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Fr=C3=B6hlich?=
<85313672+valentinfrlch@users.noreply.github.com>
Date: Mon, 19 Aug 2024 19:19:02 +0200
Subject: [PATCH 03/12] Update README.md
---
README.md | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/README.md b/README.md
index 26dd660..f86d214 100644
--- a/README.md
+++ b/README.md
@@ -89,3 +89,8 @@ logger:
>
>[KBD]: https://github.com/valentinfrlch/ha-llmvision/issues/new/choose
+
+## Support
+You can support this project by starring this GitHub repository. If you want you can also buy me a coffee here:
+
+
From 60ebb97d41b6a6284d6eab38831c8cad7d651f5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Fr=C3=B6hlich?=
<85313672+valentinfrlch@users.noreply.github.com>
Date: Mon, 19 Aug 2024 19:19:19 +0200
Subject: [PATCH 04/12] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index f86d214..886d185 100644
--- a/README.md
+++ b/README.md
@@ -91,6 +91,6 @@ logger:
## Support
-You can support this project by starring this GitHub repository. If you want you can also buy me a coffee here:
+You can support this project by starring this GitHub repository. If you want, you can also buy me a coffee here:
From 497fc64a33994b8595d2b6d89a2694fa6fea91cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Fr=C3=B6hlich?=
<85313672+valentinfrlch@users.noreply.github.com>
Date: Mon, 19 Aug 2024 21:44:49 +0200
Subject: [PATCH 05/12] Update manifest.json to 1.0.4
---
custom_components/llmvision/manifest.json | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/custom_components/llmvision/manifest.json b/custom_components/llmvision/manifest.json
index de58461..28f11a1 100644
--- a/custom_components/llmvision/manifest.json
+++ b/custom_components/llmvision/manifest.json
@@ -6,5 +6,5 @@
"documentation": "https://github.com/valentinfrlch/ha-llmvision",
"iot_class": "cloud_polling",
"issue_tracker": "https://github.com/valentinfrlch/ha-llmvision/issues",
- "version": "1.0.3"
-}
\ No newline at end of file
+ "version": "1.0.4"
+}
From 3129832bb9aa9693e1174f3d27596bfa7c441020 Mon Sep 17 00:00:00 2001
From: valentinfrlch
Date: Mon, 19 Aug 2024 22:11:16 +0200
Subject: [PATCH 06/12] Support for custom OpenAI API compatible providers
---
custom_components/llmvision/__init__.py | 8 +++
custom_components/llmvision/config_flow.py | 52 ++++++++++++++-----
custom_components/llmvision/manifest.json | 2 +-
.../llmvision/request_handlers.py | 12 ++++-
custom_components/llmvision/services.yaml | 1 +
custom_components/llmvision/strings.json | 8 +++
.../llmvision/translations/en.json | 8 +++
7 files changed, 75 insertions(+), 16 deletions(-)
diff --git a/custom_components/llmvision/__init__.py b/custom_components/llmvision/__init__.py
index bf806dc..e81bfc5 100644
--- a/custom_components/llmvision/__init__.py
+++ b/custom_components/llmvision/__init__.py
@@ -10,6 +10,8 @@
CONF_OLLAMA_IP_ADDRESS,
CONF_OLLAMA_PORT,
CONF_OLLAMA_HTTPS,
+ CONF_CUSTOM_OPENAI_ENDPOINT,
+ CONF_CUSTOM_OPENAI_API_KEY,
MODEL,
PROVIDER,
MAXTOKENS,
@@ -45,6 +47,8 @@ async def async_setup_entry(hass, entry):
ollama_ip_address = entry.data.get(CONF_OLLAMA_IP_ADDRESS)
ollama_port = entry.data.get(CONF_OLLAMA_PORT)
ollama_https = entry.data.get(CONF_OLLAMA_HTTPS)
+ custom_openai_endpoint = entry.data.get(CONF_CUSTOM_OPENAI_ENDPOINT)
+ custom_openai_api_key = entry.data.get(CONF_CUSTOM_OPENAI_API_KEY)
# Ensure DOMAIN exists in hass.data
if DOMAIN not in hass.data:
@@ -63,6 +67,8 @@ async def async_setup_entry(hass, entry):
CONF_OLLAMA_IP_ADDRESS: ollama_ip_address,
CONF_OLLAMA_PORT: ollama_port,
CONF_OLLAMA_HTTPS: ollama_https,
+ CONF_CUSTOM_OPENAI_ENDPOINT: custom_openai_endpoint,
+ CONF_CUSTOM_OPENAI_API_KEY: custom_openai_api_key
}.items()
if value is not None
})
@@ -104,6 +110,8 @@ def _default_model(self, provider):
return "gpt-4-vision-preview"
elif provider == "Ollama":
return "llava"
+ elif provider == "Custom OpenAI":
+ return "gpt-4o-mini"
def setup(hass, config):
diff --git a/custom_components/llmvision/config_flow.py b/custom_components/llmvision/config_flow.py
index 9263e40..f69a9fd 100644
--- a/custom_components/llmvision/config_flow.py
+++ b/custom_components/llmvision/config_flow.py
@@ -122,6 +122,39 @@ async def openai(self):
_LOGGER.error("Could not connect to OpenAI server.")
raise ServiceValidationError("handshake_failed")
+    async def custom_openai(self):
+        """Validate the user-supplied OpenAI-compatible endpoint.
+
+        Splits the endpoint URL into scheme, host and optional port, then
+        runs the handshake against ``/v1/models`` on that server.
+
+        Raises:
+            ServiceValidationError: "endpoint_parse_failed" when the URL has
+                no scheme/host or a bad port, "handshake_failed" if the handshake fails.
+        """
+        from urllib.parse import urlsplit
+
+        self._validate_provider()
+        try:
+            parts = urlsplit(self.user_input[CONF_CUSTOM_OPENAI_ENDPOINT])
+            # Accessing .port raises ValueError for a malformed port.
+            port = f":{parts.port}" if parts.port else ""
+            protocol, base_url = parts.scheme, parts.hostname
+            if not protocol or not base_url:
+                raise ValueError("endpoint needs a scheme and a host")
+        except ValueError as e:
+            _LOGGER.error(f"Could not parse endpoint: {e}")
+            raise ServiceValidationError("endpoint_parse_failed") from e
+        endpoint = "/v1/models"
+        header = {'Content-type': 'application/json'}
+        # The API key is optional in the form; only send it when provided.
+        if self.user_input.get(CONF_CUSTOM_OPENAI_API_KEY):
+            header['Authorization'] = 'Bearer ' + self.user_input[CONF_CUSTOM_OPENAI_API_KEY]
+        _LOGGER.debug(f"Connecting to: [protocol: {protocol}, base_url: {base_url}, port: {port}, endpoint: {endpoint}]")
+        if not await self._handshake(base_url=base_url, port=port, protocol=protocol, endpoint=endpoint, header=header):
+            _LOGGER.error("Could not connect to Custom OpenAI server.")
+            raise ServiceValidationError("handshake_failed")
+
async def anthropic(self):
self._validate_provider()
if not await self._validate_api_key(self.user_input[CONF_ANTHROPIC_API_KEY]):
@@ -133,15 +166,6 @@ async def google(self):
if not await self._validate_api_key(self.user_input[CONF_GOOGLE_API_KEY]):
_LOGGER.error("Could not connect to Google server.")
raise ServiceValidationError("handshake_failed")
- async def custom_openai(self):
- self._validate_provider()
- protocol = self.user_input[CONF_CUSTOM_OPENAI_ENDPOINT].split("://")[0]
- base_url = self.user_input[CONF_CUSTOM_OPENAI_ENDPOINT].split("://")[1]
- port = self.user_input[CONF_CUSTOM_OPENAI_ENDPOINT].split(":")[2].split("/")[0]
- if not await self._handshake(base_url=base_url, port=":"+port, protocol=protocol, endpoint="/v1/models"):
- _LOGGER.error("Could not connect to Custom OpenAI server.")
- raise ServiceValidationError("handshake_failed")
-
def get_configured_providers(self):
providers = []
@@ -160,7 +184,7 @@ def get_configured_providers(self):
providers.append("LocalAI")
if CONF_OLLAMA_IP_ADDRESS in self.hass.data[DOMAIN] and CONF_OLLAMA_PORT in self.hass.data[DOMAIN]:
providers.append("Ollama")
- if CONF_CUSTOM_OPENAI_API_KEY in self.hass.data[DOMAIN] and CONF_CUSTOM_OPENAI_ENDPOINT in self.hass.data[DOMAIN]:
+ if CONF_CUSTOM_OPENAI_ENDPOINT in self.hass.data[DOMAIN]:
providers.append("Custom OpenAI")
return providers
@@ -180,7 +204,7 @@ async def handle_provider(self, provider, configured_providers):
"Google": self.async_step_google,
"Ollama": self.async_step_ollama,
"LocalAI": self.async_step_localai,
- "Custom OpenAI": self.async_step_custom_openai
+ "Custom OpenAI": self.async_step_custom_openai,
}
step_method = provider_steps.get(provider)
@@ -194,7 +218,7 @@ async def async_step_user(self, user_input=None):
data_schema = vol.Schema({
vol.Required("provider", default="OpenAI"): selector({
"select": {
- "options": ["OpenAI", "Anthropic", "Google", "Ollama", "LocalAI", "Custom (OpenAI compatible)"],
+ "options": ["OpenAI", "Anthropic", "Google", "Ollama", "LocalAI", "Custom OpenAI"],
"mode": "dropdown",
"sort": False,
"custom_value": False
@@ -357,7 +381,7 @@ async def async_step_google(self, user_input=None):
async def async_step_custom_openai(self, user_input=None):
data_schema = vol.Schema({
vol.Required(CONF_CUSTOM_OPENAI_ENDPOINT): str,
- vol.Required(CONF_CUSTOM_OPENAI_API_KEY): str,
+ vol.Optional(CONF_CUSTOM_OPENAI_API_KEY): str,
})
if user_input is not None:
@@ -365,7 +389,7 @@ async def async_step_custom_openai(self, user_input=None):
user_input["provider"] = self.init_info["provider"]
validator = Validator(self.hass, user_input)
try:
- await validator.openai()
+ await validator.custom_openai()
# add the mode to user_input
user_input["provider"] = self.init_info["provider"]
return self.async_create_entry(title="LLM Vision Custom OpenAI", data=user_input)
diff --git a/custom_components/llmvision/manifest.json b/custom_components/llmvision/manifest.json
index de58461..acc2cfc 100644
--- a/custom_components/llmvision/manifest.json
+++ b/custom_components/llmvision/manifest.json
@@ -6,5 +6,5 @@
"documentation": "https://github.com/valentinfrlch/ha-llmvision",
"iot_class": "cloud_polling",
"issue_tracker": "https://github.com/valentinfrlch/ha-llmvision/issues",
- "version": "1.0.3"
+ "version": "1.0.5"
}
\ No newline at end of file
diff --git a/custom_components/llmvision/request_handlers.py b/custom_components/llmvision/request_handlers.py
index 2b117c9..5af79db 100644
--- a/custom_components/llmvision/request_handlers.py
+++ b/custom_components/llmvision/request_handlers.py
@@ -107,8 +107,15 @@ async def make_request(self, call):
port=port,
https=https)
elif call.provider == 'Custom OpenAI':
- api_key = self.hass.data.get(DOMAIN).get(CONF_CUSTOM_OPENAI_API_KEY)
+ api_key = self.hass.data.get(DOMAIN).get(
+ CONF_CUSTOM_OPENAI_API_KEY, "")
endpoint = self.hass.data.get(DOMAIN).get(CONF_CUSTOM_OPENAI_ENDPOINT)
+
+ # Additional debug logging
+ _LOGGER.debug(f"Data from DOMAIN: {self.hass.data.get(DOMAIN)}")
+ _LOGGER.debug(f"API Key: {api_key}")
+ _LOGGER.debug(f"Endpoint: {endpoint}")
+
model = call.model
self._validate_call(provider=call.provider,
api_key=api_key,
@@ -310,6 +317,9 @@ async def ollama(self, model, ip_address, port, https):
async def _post(self, url, headers, data):
"""Post data to url and return response data"""
_LOGGER.info(f"Request data: {sanitize_data(data)}")
+ _LOGGER.debug(
+ f"URL type: {type(url)}, Headers type: {type(headers)}, Data type: {type(data)}")
+
try:
response = await self.session.post(url, headers=headers, json=data)
except Exception as e:
diff --git a/custom_components/llmvision/services.yaml b/custom_components/llmvision/services.yaml
index c9855f6..a2d09e5 100644
--- a/custom_components/llmvision/services.yaml
+++ b/custom_components/llmvision/services.yaml
@@ -15,6 +15,7 @@ image_analyzer:
- 'Google'
- 'Ollama'
- 'LocalAI'
+ - 'Custom OpenAI'
model:
name: Model
required: false
diff --git a/custom_components/llmvision/strings.json b/custom_components/llmvision/strings.json
index b0bec17..5cf595b 100644
--- a/custom_components/llmvision/strings.json
+++ b/custom_components/llmvision/strings.json
@@ -43,6 +43,14 @@
"data": {
"google_api_key": "Your API key"
}
+ },
+ "custom_openai": {
+ "title": "Configure Custom OpenAI provider",
+ "description": "Important: Only works if the API is compatible with OpenAI's API. If the API doesn't require an API key, leave it empty.",
+ "data": {
+ "custom_openai_endpoint": "Custom Endpoint",
+ "custom_openai_api_key": "Your API key"
+ }
}
},
"error": {
diff --git a/custom_components/llmvision/translations/en.json b/custom_components/llmvision/translations/en.json
index b0bec17..5cf595b 100644
--- a/custom_components/llmvision/translations/en.json
+++ b/custom_components/llmvision/translations/en.json
@@ -43,6 +43,14 @@
"data": {
"google_api_key": "Your API key"
}
+ },
+ "custom_openai": {
+ "title": "Configure Custom OpenAI provider",
+ "description": "Important: Only works if the API is compatible with OpenAI's API. If the API doesn't require an API key, leave it empty.",
+ "data": {
+ "custom_openai_endpoint": "Custom Endpoint",
+ "custom_openai_api_key": "Your API key"
+ }
}
},
"error": {
From 9ccd5412032093c398a9a8aab301ac32f92e5e47 Mon Sep 17 00:00:00 2001
From: valentinfrlch
Date: Tue, 20 Aug 2024 09:00:24 +0200
Subject: [PATCH 07/12] Multiple Videos Remembered/Sent with Video Analyzer #56
Fixed cleanup process
---
custom_components/llmvision/media_handlers.py | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/custom_components/llmvision/media_handlers.py b/custom_components/llmvision/media_handlers.py
index 02c3099..cc804d2 100644
--- a/custom_components/llmvision/media_handlers.py
+++ b/custom_components/llmvision/media_handlers.py
@@ -127,8 +127,8 @@ async def add_images(self, image_entities, image_paths, target_width, include_fi
return self.client
async def add_videos(self, video_paths, event_ids, interval, target_width, include_filename):
- tmp_clips_dir = f"config/custom_components/{DOMAIN}/tmp_clips"
- tmp_frames_dir = f"config/custom_components/{DOMAIN}/tmp_frames"
+ tmp_clips_dir = f"/config/custom_components/{DOMAIN}/tmp_clips"
+ tmp_frames_dir = f"/config/custom_components/{DOMAIN}/tmp_frames"
if not video_paths:
video_paths = []
"""Wrapper for client.add_frame for videos"""
@@ -192,7 +192,14 @@ async def add_videos(self, video_paths, event_ids, interval, target_width, inclu
# Clean up tmp dirs
try:
await self.hass.loop.run_in_executor(None, shutil.rmtree, tmp_clips_dir)
+ _LOGGER.info(
+ f"Deleted tmp folder: {tmp_clips_dir}")
+ except FileNotFoundError as e:
+ _LOGGER.error(f"Failed to delete tmp folder: {e}")
+ try:
await self.hass.loop.run_in_executor(None, shutil.rmtree, tmp_frames_dir)
+ _LOGGER.info(
+ f"Deleted tmp folder: {tmp_frames_dir}")
except FileNotFoundError as e:
- pass
+ _LOGGER.error(f"Failed to delete tmp folders: {e}")
return self.client
\ No newline at end of file
From 57736f6769e2dff31f8f6d7fd8703eab9b8e7cdc Mon Sep 17 00:00:00 2001
From: valentinfrlch
Date: Tue, 20 Aug 2024 09:11:46 +0200
Subject: [PATCH 08/12] Gif Image Entity Input Support #54 Correctly convert
all input to jpg when input is GIF
---
custom_components/llmvision/media_handlers.py | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/custom_components/llmvision/media_handlers.py b/custom_components/llmvision/media_handlers.py
index cc804d2..9cb8bc3 100644
--- a/custom_components/llmvision/media_handlers.py
+++ b/custom_components/llmvision/media_handlers.py
@@ -36,6 +36,11 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img
# Open the image file
img = await self.hass.loop.run_in_executor(None, Image.open, image_path)
with img:
+ # Check if the image is a GIF and convert if necessary
+ _LOGGER.debug(f"Image format: {img.format}")
+ if img.format == 'GIF':
+ # Convert GIF to RGB
+ img = img.convert('RGB')
# calculate new height based on aspect ratio
width, height = img.size
aspect_ratio = width / height
@@ -45,7 +50,7 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img
if width > target_width or height > target_height:
img = img.resize((target_width, target_height))
- # Convert the image to base64
+ # Encode the image to base64
base64_image = await self._encode_image(img)
elif image_data:
@@ -54,6 +59,10 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img
img_byte_arr.write(image_data)
img = await self.hass.loop.run_in_executor(None, Image.open, img_byte_arr)
with img:
+ _LOGGER.debug(f"Image format: {img.format}")
+ if img.format == 'GIF':
+ # Convert GIF to RGB
+ img = img.convert('RGB')
# calculate new height based on aspect ratio
width, height = img.size
aspect_ratio = width / height
From ea0a4223b11c1efc7d0add39a6e845ce692942cd Mon Sep 17 00:00:00 2001
From: valentinfrlch
Date: Tue, 20 Aug 2024 09:25:10 +0200
Subject: [PATCH 09/12] Updated version number
---
README.md | 2 +-
custom_components/llmvision/manifest.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index b0f8ebd..0b666d1 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
-
+
diff --git a/custom_components/llmvision/manifest.json b/custom_components/llmvision/manifest.json
index acc2cfc..220c447 100644
--- a/custom_components/llmvision/manifest.json
+++ b/custom_components/llmvision/manifest.json
@@ -6,5 +6,5 @@
"documentation": "https://github.com/valentinfrlch/ha-llmvision",
"iot_class": "cloud_polling",
"issue_tracker": "https://github.com/valentinfrlch/ha-llmvision/issues",
- "version": "1.0.5"
+ "version": "1.1.0"
}
\ No newline at end of file
From 214cc85c39b84d95068a2dc78d13c6560157bb69 Mon Sep 17 00:00:00 2001
From: valentinfrlch
Date: Tue, 20 Aug 2024 09:45:04 +0200
Subject: [PATCH 10/12] Throw exception when Custom OpenAI API requested but
not configured, added strings.json translations
---
.../llmvision/request_handlers.py | 4 ++
custom_components/llmvision/strings.json | 2 +-
.../llmvision/translations/de.json | 44 +++++++++------
.../llmvision/translations/en.json | 2 +-
.../llmvision/translations/fr.json | 55 -------------------
5 files changed, 32 insertions(+), 75 deletions(-)
delete mode 100644 custom_components/llmvision/translations/fr.json
diff --git a/custom_components/llmvision/request_handlers.py b/custom_components/llmvision/request_handlers.py
index 5af79db..fccc486 100644
--- a/custom_components/llmvision/request_handlers.py
+++ b/custom_components/llmvision/request_handlers.py
@@ -22,6 +22,7 @@
ERROR_GOOGLE_NOT_CONFIGURED,
ERROR_LOCALAI_NOT_CONFIGURED,
ERROR_OLLAMA_NOT_CONFIGURED,
+ ERROR_CUSTOM_OPENAI_NOT_CONFIGURED,
ERROR_NO_IMAGE_INPUT
)
@@ -371,6 +372,9 @@ def _validate_call(self, provider, api_key, base64_images, ip_address=None, port
elif provider == 'Ollama':
if not ip_address or not port:
raise ServiceValidationError(ERROR_OLLAMA_NOT_CONFIGURED)
+ elif provider == 'Custom OpenAI':
+ if not api_key:
+ raise ServiceValidationError(ERROR_CUSTOM_OPENAI_NOT_CONFIGURED)
# Check media input
if base64_images == []:
raise ServiceValidationError(ERROR_NO_IMAGE_INPUT)
diff --git a/custom_components/llmvision/strings.json b/custom_components/llmvision/strings.json
index 5cf595b..d4500af 100644
--- a/custom_components/llmvision/strings.json
+++ b/custom_components/llmvision/strings.json
@@ -46,7 +46,7 @@
},
"custom_openai": {
"title": "Configure Custom OpenAI provider",
- "description": "Important: Only works if the API is compatible with OpenAI's API. If the API doesn't require an API key, leave it empty.",
+ "description": "**Important**: Only works if the API is compatible with OpenAI's API. If the API doesn't require an API key, leave it empty. The endpoint should have the following format: `http(s)://baseURL(:port)/some/endpoint`",
"data": {
"custom_openai_endpoint": "Custom Endpoint",
"custom_openai_api_key": "Your API key"
diff --git a/custom_components/llmvision/translations/de.json b/custom_components/llmvision/translations/de.json
index 0533116..ed49ed4 100644
--- a/custom_components/llmvision/translations/de.json
+++ b/custom_components/llmvision/translations/de.json
@@ -2,53 +2,61 @@
"config": {
"step": {
"user": {
- "title": "Wählen Sie Ihren Provider",
- "description": "Wählen Sie den Anbieter den verwenden möchten."
+ "title": "Anbieter auswählen",
+ "description": "Wähle den Anbieter den du konfigurieren möchtest."
},
"localai": {
- "title": "Verbinden Sie sich mit Ihrem LocalAI-Server",
- "description": "Geben Sie die IP-Adresse und den Port Ihres LocalAI-Servers an.",
+ "title": "Mit LocalAI-Server verbinden",
+ "description": "Gib die IP-Adresse und den Port deines LocalAI-Servers an.",
"data": {
"localai_ip": "IP-Adresse",
"localai_port": "Port"
}
},
"ollama": {
- "title": "Verbinden Sie sich mit Ihrem Ollama-Server",
- "description": "Geben Sie die IP-Adresse und den Port Ihres Ollama-Servers an.",
+ "title": "Mit Ollama-Server verbinden",
+ "description": "Gib die IP-Adresse und den Port deines Ollama-Servers an.",
"data": {
"localai_ip": "IP-Addresse",
"localai_port": "Port"
}
},
"openai": {
- "title": "Fügen Sie den OpenAI-API-Schlüssel hinzu",
- "description": "Geben Sie einen gültigen OpenAI-API-Schlüssel an.",
+ "title": "OpenAI",
+ "description": "Gib einen gültigen OpenAI API-key ein.",
"data": {
- "api_key": "Ihr API-Schlüssel"
+ "api_key": "Dein API-key"
}
},
"anthropic": {
- "title": "Fügen Sie den Anthropic-API-Schlüssel hinzu",
- "description": "Geben Sie einen gültigen Anthropic-API-Schlüssel an.",
+ "title": "Anthropic",
+ "description": "Gib einen gültigen Anthropic API-key ein.",
"data": {
- "api_key": "Ihr API-Schlüssel"
+ "api_key": "Dein API-key"
}
},
"google": {
- "title": "Fügen Sie den Google Gemini-API-Schlüssel hinzu",
- "description": "Geben Sie einen gültigen Google Gemini-API-Schlüssel an.",
+ "title": "Google Gemini",
+ "description": "Gib einen gültigen Google Gemini API-key ein.",
"data": {
- "api_key": "Ihr API-Schlüssel"
+ "api_key": "Dein API-key"
+ }
+ },
+ "custom_openai": {
+ "title": "OpenAI-kompatiblen Provider konfigurieren",
+ "description": "**Wichtig**: Funktioniert nur mit OpenAI API kompatiblen APIs. 'Custom Endpoint' muss das folgende Format haben: `http(s)://baseURL(:port)/some/endpoint`",
+ "data": {
+ "custom_openai_endpoint": "Custom Endpoint",
+ "custom_openai_api_key": "Dein API-key"
}
}
},
"error": {
- "handshake_failed": "Verbindung zum Server konnte nicht hergestellt werden. Überprüfen Sie Ihren API-Schlüssel oder die IP und den Port",
- "empty_api_key": "Ungültiger API-Key"
+ "handshake_failed": "Verbindung zum Server konnte nicht hergestellt werden. Überprüfe deinen API-key oder IP und Port",
+ "empty_api_key": "Ungültiger API-key"
},
"abort": {
- "already_configured": "Anbieter ist bereits konfiguriert. Löschen Sie die vorhandene Konfiguration, um eine neue hinzuzufügen.",
+ "already_configured": "Anbieter ist bereits konfiguriert. Lösche die vorhandene Konfiguration, um eine neue hinzuzufügen.",
"unknown_provider": "Unbekannter Anbieter"
}
}
diff --git a/custom_components/llmvision/translations/en.json b/custom_components/llmvision/translations/en.json
index 5cf595b..d4500af 100644
--- a/custom_components/llmvision/translations/en.json
+++ b/custom_components/llmvision/translations/en.json
@@ -46,7 +46,7 @@
},
"custom_openai": {
"title": "Configure Custom OpenAI provider",
- "description": "Important: Only works if the API is compatible with OpenAI's API. If the API doesn't require an API key, leave it empty.",
+ "description": "**Important**: Only works if the API is compatible with OpenAI's API. If the API doesn't require an API key, leave it empty. The endpoint should have the following format: `http(s)://baseURL(:port)/some/endpoint`",
"data": {
"custom_openai_endpoint": "Custom Endpoint",
"custom_openai_api_key": "Your API key"
diff --git a/custom_components/llmvision/translations/fr.json b/custom_components/llmvision/translations/fr.json
deleted file mode 100644
index 274a5f1..0000000
--- a/custom_components/llmvision/translations/fr.json
+++ /dev/null
@@ -1,55 +0,0 @@
-{
- "config": {
- "step": {
- "user": {
- "title": "Choisissez votre fournisseur",
- "description": "Sélectionnez le fournisseur que vous souhaitez utiliser pour votre IA."
- },
- "localai": {
- "title": "Connectez-vous à votre serveur LocalAI",
- "description": "Fournissez l'adresse IP et le port de votre serveur LocalAI.",
- "data": {
- "localai_ip": "Adresse IP",
- "localai_port": "Port"
- }
- },
- "ollama": {
- "title": "Connectez-vous à votre serveur Ollama",
- "description": "Fournissez l'adresse IP et le port de votre serveur Ollama.",
- "data": {
- "localai_ip": "Adresse IP",
- "localai_port": "Port"
- }
- },
- "openai": {
- "title": "Ajoutez la clé API OpenAI",
- "description": "Fournissez une clé API OpenAI valide.",
- "data": {
- "api_key": "Votre clé API"
- }
- },
- "anthropic": {
- "title": "Ajoutez la clé API OpenAI",
- "description": "Fournissez une clé API Anthropic valide.",
- "data": {
- "api_key": "Votre clé API"
- }
- },
- "google": {
- "title": "Ajoutez la clé API Google Gemini",
- "description": "Fournissez une clé API Google Gemini valide.",
- "data": {
- "api_key": "Votre clé API"
- }
- }
- },
- "error": {
- "handshake_failed": "Impossible de se connecter au serveur. Vérifiez votre clé API ou l'adresse IP et le port.",
- "empty_api_key": "clé API invalide"
- },
- "abort": {
- "already_configured": "Le fournisseur est déjà configuré. Supprimez la configuration existante pour en ajouter une nouvelle.",
- "unknown_provider": "Fournisseur inconnu"
- }
- }
-}
\ No newline at end of file
From e44867db842c6577c4dea54f6a5256418f0252f1 Mon Sep 17 00:00:00 2001
From: valentinfrlch
Date: Tue, 20 Aug 2024 11:09:55 +0200
Subject: [PATCH 11/12] updated README
---
README.md | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/README.md b/README.md
index 0b666d1..625c6fe 100644
--- a/README.md
+++ b/README.md
@@ -31,20 +31,19 @@
**LLM Vision** is a Home Assistant integration to analyze images, videos and camera feeds using the vision capabilities of multimodal LLMs.
-Supported providers are OpenAI, Anthropic, Google Gemini, [LocalAI](https://github.com/mudler/LocalAI) and [Ollama](https://ollama.com/).
+Supported providers are OpenAI, Anthropic, Google Gemini, [LocalAI](https://github.com/mudler/LocalAI), [Ollama](https://ollama.com/) and any OpenAI compatible API.
## Features
-- Compatible with OpenAI, Anthropic Claude, Google Gemini, [LocalAI](https://github.com/mudler/LocalAI) and [Ollama](https://ollama.com/)
+- Compatible with OpenAI, Anthropic Claude, Google Gemini, [LocalAI](https://github.com/mudler/LocalAI), [Ollama](https://ollama.com/) and custom OpenAI compatible APIs
- Takes images and video from camera entities as input
- Takes local image and video files as input
- Images can be downscaled for faster processing
## Resources
-Check the docs for detailed instructions on how to set up LLM Vision and each of the supported providers as well as usage examples and service call parameters:
+Check the docs for detailed instructions on how to set up LLM Vision and each of the supported providers, get inspiration from examples or join the discussion on the Home Assistant Community.
-
+
-Check [📖 Examples](https://llm-vision.gitbook.io/examples/) on how you can integrate llmvision into your Home Assistant setup or join the [🗨️ discussion](https://community.home-assistant.io/t/gpt-4o-vision-capabilities-in-home-assistant/729241) on the Home Assistant Community.
## Installation
[![Open a repository inside the Home Assistant Community Store.](https://my.home-assistant.io/badges/hacs_repository.svg)](https://my.home-assistant.io/redirect/hacs_repository/?owner=valentinfrlch&repository=ha-llmvision&category=Integration)
@@ -69,11 +68,11 @@ logger:
> These are planned features and ideas. They are subject to change and may not be implemented in the order listed or at all.
1. **New Provider**: NVIDIA ChatRTX
-2. **New Provider**: Custom (OpenAI API compatible) Providers
3. **HACS**: Include in HACS default
-4. [x] ~~**Feature**: HTTPS support for LocalAI and Ollama~~
-5. [x] ~~**Feature**: Support for video files~~
-6. [x] ~~**Feature**: Analyze Frigate Recordings using frigate's `event_id`~~
+4. [x] ~~**New Provider**: Custom (OpenAI API compatible) Providers~~
+5. [x] ~~**Feature**: HTTPS support for LocalAI and Ollama~~
+6. [x] ~~**Feature**: Support for video files~~
+7. [x] ~~**Feature**: Analyze Frigate Recordings using frigate's `event_id`~~
## How to report a bug or request a feature
From da9467b66db1e24874d5700a06d19bc6976c6b12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Fr=C3=B6hlich?=
<85313672+valentinfrlch@users.noreply.github.com>
Date: Thu, 22 Aug 2024 20:08:03 +0200
Subject: [PATCH 12/12] Update link for community button
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 6592b31..4e8f137 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ Supported providers are OpenAI, Anthropic, Google Gemini, [LocalAI](https://gith
## Resources
Check the docs for detailed instructions on how to set up LLM Vision and each of the supported providers, get inspiration from examples or join the discussion on the Home Assistant Community.
-
+
## Installation