(WIP): Rewrote providers from scratch, added support for Azure, reverted back to http requests, more robust validation; #64, #92
valentinfrlch · Dec 2, 2024 · 0610466 · 0610466
1 parent b532660
commit 0610466
Show file tree

Hide file tree

Showing 9 changed files with 631 additions and 619 deletions.
diff --git a/benchmark_visualization/benchmark_data.csv b/benchmark_visualization/benchmark_data.csv
@@ -1,4 +1,6 @@
 Model,Size,Date,Overall,Art & Design,Business,Science,Health & Medicine,Human. & Social Sci.,Tech & Eng.,Cost
+Llama 3.2 90B, 90B, 2024-09-25, 60.3, -, -, -, -, -, -, 0.00000001
+Llama 3.2 11B, 11B, 2024-09-25, 50.7, -, -, -, -, -, -, 0.00000001
 GPT-4o,-,2024-05-27,69.1,-,-,-,-,-,-,5
 GPT-4o mini,-,2024-05-27,59.4,-,-,-,-,-,-,0.15
 Gemini 1.5 Pro,-,2024-05-31,65.8,-,-,-,-,-,-,3.5
@@ -33,9 +35,10 @@ Yi-VL-6B*,6B,2024-01-23,39.1,52.5,30.7,31.3,38,53.3,35.7,
 InternVL-Chat-V1.1*,-,2024-01-27,39.1,56.7,34.7,31.3,39.3,57.5,27.1,
 Bunny-3B*,3B,2024-02-13,38.2,49.2,30.7,30.7,40.7,45,37.1,
 SVIT*,-,2023-12-26,38,52.5,27.3,28,42,51.7,33.8,
-MiniCPM-V*,-,2024-02-07,37.2,55.8,33.3,28,32.7,58.3,27.1,
-MiniCPM-V-2*,-,2024-04-16,37.1,63.3,28.7,30,30,56.7,27.1,
-LLaVA-1.5-13B,13B,2023-11-27,36.4,51.7,22.7,29.3,38.7,53.3,31.4,
+MiniCPM-V*,-,2024-02-07,37.2,55.8,33.3,28,32.7,58.3,27.1,0.00000001
+MiniCPM-V-2*,-,2024-04-16,37.1,63.3,28.7,30,30,56.7,27.1,0.00000001
+MiniCPM-V-2.6,-,2024-04-16,49.8,63.3,28.7,30,30,56.7,27.1,0.00000001
+LLaVA-1.5-13B,13B,2023-11-27,36.4,51.7,22.7,29.3,38.7,53.3,31.4,0.00000001
 Emu2-Chat*,-,2023-12-24,36.3,55,30,28.7,28.7,46.7,35.2,
 Qwen-VL-7B-Chat,-,2023-11-27,35.9,51.7,29.3,29.3,33.3,45,32.9,
 InstructBLIP-T5-XXL,-,2023-11-27,35.7,44.2,24,30.7,35.3,49.2,35.2,

diff --git a/benchmark_visualization/model_benchmark_visualizer.py b/benchmark_visualization/model_benchmark_visualizer.py
@@ -24,6 +24,8 @@ def category_name(model_name):
         return 'Anthropic Claude 3'
     elif "Gemini 1.5" in model_name:
         return 'Google Gemini 1.5'
+    elif "Llama 3.2" in model_name:
+        return 'Meta Llama 3.2'
     return 'Other'
 
 
@@ -47,7 +49,7 @@ def categorize_model(model_name):
 
     # Set order for legend
     category_order = ['GPT-4', 'Claude 3',
-                      'Claude 3.5', 'Gemini 1.5']  # Add 'Gemini 1.5'
+                      'Claude 3.5', 'Gemini 1.5']
     df['Category'] = pd.Categorical(
         df['Category'], categories=category_order, ordered=True)
     df = df.sort_values('Category')
@@ -63,16 +65,20 @@ def categorize_model(model_name):
         x = group_df['Cost'].astype(float)
         y = group_df['Overall'].astype(float)
 
-        # Fit a polynomial
-        z = np.polyfit(x, y, 2)
-        p = np.poly1d(z)
-
-        x_poly = np.linspace(x.min(), x.max(), 100)
-        y_poly = p(x_poly)
-
-        # Plot the poly line
-        fig.add_trace(go.Scatter(x=x_poly, y=y_poly, mode='lines',
-                      name=f"{category_name(category)}", line=dict(color=colors[category], width=6, dash='dash'), opacity=0.5))
+        try:
+            # Fit a polynomial
+            z = np.polyfit(x, y, 2)
+            p = np.poly1d(z)
+
+            x_poly = np.linspace(x.min(), x.max(), 100)
+            y_poly = p(x_poly)
+
+            # Plot the poly line
+            fig.add_trace(go.Scatter(x=x_poly, y=y_poly, mode='lines',
+                          name=f"{category_name(category)}", line=dict(color=colors[category], width=6, dash='dash'), opacity=0.5))
+        except np.linalg.LinAlgError:
+            print(f"LinAlgError: SVD did not converge for category {category}")
+            continue
 
     # plot the actual datapoints
     for index, model in df.iterrows():
@@ -102,7 +108,101 @@ def categorize_model(model_name):
     fig.write_image("benchmark_visualization/benchmark_visualization.jpg",
                     width=1920, height=1080, scale=1)
 
+    # Create a second visualization for open source models
+    fig_open_source = go.Figure()
+
+    def categorize_open_source_model(model_name):
+        """Categorize an open source model by its name.
+
+        Performs case-sensitive substring checks on ``model_name`` in the
+        order "Llama", "LLaVA", "MiniCPM" and returns the label of the
+        first match. Returns 'Other' when none of the three occurs.
+        """
+        if "Llama" in model_name:
+            return 'Llama'
+        elif "LLaVA" in model_name:
+            return 'LLaVA'
+        elif "MiniCPM" in model_name:
+            return 'MiniCPM'
+        return 'Other'
+
+    # Categorize each open source model in the DataFrame
+    df['OpenSourceCategory'] = df['Model'].apply(categorize_open_source_model)
+
+    # Set order for legend
+    open_source_category_order = ['Llama', 'LLaVA', 'MiniCPM']
+    df['OpenSourceCategory'] = pd.Categorical(
+        df['OpenSourceCategory'], categories=open_source_category_order, ordered=True)
+    df = df.sort_values('OpenSourceCategory')
+
+    # Set colors for different open source models
+    open_source_colors = {'Llama': '#0081fb',
+                          'LLaVA': '#ff7f0e', 'MiniCPM': '#2ca02c', 'Other': 'gray'}
+
+    # Convert 'Size' column to float
+    def convert_size_to_float(size_str):
+        """Convert a model size string to a parameter count.
+
+        Surrounding whitespace is stripped first. "-" yields 0, a trailing
+        'B' multiplies the numeric part by 1e9, a trailing 'M' multiplies it
+        by 1e6, and anything else is passed to ``float()`` unchanged.
+
+        NOTE(review): ``size_str`` must be a ``str`` because ``.strip()`` is
+        called on it; a non-string value (e.g. NaN from an empty CSV cell)
+        would raise AttributeError. Confirm the 'Size' column is always
+        populated.
+        """
+        size_str = size_str.strip()  # Remove leading/trailing spaces
+        # "-" stands for an unspecified size; returned as the int 0
+        if size_str == "-":
+            return 0
+        # Suffix checks are case-sensitive: only upper-case 'B' / 'M' match
+        if size_str.endswith('B'):
+            return float(size_str[:-1]) * 1e9
+        elif size_str.endswith('M'):
+            return float(size_str[:-1]) * 1e6
+        # No recognised suffix: float() raises ValueError if not numeric
+        return float(size_str)
+
+    df['Size'] = df['Size'].apply(convert_size_to_float)
+
+    for category, group_df in df.groupby('OpenSourceCategory'):
+        if category not in ['Llama', 'LLaVA', 'MiniCPM']:
+            continue
+
+        x = group_df['Size'].astype(float)
+        y = group_df['Overall'].astype(float)
+
+        if len(x) == 0 or len(y) == 0:
+            continue
+
+        try:
+            # Fit a polynomial
+            z = np.polyfit(x, y, 2)
+            p = np.poly1d(z)
+
+            x_poly = np.linspace(x.min(), x.max(), 100)
+            y_poly = p(x_poly)
+
+            # Plot the poly line
+            fig_open_source.add_trace(go.Scatter(x=x_poly, y=y_poly, mode='lines',
+                          name=f"{category_name(category)}", line=dict(color=open_source_colors[category], width=6, dash='dash'), opacity=0.5))
+        except np.linalg.LinAlgError:
+            print(f"LinAlgError: SVD did not converge for category {category}")
+            continue
+
+    # plot the actual datapoints
+    for index, model in df.iterrows():
+        try:
+            fig_open_source.add_trace(go.Scatter(x=[model['Size']], y=[model['Overall']],
+                                     mode='markers',  # Removed 'text' from mode
+                                     name=model['Model'], marker=dict(size=20, color=open_source_colors[model['OpenSourceCategory']])))
+
+        except Exception as e:
+            continue
+
+        # Add model name
+        fig_open_source.add_annotation(x=model['Size'], y=model['Overall'],
+                           text=model['Model'],
+                           showarrow=False,
+                           yshift=-35)
+
+    fig_open_source.update_layout(title={'text': 'Performance vs Model Size of Open Source Models in LLM Vision',
+                             'font': {'size': 50}},
+                      xaxis_title='Model Size (Parameters)', yaxis_title='MMMU Score Average',
+                      paper_bgcolor='#0d1117', plot_bgcolor='#161b22',
+                      font=dict(color='white', family='Product Sans', size=25),
+                      xaxis=dict(color='white', linecolor='grey',
+                                 showgrid=False, zeroline=False),
+                      yaxis=dict(color='white', linecolor='grey',
+                                 showgrid=False, zeroline=False))
+    # Save the plot as an image
+    fig_open_source.write_image("benchmark_visualization/open_source_benchmark_visualization.jpg",
+                    width=1920, height=1080, scale=1)
+
 
 if __name__ == "__main__":
     df = read_benchmark_data()
-    create_benchmark_visualization(df)
+    create_benchmark_visualization(df)
diff --git a/benchmark_visualization/open_source_benchmark_visualization.jpg b/benchmark_visualization/open_source_benchmark_visualization.jpg
diff --git a/custom_components/llmvision/__init__.py b/custom_components/llmvision/__init__.py
@@ -3,8 +3,9 @@
     DOMAIN,
     CONF_OPENAI_API_KEY,
     CONF_AZURE_API_KEY,
-    CONF_AZURE_ENDPOINT,
     CONF_AZURE_VERSION,
+    CONF_AZURE_BASE_URL,
+    CONF_AZURE_DEPLOYMENT,
     CONF_ANTHROPIC_API_KEY,
     CONF_GOOGLE_API_KEY,
     CONF_GROQ_API_KEY,
@@ -59,7 +60,8 @@ async def async_setup_entry(hass, entry):
     # Get all entries from config flow
     openai_api_key = entry.data.get(CONF_OPENAI_API_KEY)
     azure_api_key = entry.data.get(CONF_AZURE_API_KEY)
-    azure_endpoint = entry.data.get(CONF_AZURE_ENDPOINT)
+    azure_base_url = entry.data.get(CONF_AZURE_BASE_URL)
+    azure_deployment = entry.data.get(CONF_AZURE_DEPLOYMENT)
     azure_version = entry.data.get(CONF_AZURE_VERSION)
     anthropic_api_key = entry.data.get(CONF_ANTHROPIC_API_KEY)
     google_api_key = entry.data.get(CONF_GOOGLE_API_KEY)
@@ -82,7 +84,8 @@ async def async_setup_entry(hass, entry):
     entry_data = {
         CONF_OPENAI_API_KEY: openai_api_key,
         CONF_AZURE_API_KEY: azure_api_key,
-        CONF_AZURE_ENDPOINT: azure_endpoint,
+        CONF_AZURE_BASE_URL: azure_base_url,
+        CONF_AZURE_DEPLOYMENT: azure_deployment,
         CONF_AZURE_VERSION: azure_version,
         CONF_ANTHROPIC_API_KEY: anthropic_api_key,
         CONF_GOOGLE_API_KEY: google_api_key,
@@ -144,7 +147,7 @@ async def async_migrate_entry(hass, config_entry: ConfigEntry) -> bool:
         return False
 
 
-async def _remember(hass, call, start, response):
+async def _remember(hass, call, start, response) -> None:
     if call.remember:
         # Find semantic index config
         config_entry = None
@@ -182,7 +185,7 @@ async def _remember(hass, call, start, response):
         )
 
 
-async def _update_sensor(hass, sensor_entity, new_value):
+async def _update_sensor(hass, sensor_entity, new_value) -> None:
     """Update the value of a sensor entity."""
     if sensor_entity:
         _LOGGER.info(
@@ -222,6 +225,9 @@ def __init__(self, data_call):
         self.expose_images = data_call.data.get(EXPOSE_IMAGES, False)
         self.generate_title = data_call.data.get(GENERATE_TITLE, False)
         self.sensor_entity = data_call.data.get(SENSOR_ENTITY)
+        # ------------ Added during call ------------
+        # self.base64_images : List[str] = []
+        # self.filenames : List[str] = []
 
     def get_service_call_data(self):
         return self