Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor providers into classes, Add Azure #144

Merged
merged 10 commits into from
Dec 26, 2024
9 changes: 6 additions & 3 deletions benchmark_visualization/benchmark_data.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
Model,Size,Date,Overall,Art & Design,Business,Science,Health & Medicine,Human. & Social Sci.,Tech & Eng.,Cost
Llama 3.2 90B, 90B, 2024-09-25, 60.3, -, -, -, -, -, -, 0.00000001
Llama 3.2 11B, 11B, 2024-09-25, 50.7, -, -, -, -, -, -, 0.00000001
GPT-4o,-,2024-05-27,69.1,-,-,-,-,-,-,5
GPT-4o mini,-,2024-05-27,59.4,-,-,-,-,-,-,0.15
Gemini 1.5 Pro,-,2024-05-31,65.8,-,-,-,-,-,-,3.5
Expand Down Expand Up @@ -33,9 +35,10 @@ Yi-VL-6B*,6B,2024-01-23,39.1,52.5,30.7,31.3,38,53.3,35.7,
InternVL-Chat-V1.1*,-,2024-01-27,39.1,56.7,34.7,31.3,39.3,57.5,27.1,
Bunny-3B*,3B,2024-02-13,38.2,49.2,30.7,30.7,40.7,45,37.1,
SVIT*,-,2023-12-26,38,52.5,27.3,28,42,51.7,33.8,
MiniCPM-V*,-,2024-02-07,37.2,55.8,33.3,28,32.7,58.3,27.1,
MiniCPM-V-2*,-,2024-04-16,37.1,63.3,28.7,30,30,56.7,27.1,
LLaVA-1.5-13B,13B,2023-11-27,36.4,51.7,22.7,29.3,38.7,53.3,31.4,
MiniCPM-V*,-,2024-02-07,37.2,55.8,33.3,28,32.7,58.3,27.1,0.00000001
MiniCPM-V-2*,-,2024-04-16,37.1,63.3,28.7,30,30,56.7,27.1,0.00000001
MiniCPM-V-2.6,-,2024-04-16,49.8,63.3,28.7,30,30,56.7,27.1,0.00000001
LLaVA-1.5-13B,13B,2023-11-27,36.4,51.7,22.7,29.3,38.7,53.3,31.4,0.00000001
Emu2-Chat*,-,2023-12-24,36.3,55,30,28.7,28.7,46.7,35.2,
Qwen-VL-7B-Chat,-,2023-11-27,35.9,51.7,29.3,29.3,33.3,45,32.9,
InstructBLIP-T5-XXL,-,2023-11-27,35.7,44.2,24,30.7,35.3,49.2,35.2,
Expand Down
124 changes: 112 additions & 12 deletions benchmark_visualization/model_benchmark_visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def category_name(model_name):
return 'Anthropic Claude 3'
elif "Gemini 1.5" in model_name:
return 'Google Gemini 1.5'
elif "Llama 3.2" in model_name:
return 'Meta Llama 3.2'
return 'Other'


Expand All @@ -47,7 +49,7 @@ def categorize_model(model_name):

# Set order for legend
category_order = ['GPT-4', 'Claude 3',
'Claude 3.5', 'Gemini 1.5'] # Add 'Gemini 1.5'
'Claude 3.5', 'Gemini 1.5']
df['Category'] = pd.Categorical(
df['Category'], categories=category_order, ordered=True)
df = df.sort_values('Category')
Expand All @@ -63,16 +65,20 @@ def categorize_model(model_name):
x = group_df['Cost'].astype(float)
y = group_df['Overall'].astype(float)

# Fit a polynomial
z = np.polyfit(x, y, 2)
p = np.poly1d(z)

x_poly = np.linspace(x.min(), x.max(), 100)
y_poly = p(x_poly)

# Plot the poly line
fig.add_trace(go.Scatter(x=x_poly, y=y_poly, mode='lines',
name=f"{category_name(category)}", line=dict(color=colors[category], width=6, dash='dash'), opacity=0.5))
try:
# Fit a polynomial
z = np.polyfit(x, y, 2)
p = np.poly1d(z)

x_poly = np.linspace(x.min(), x.max(), 100)
y_poly = p(x_poly)

# Plot the poly line
fig.add_trace(go.Scatter(x=x_poly, y=y_poly, mode='lines',
name=f"{category_name(category)}", line=dict(color=colors[category], width=6, dash='dash'), opacity=0.5))
except np.linalg.LinAlgError:
print(f"LinAlgError: SVD did not converge for category {category}")
continue

# plot the actual datapoints
for index, model in df.iterrows():
Expand Down Expand Up @@ -102,7 +108,101 @@ def categorize_model(model_name):
fig.write_image("benchmark_visualization/benchmark_visualization.jpg",
width=1920, height=1080, scale=1)

# Create a second visualization for open source models
fig_open_source = go.Figure()

def categorize_open_source_model(model_name):
    """Return the open source model family that ``model_name`` belongs to.

    The name is searched (case-sensitively) for the substrings 'Llama',
    'LLaVA' and 'MiniCPM', in that order; the first one found is returned.
    A name containing none of them maps to 'Other'.
    """
    # Order matters: the first matching family wins.
    families = ('Llama', 'LLaVA', 'MiniCPM')
    return next((family for family in families if family in model_name), 'Other')

# Categorize each open source model in the DataFrame
df['OpenSourceCategory'] = df['Model'].apply(categorize_open_source_model)

# Set order for legend
open_source_category_order = ['Llama', 'LLaVA', 'MiniCPM']
df['OpenSourceCategory'] = pd.Categorical(
df['OpenSourceCategory'], categories=open_source_category_order, ordered=True)
df = df.sort_values('OpenSourceCategory')

# Set colors for different open source models
open_source_colors = {'Llama': '#0081fb',
'LLaVA': '#ff7f0e', 'MiniCPM': '#2ca02c', 'Other': 'gray'}

# Convert 'Size' column to float
def convert_size_to_float(size_str):
    """Convert a model size label such as "90B" or "500M" to a parameter count.

    Args:
        size_str: Size label, normally a string taken from the 'Size' column.
            A trailing K/M/B/T (any case) scales the number by 1e3/1e6/1e9/1e12.
            Surrounding whitespace is ignored. The placeholder "-", an empty
            string, ``None`` and NaN all mean "size unknown". Values that are
            already numeric are passed through, so applying this function to
            an already-converted column is harmless.

    Returns:
        The size as a float; 0.0 when the size is unknown.

    Raises:
        ValueError: If the numeric part of the label cannot be parsed.
    """
    import math  # local import: only this helper needs it

    suffix_multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9, 'T': 1e12}

    if size_str is None:
        return 0.0
    if isinstance(size_str, (int, float)):
        # A NaN here is presumably an empty CSV cell; treat it like the
        # "-" placeholder instead of crashing on .strip().
        return 0.0 if math.isnan(size_str) else float(size_str)

    text = str(size_str).strip()  # Remove leading/trailing spaces
    if text in ("", "-"):
        return 0.0

    multiplier = suffix_multipliers.get(text[-1].upper())
    if multiplier is not None:
        return float(text[:-1]) * multiplier
    return float(text)

df['Size'] = df['Size'].apply(convert_size_to_float)

for category, group_df in df.groupby('OpenSourceCategory'):
if category not in ['Llama', 'LLaVA', 'MiniCPM']:
continue

x = group_df['Size'].astype(float)
y = group_df['Overall'].astype(float)

if len(x) == 0 or len(y) == 0:
continue

try:
# Fit a polynomial
z = np.polyfit(x, y, 2)
p = np.poly1d(z)

x_poly = np.linspace(x.min(), x.max(), 100)
y_poly = p(x_poly)

# Plot the poly line
fig_open_source.add_trace(go.Scatter(x=x_poly, y=y_poly, mode='lines',
name=f"{category_name(category)}", line=dict(color=open_source_colors[category], width=6, dash='dash'), opacity=0.5))
except np.linalg.LinAlgError:
print(f"LinAlgError: SVD did not converge for category {category}")
continue

# plot the actual datapoints
for index, model in df.iterrows():
try:
fig_open_source.add_trace(go.Scatter(x=[model['Size']], y=[model['Overall']],
mode='markers', # Removed 'text' from mode
name=model['Model'], marker=dict(size=20, color=open_source_colors[model['OpenSourceCategory']])))

except Exception as e:
continue

# Add model name
fig_open_source.add_annotation(x=model['Size'], y=model['Overall'],
text=model['Model'],
showarrow=False,
yshift=-35)

fig_open_source.update_layout(title={'text': 'Performance vs Model Size of Open Source Models in LLM Vision',
'font': {'size': 50}},
xaxis_title='Model Size (Parameters)', yaxis_title='MMMU Score Average',
paper_bgcolor='#0d1117', plot_bgcolor='#161b22',
font=dict(color='white', family='Product Sans', size=25),
xaxis=dict(color='white', linecolor='grey',
showgrid=False, zeroline=False),
yaxis=dict(color='white', linecolor='grey',
showgrid=False, zeroline=False))
# Save the plot as an image
fig_open_source.write_image("benchmark_visualization/open_source_benchmark_visualization.jpg",
width=1920, height=1080, scale=1)


if __name__ == "__main__":
df = read_benchmark_data()
create_benchmark_visualization(df)
create_benchmark_visualization(df)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion blueprints/event_summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -431,4 +431,4 @@ action:
group: "{{group}}"
interruption-level: passive

- delay: '00:{{cooldown|int}}:00'
- delay: '00:{{cooldown|int}}:00'
Loading
Loading