Skip to content

Commit

Permalink
Merge branch 'main' into image-persist
Browse files Browse the repository at this point in the history
  • Loading branch information
valentinfrlch authored Dec 26, 2024
2 parents 4bf756f + 32542f9 commit b8a359d
Show file tree
Hide file tree
Showing 16 changed files with 1,151 additions and 917 deletions.
9 changes: 6 additions & 3 deletions benchmark_visualization/benchmark_data.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
Model,Size,Date,Overall,Art & Design,Business,Science,Health & Medicine,Human. & Social Sci.,Tech & Eng.,Cost
Llama 3.2 90B, 90B, 2024-09-25, 60.3, -, -, -, -, -, -, 0.00000001
Llama 3.2 11B, 11B, 2024-09-25, 50.7, -, -, -, -, -, -, 0.00000001
GPT-4o,-,2024-05-27,69.1,-,-,-,-,-,-,5
GPT-4o mini,-,2024-05-27,59.4,-,-,-,-,-,-,0.15
Gemini 1.5 Pro,-,2024-05-31,65.8,-,-,-,-,-,-,3.5
Expand Down Expand Up @@ -33,9 +35,10 @@ Yi-VL-6B*,6B,2024-01-23,39.1,52.5,30.7,31.3,38,53.3,35.7,
InternVL-Chat-V1.1*,-,2024-01-27,39.1,56.7,34.7,31.3,39.3,57.5,27.1,
Bunny-3B*,3B,2024-02-13,38.2,49.2,30.7,30.7,40.7,45,37.1,
SVIT*,-,2023-12-26,38,52.5,27.3,28,42,51.7,33.8,
MiniCPM-V*,-,2024-02-07,37.2,55.8,33.3,28,32.7,58.3,27.1,
MiniCPM-V-2*,-,2024-04-16,37.1,63.3,28.7,30,30,56.7,27.1,
LLaVA-1.5-13B,13B,2023-11-27,36.4,51.7,22.7,29.3,38.7,53.3,31.4,
MiniCPM-V*,-,2024-02-07,37.2,55.8,33.3,28,32.7,58.3,27.1,0.00000001
MiniCPM-V-2*,-,2024-04-16,37.1,63.3,28.7,30,30,56.7,27.1,0.00000001
MiniCPM-V-2.6,-,2024-04-16,49.8,63.3,28.7,30,30,56.7,27.1,0.00000001
LLaVA-1.5-13B,13B,2023-11-27,36.4,51.7,22.7,29.3,38.7,53.3,31.4,0.00000001
Emu2-Chat*,-,2023-12-24,36.3,55,30,28.7,28.7,46.7,35.2,
Qwen-VL-7B-Chat,-,2023-11-27,35.9,51.7,29.3,29.3,33.3,45,32.9,
InstructBLIP-T5-XXL,-,2023-11-27,35.7,44.2,24,30.7,35.3,49.2,35.2,
Expand Down
124 changes: 112 additions & 12 deletions benchmark_visualization/model_benchmark_visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def category_name(model_name):
return 'Anthropic Claude 3'
elif "Gemini 1.5" in model_name:
return 'Google Gemini 1.5'
elif "Llama 3.2" in model_name:
return 'Meta Llama 3.2'
return 'Other'


Expand All @@ -47,7 +49,7 @@ def categorize_model(model_name):

# Set order for legend
category_order = ['GPT-4', 'Claude 3',
'Claude 3.5', 'Gemini 1.5'] # Add 'Gemini 1.5'
'Claude 3.5', 'Gemini 1.5']
df['Category'] = pd.Categorical(
df['Category'], categories=category_order, ordered=True)
df = df.sort_values('Category')
Expand All @@ -63,16 +65,20 @@ def categorize_model(model_name):
x = group_df['Cost'].astype(float)
y = group_df['Overall'].astype(float)

# Fit a polynomial
z = np.polyfit(x, y, 2)
p = np.poly1d(z)

x_poly = np.linspace(x.min(), x.max(), 100)
y_poly = p(x_poly)

# Plot the poly line
fig.add_trace(go.Scatter(x=x_poly, y=y_poly, mode='lines',
name=f"{category_name(category)}", line=dict(color=colors[category], width=6, dash='dash'), opacity=0.5))
try:
# Fit a polynomial
z = np.polyfit(x, y, 2)
p = np.poly1d(z)

x_poly = np.linspace(x.min(), x.max(), 100)
y_poly = p(x_poly)

# Plot the poly line
fig.add_trace(go.Scatter(x=x_poly, y=y_poly, mode='lines',
name=f"{category_name(category)}", line=dict(color=colors[category], width=6, dash='dash'), opacity=0.5))
except np.linalg.LinAlgError:
print(f"LinAlgError: SVD did not converge for category {category}")
continue

# plot the actual datapoints
for index, model in df.iterrows():
Expand Down Expand Up @@ -102,7 +108,101 @@ def categorize_model(model_name):
fig.write_image("benchmark_visualization/benchmark_visualization.jpg",
width=1920, height=1080, scale=1)

# Create a second visualization for open source models
fig_open_source = go.Figure()

def categorize_open_source_model(model_name):
    """Return the open source model family that a model name belongs to.

    The name is tested for the substrings "Llama", "LLaVA" and "MiniCPM",
    in that order, and the first one found is returned. The match is
    case-sensitive. Names containing none of them are reported as 'Other'.
    """
    # Order matters: it is the priority used when a name matches several.
    families = ('Llama', 'LLaVA', 'MiniCPM')
    return next(
        (family for family in families if family in model_name),
        'Other',
    )

# Categorize each open source model in the DataFrame
df['OpenSourceCategory'] = df['Model'].apply(categorize_open_source_model)

# Set order for legend
open_source_category_order = ['Llama', 'LLaVA', 'MiniCPM']
df['OpenSourceCategory'] = pd.Categorical(
df['OpenSourceCategory'], categories=open_source_category_order, ordered=True)
df = df.sort_values('OpenSourceCategory')

# Set colors for different open source models
open_source_colors = {'Llama': '#0081fb',
'LLaVA': '#ff7f0e', 'MiniCPM': '#2ca02c', 'Other': 'gray'}

# Convert 'Size' column to float
def convert_size_to_float(size_str):
    """Convert a model size label to a parameter count.

    Args:
        size_str: A size label such as "90B", " 11B", "350M" or "123".
            Surrounding whitespace is ignored and the "B" (1e9) / "M" (1e6)
            suffix is matched case-insensitively. "-", an empty string and
            None all mean "size unknown". A value that is already numeric
            is accepted and passed through.

    Returns:
        The size as a float; 0.0 when the size is unknown.

    Raises:
        ValueError: If the label is neither a number nor a number followed
            by a recognised suffix.
    """
    if size_str is None:
        return 0.0
    if not isinstance(size_str, str):
        # Already numeric (e.g. the column was converted on an earlier
        # pass); calling .strip() on it would raise AttributeError.
        return float(size_str)

    text = size_str.strip()
    if text in ("", "-"):
        # Unknown size; always a float so the column keeps a single dtype.
        return 0.0

    multipliers = {"B": 1e9, "M": 1e6}
    factor = multipliers.get(text[-1].upper())
    if factor is not None:
        return float(text[:-1]) * factor
    return float(text)

df['Size'] = df['Size'].apply(convert_size_to_float)

for category, group_df in df.groupby('OpenSourceCategory'):
if category not in ['Llama', 'LLaVA', 'MiniCPM']:
continue

x = group_df['Size'].astype(float)
y = group_df['Overall'].astype(float)

if len(x) == 0 or len(y) == 0:
continue

try:
# Fit a polynomial
z = np.polyfit(x, y, 2)
p = np.poly1d(z)

x_poly = np.linspace(x.min(), x.max(), 100)
y_poly = p(x_poly)

# Plot the poly line
fig_open_source.add_trace(go.Scatter(x=x_poly, y=y_poly, mode='lines',
name=f"{category_name(category)}", line=dict(color=open_source_colors[category], width=6, dash='dash'), opacity=0.5))
except np.linalg.LinAlgError:
print(f"LinAlgError: SVD did not converge for category {category}")
continue

# plot the actual datapoints
for index, model in df.iterrows():
try:
fig_open_source.add_trace(go.Scatter(x=[model['Size']], y=[model['Overall']],
mode='markers', # Removed 'text' from mode
name=model['Model'], marker=dict(size=20, color=open_source_colors[model['OpenSourceCategory']])))

except Exception as e:
continue

# Add model name
fig_open_source.add_annotation(x=model['Size'], y=model['Overall'],
text=model['Model'],
showarrow=False,
yshift=-35)

fig_open_source.update_layout(title={'text': 'Performance vs Model Size of Open Source Models in LLM Vision',
'font': {'size': 50}},
xaxis_title='Model Size (Parameters)', yaxis_title='MMMU Score Average',
paper_bgcolor='#0d1117', plot_bgcolor='#161b22',
font=dict(color='white', family='Product Sans', size=25),
xaxis=dict(color='white', linecolor='grey',
showgrid=False, zeroline=False),
yaxis=dict(color='white', linecolor='grey',
showgrid=False, zeroline=False))
# Save the plot as an image
fig_open_source.write_image("benchmark_visualization/open_source_benchmark_visualization.jpg",
width=1920, height=1080, scale=1)


if __name__ == "__main__":
df = read_benchmark_data()
create_benchmark_visualization(df)
create_benchmark_visualization(df)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion blueprints/event_summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -431,4 +431,4 @@ action:
group: "{{group}}"
interruption-level: passive

- delay: '00:{{cooldown|int}}:00'
- delay: '00:{{cooldown|int}}:00'
Loading

0 comments on commit b8a359d

Please sign in to comment.