-
Notifications
You must be signed in to change notification settings - Fork 5
/
generate_leaderboard.py
77 lines (68 loc) · 3.35 KB
/
generate_leaderboard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import argparse
import os

import numpy as np
import pandas as pd
import yaml

from calculate_metrics import *
def load_display_names(config_path):
    """Return a ``{model_name: display_name}`` mapping read from a YAML config.

    Every top-level key of the YAML document is treated as a model name and its
    value must be a mapping containing a ``beautiful_name`` entry.

    Args:
        config_path: path of the YAML model configuration file.

    Returns:
        dict mapping each model name to its ``beautiful_name``. An empty
        document yields an empty dict.

    Raises:
        KeyError: if a model entry has no ``beautiful_name`` field.
    """
    with open(config_path, "r", encoding="utf-8") as f:
        # An empty YAML document parses to None; treat it as "no models".
        model_config = yaml.load(f, Loader=yaml.SafeLoader) or {}
    return {name: entry["beautiful_name"] for name, entry in model_config.items()}


def baseline_win_rates(win_rate, model_list, baseline_model, display_names):
    """Build the per-model win-rate table against one baseline model.

    Args:
        win_rate: 2-D array indexed as ``win_rate[i, j]`` with rows/columns in
            ``model_list`` order (presumably model i's win rate against model
            j -- TODO confirm against ``calculate_metrics.get_metrics``).
            It is not modified.
        model_list: list of model names, one per row/column of ``win_rate``.
        baseline_model: name of the model whose column is extracted.
        display_names: mapping from model name to display name.

    Returns:
        DataFrame with columns ``model``, ``beautiful_name`` and ``win_rate``,
        sorted by ``win_rate`` in descending order.

    Raises:
        ValueError: if ``baseline_model`` is not in ``model_list``.
        KeyError: if a model in ``model_list`` has no display name.
    """
    if baseline_model not in model_list:
        raise ValueError(
            f"baseline model {baseline_model!r} not found among evaluated models: "
            f"{list(model_list)}"
        )
    missing = [model for model in model_list if model not in display_names]
    if missing:
        raise KeyError(f"no beautiful_name configured for model(s): {missing}")

    baseline_index = model_list.index(baseline_model)
    # Work on a copy of the baseline column so the caller's matrix is untouched.
    rates = np.asarray(win_rate)[:, baseline_index].copy()
    # The baseline compared with itself is pinned to 50%.
    rates[baseline_index] = 0.50

    win_rate_df = pd.DataFrame(
        {
            "model": model_list,
            "beautiful_name": [display_names[model] for model in model_list],
            "win_rate": rates,
        }
    )
    # Stable sort so that tied models keep a deterministic order.
    return win_rate_df.sort_values(by="win_rate", ascending=False, kind="mergesort")


def build_leaderboard(character_df, scene_df):
    """Merge the character and scene win-rate tables into one ranked table.

    Args:
        character_df: table for the "character" subset with columns ``model``,
            ``beautiful_name`` and ``win_rate``.
        scene_df: table for the "scene" subset with the same columns.

    Returns:
        DataFrame with columns ``model``, ``beautiful_name``,
        ``win_rate_character``, ``win_rate_scene`` and ``win_rate_avg``, sorted
        by ``win_rate_avg`` descending, with a 1-based index named ``rank``.
        Only models present in both inputs are kept (inner merge).
    """
    # The display name is already carried by the character table.
    scene_df = scene_df.drop(columns=["beautiful_name"])
    leaderboard_df = pd.merge(
        character_df, scene_df, on="model", suffixes=("_character", "_scene")
    )
    leaderboard_df["win_rate_avg"] = (
        leaderboard_df["win_rate_character"] + leaderboard_df["win_rate_scene"]
    ) / 2
    leaderboard_df = leaderboard_df.sort_values(
        by="win_rate_avg", ascending=False, kind="mergesort"
    )
    leaderboard_df.index = np.arange(1, len(leaderboard_df) + 1)
    leaderboard_df.index.name = "rank"
    return leaderboard_df


def format_for_display(leaderboard_df):
    """Turn a leaderboard table into the human-readable display table.

    Args:
        leaderboard_df: table with columns ``beautiful_name``,
            ``win_rate_character``, ``win_rate_scene`` and ``win_rate_avg``.
            Its index (e.g. the model name) is discarded.

    Returns:
        DataFrame with columns ``Rank``, ``Model``, ``Character``, ``Scene`` and
        ``Avg. Win Rate``, ordered by average win rate descending, with the
        three win-rate columns rendered as percentage strings such as
        ``"50.00%"``.
    """
    display_df = leaderboard_df.reset_index(drop=True)
    display_df = display_df.sort_values(
        by="win_rate_avg", ascending=False, kind="mergesort"
    )
    display_df = display_df.rename(
        columns={
            "beautiful_name": "Model",
            "win_rate_avg": "Avg. Win Rate",
            "win_rate_character": "Character",
            "win_rate_scene": "Scene",
        }
    )
    display_df["Rank"] = np.arange(1, len(display_df) + 1)
    # Format before the final column selection so we never assign into a slice.
    for column in ("Character", "Scene", "Avg. Win Rate"):
        display_df[column] = display_df[column].map("{:.2%}".format)
    return display_df[["Rank", "Model", "Character", "Scene", "Avg. Win Rate"]]


def main(argv=None):
    """Compute the leaderboard against a baseline model and write it as CSV.

    Reads the labelled results of the "character" and "scene" subsets from
    ``--label_result_dir``, prints the ranked leaderboard, and writes
    ``leaderboard.csv`` (indexed and sorted by model name) and
    ``leaderboard_for_display.csv`` into ``--output_dir``.

    Args:
        argv: optional argument list; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--label_result_dir", type=str, default="results")
    parser.add_argument("--model_config", type=str, default="./config/api_config.yaml")
    parser.add_argument("--baseline_model", type=str, default="gpt-4o")
    parser.add_argument("--output_dir", type=str, default="./results")
    args = parser.parse_args(argv)

    # The config does not depend on the subset: load it once, and before the
    # metric computation so that a bad config path fails fast.
    display_names = load_display_names(args.model_config)

    result_dfs = []
    for subset in ("character", "scene"):
        _, win_rate, model_list = get_metrics(
            os.path.join(args.label_result_dir, subset), elo_algo="online"
        )
        result_dfs.append(
            baseline_win_rates(win_rate, model_list, args.baseline_model, display_names)
        )

    leaderboard_df = build_leaderboard(result_dfs[0], result_dfs[1])
    print(leaderboard_df)

    os.makedirs(args.output_dir, exist_ok=True)

    # Machine-readable CSV: keyed and sorted by model name.
    by_model_df = leaderboard_df.set_index("model").sort_index()
    by_model_df.to_csv(os.path.join(args.output_dir, "leaderboard.csv"))

    # Human-readable CSV: ranked, display names, percentages.
    format_for_display(by_model_df).to_csv(
        os.path.join(args.output_dir, "leaderboard_for_display.csv"), index=False
    )


if __name__ == "__main__":
    main()