-
Notifications
You must be signed in to change notification settings - Fork 73
/
stage-1_train.py
230 lines (200 loc) · 7.93 KB
/
stage-1_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import os
import torch
import numpy as np
import pandas as pd
from glob import glob
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from safetensors.torch import load_file
from argparse import ArgumentParser
"""
Perform multi-objective linear regression on precomputed embeddings.
This script loads embeddings and labels, splits the data into training and validation sets,
trains Ridge regression models for each attribute across a range of regularization strengths (alphas),
selects the best alpha based on validation loss, and saves the resulting regression weights.
"""
# ---------------------------
# Argument Parsing
# ---------------------------
parser = ArgumentParser(description="Linear Probing on Precomputed Embeddings")
parser.add_argument(
"--model_path",
type=str,
default="sfairXC/FsfairX-LLaMA3-RM-v0.1",
help="Path to the pre-trained model (HuggingFace path or local folder)",
)
parser.add_argument(
"--dataset_path",
type=str,
default="RLHFlow/ArmoRM-Multi-Objective-Data-v0.1",
help="Path to the dataset containing multi-objective labels (HuggingFace path or local folder)",
)
parser.add_argument(
"--embeddings_dir",
type=str,
default=None,
help="Path to the directory containing embedding files. If not provided, defaults to HOME/data/ArmoRM/embeddings/<model_name>/<dataset_name>",
)
parser.add_argument(
"--output_dir",
type=str,
default=None,
help="Path to save the regression weights. If not provided, defaults to HOME/data/ArmoRM/regression_weights/",
)
args = parser.parse_args()
# Extract names from paths
args.model_name = args.model_path.split("/")[-1]
args.dataset_name = args.dataset_path.split("/")[-1]
# ---------------------------
# Configuration and Setup
# ---------------------------
# Define the reward attributes as per the method overview in README.md
attributes = [
"helpsteer-helpfulness",
"helpsteer-correctness",
"helpsteer-coherence",
"helpsteer-complexity",
"helpsteer-verbosity",
"ultrafeedback-overall_score",
"ultrafeedback-instruction_following",
"ultrafeedback-truthfulness",
"ultrafeedback-honesty",
"ultrafeedback-helpfulness",
"beavertails-is_safe",
"prometheus-score",
"argilla-overall_quality",
"argilla-judge_lm",
"code-complexity",
"code-style",
"code-explanation",
"code-instruction-following",
"code-readability",
]
# Set the home directory
HOME = os.path.expanduser("~")
# Define the path to the embeddings based on user input or default location
if args.embeddings_dir:
embeddings_path = args.embeddings_dir
else:
embeddings_path = os.path.join(
HOME, "data", "ArmoRM", "embeddings", args.model_name, args.dataset_name
)
# Collect all embedding files matching the pattern embeddings_path-*.safetensors
embedding_files = sorted(glob(f"{embeddings_path}-*.safetensors"))
# ---------------------------
# Loading Embeddings and Labels
# ---------------------------
embeddings = []
labels = []
print("Loading embeddings and labels from Safetensors files...")
for file in tqdm(embedding_files, desc="Loading embeddings"):
# Load the safetensors file
data = load_file(file)
embeddings.append(data["embeddings"]) # Append embeddings tensor
labels.append(data["labels"]) # Append labels tensor
# Concatenate all embeddings and labels into single tensors
embeddings = torch.cat(embeddings, dim=0).float().numpy()
labels = torch.cat(labels, dim=0).float().numpy()
print(f"Total embeddings loaded: {embeddings.shape[0]}")
print(f"Total labels loaded: {labels.shape[0]}")
# ---------------------------
# Splitting Data into Train and Validation Sets
# ---------------------------
print("Splitting data into training and validation sets...")
X_train, X_val, Y_train, Y_val = train_test_split(
embeddings, labels, test_size=0.2, random_state=42
)
print(f"Training set size: {X_train.shape[0]}")
print(f"Validation set size: {X_val.shape[0]}")
# ---------------------------
# Defining Regularization Strengths (Alphas)
# ---------------------------
# Define a range of alpha values for Ridge regression
alphas = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
print(f"Using alphas: {alphas}")
# Initialize a DataFrame to store the results of each Ridge regression
df = pd.DataFrame(columns=["attribute", "alpha", "loss"])
# ---------------------------
# Ridge Regression Training
# ---------------------------
print("Training Ridge regression models for each attribute and alpha...")
for attr_idx in tqdm(range(Y_train.shape[1]), desc="Attributes"):
y_train = Y_train[:, attr_idx]
# Create a mask to filter out NaN values in training labels
valid_mask_train = ~np.isnan(y_train)
y_train_filtered = y_train[valid_mask_train]
X_train_filtered = X_train[valid_mask_train]
y_val = Y_val[:, attr_idx]
# Create a mask to filter out NaN values in validation labels
valid_mask_val = ~np.isnan(y_val)
y_val_filtered = y_val[valid_mask_val]
X_val_filtered = X_val[valid_mask_val]
# Iterate over each alpha to train Ridge models
for alpha in tqdm(alphas, desc=f"Alpha for attribute {attr_idx}", leave=False):
clf = Ridge(alpha=alpha, fit_intercept=False)
clf.fit(X_train_filtered, y_train_filtered) # Train the model
pred = clf.predict(X_val_filtered) # Predict on validation set
loss = mean_squared_error(y_val_filtered, pred) # Calculate MSE loss
# Append the results to the DataFrame
df = df._append(
{"attribute": attr_idx, "alpha": alpha, "loss": loss}, ignore_index=True
)
# ---------------------------
# Selecting Best Alphas Based on Validation Loss
# ---------------------------
print("Selecting the best alpha for each attribute based on validation loss...")
best_alphas = df.loc[df.groupby("attribute")["loss"].idxmin()]
print("Best alphas selected for each attribute:")
print(best_alphas)
# ---------------------------
# Fitting Final Models with Best Alphas and Extracting Weights
# ---------------------------
print(
"Fitting final Ridge regression models with the best alphas and extracting weights..."
)
weights = []
for index, row in tqdm(
best_alphas.iterrows(), total=best_alphas.shape[0], desc="Final Models"
):
attr_idx = int(row["attribute"])
best_alpha = row["alpha"]
# Initialize Ridge model with the best alpha
clf = Ridge(alpha=best_alpha, fit_intercept=False)
# Prepare training data
y_train = Y_train[:, attr_idx]
valid_mask_train = ~np.isnan(y_train)
X_train_filtered = X_train[valid_mask_train]
y_train_filtered = y_train[valid_mask_train]
# Train the model
clf.fit(X_train_filtered, y_train_filtered)
# Append the coefficient (weight) for the current attribute
weights.append(clf.coef_)
# Calculate loss on validation set for reporting
y_val = Y_val[:, attr_idx]
valid_mask_val = ~np.isnan(y_val)
X_val_filtered = X_val[valid_mask_val]
y_val_filtered = y_val[valid_mask_val]
pred = clf.predict(X_val_filtered)
loss = mean_squared_error(y_val_filtered, pred)
print(
f"Attribute {attr_idx} ({attributes[attr_idx]}): Best alpha = {best_alpha}, Validation Loss = {loss:.4f}"
)
# Stack all weights into a single NumPy array
weights = np.stack(weights)
print(f"All regression weights shape: {weights.shape}")
# ---------------------------
# Saving the Regression Weights
# ---------------------------
# Define the output directory
if args.output_dir:
save_dir = args.output_dir
else:
save_dir = os.path.join(HOME, "data", "ArmoRM", "regression_weights")
os.makedirs(save_dir, exist_ok=True) # Create the directory if it doesn't exist
# Define the path to save the weights
save_path = os.path.join(save_dir, f"{args.model_name}_{args.dataset_name}.pt")
# Save the weights as a PyTorch tensor
torch.save({"weight": torch.from_numpy(weights)}, save_path)
print(f"Saved regression weights to {save_path}")