comal.sh
NAME="COMAL" # name of the experiment
OLD_CKPT="yale-nlp/comal-qwen2-1.5b-mle" # at the first iteration, this should be the path to the MLE model
REF_CKPT="yale-nlp/comal-qwen2-1.5b-mle" # at the first iteration, this should be the path to the MLE model
GPU_IDS=0,1,2,3,4,5,6,7 # GPU IDs, here we assume 8 GPUs, please change this if you have different number of GPUs
GPU_LIST="0 1 2 3 4 5 6 7" # GPU list
NUM_GPUS=8 # number of GPUs
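# GPU_IDS, GPU_LIST, and NUM_GPUS must describe the same set of devices:
# GPU_IDS feeds CUDA_VISIBLE_DEVICES, GPU_LIST is passed to each script's
# --gpuids flag, and NUM_GPUS tells the scripts how many workers to shard
# the work across.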
# COMAL hyperparameters
NUM_ITERS=24 # number of iterations
ITER_START=0 # starting iteration
UPDATE_INTERVAL=12 # interval to update the reference model
# Training hyperparameters
PORT=29320 # port number for FSDP
EPOCHS=3 # number of epochs
RATIO=0.3333333333333333 # tau/eta ratio
ETA=0.002 # eta
ACCUMULATE_STEP=4 # gradient accumulation steps
BATCH_SIZE=1 # per-GPU batch size; the effective batch size is BATCH_SIZE * NUM_GPUS * ACCUMULATE_STEP
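# Worked example with the defaults above: the effective batch size is
# BATCH_SIZE * NUM_GPUS * ACCUMULATE_STEP = 1 * 8 * 4 = 32 sequences per
# optimizer step, and tau = RATIO * ETA = 0.3333... * 0.002 ~= 6.7e-4
# (tau follows from the tau/eta ratio by definition; how inpo.py consumes
# these values internally is an assumption).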
echo "GPU_IDS: $GPU_IDS"
for ((ITER=$ITER_START; ITER<$ITER_START+$NUM_ITERS; ITER++))
do
echo "$NAME ITER $ITER"
FDIR="exps/$NAME/$ITER"
mkdir -p $FDIR
echo "FDIR: $FDIR"
DATA_ITER=$(( ITER % 6 )) # we have 6 different training data splits
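# Split rotation example: with the defaults (24 iterations, 6 splits),
# ITER 0-5 use train_0..train_5, ITER 6 wraps back to train_0, and each
# split is visited 24 / 6 = 4 times over a full run.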
# generate samples
echo "Generating samples"
CUDA_VISIBLE_DEVICES=$GPU_IDS python sampling.py \
--num_gpus $NUM_GPUS \
--model_type qwen \
--model_pt $OLD_CKPT/model \
--tokenizer_pt Qwen/Qwen2-1.5B \
--num_samples 5 \
--top_p 0.95 \
--input_dir data/prompts/test.jsonl \
--output_dir $FDIR/test.samples.jsonl \
--gpuids $GPU_LIST \
--num_workers 8
CUDA_VISIBLE_DEVICES=$GPU_IDS python sampling.py \
--num_gpus $NUM_GPUS \
--model_type qwen \
--model_pt $OLD_CKPT/model \
--tokenizer_pt Qwen/Qwen2-1.5B \
--num_samples 5 \
--top_p 0.95 \
--input_dir data/prompts/train_${DATA_ITER}.jsonl \
--output_dir $FDIR/train.samples.jsonl \
--gpuids $GPU_LIST \
--num_workers 8
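# Each sampling call draws 5 nucleus-sampled (top_p=0.95) candidates per
# prompt from the current policy OLD_CKPT, sharded across the GPUs in
# GPU_LIST. Optional sanity check, assuming sampling.py writes one JSON
# object per line (the exact field names are not documented here):
#   head -n 1 "$FDIR/train.samples.jsonl" | python -m json.tool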
# score samples
echo "Scoring samples"
CUDA_VISIBLE_DEVICES=$GPU_IDS python scoring.py \
--src_dir data/prompts/train_${DATA_ITER}.jsonl \
--input_dir $FDIR/train.samples.jsonl \
--output_dir $FDIR/train.samples.pairs.jsonl \
--gpuids $GPU_LIST \
--model_pt NCSOFT/Llama-3-OffsetBias-8B \
--batch_size 16 \
--score_mode pairwise \
--model_type offsetbias-lm \
--num_workers 8
CUDA_VISIBLE_DEVICES=$GPU_IDS python scoring.py \
--src_dir data/prompts/test.jsonl \
--input_dir $FDIR/test.samples.jsonl \
--output_dir $FDIR/test.samples.pairs.jsonl \
--gpuids $GPU_LIST \
--model_pt NCSOFT/Llama-3-OffsetBias-8B \
--batch_size 16 \
--score_mode pairwise \
--model_type offsetbias-lm \
--num_workers 8
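# The OffsetBias-8B preference model compares the sampled candidates
# pairwise (--score_mode pairwise) and writes the judged pairs next to the
# raw samples. A cheap consistency check on the outputs:
#   wc -l "$FDIR/train.samples.pairs.jsonl" "$FDIR/test.samples.pairs.jsonl"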
# processing samples
echo "Processing samples"
python data_processing.py \
--task make_output_pair_from_pm \
--input_dir $FDIR/test.samples.pairs.jsonl \
--output_dir $FDIR/test.pairs.jsonl \
--num_workers 16 \
--tokenizer_pt Qwen/Qwen2-1.5B \
--model_type qwen \
--pm_tokenizer_pt NCSOFT/Llama-3-OffsetBias-8B
python data_processing.py \
--task make_output_pair_from_pm \
--input_dir $FDIR/train.samples.pairs.jsonl \
--output_dir $FDIR/train.pairs.jsonl \
--num_workers 16 \
--tokenizer_pt Qwen/Qwen2-1.5B \
--model_type qwen \
--pm_tokenizer_pt NCSOFT/Llama-3-OffsetBias-8B
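# make_output_pair_from_pm turns the preference-model judgments into
# chosen/rejected pairs tokenized for Qwen2; the preference model's
# tokenizer is passed so its outputs can be decoded first (this reading of
# the task is an assumption based on the flag names).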
mkdir -p $FDIR/data
# get logprobs
echo "Getting logprobs using the latest model"
CUDA_VISIBLE_DEVICES=$GPU_IDS python get_logprobs.py \
--input_dir $FDIR/train.pairs.jsonl \
--gpuids $GPU_LIST \
--output_dir $FDIR/data/train.jsonl \
--model_type qwen \
--model_pt $OLD_CKPT/model \
--tokenizer_pt Qwen/Qwen2-1.5B
CUDA_VISIBLE_DEVICES=$GPU_IDS python get_logprobs.py \
--input_dir $FDIR/test.pairs.jsonl \
--gpuids $GPU_LIST \
--output_dir $FDIR/data/test.jsonl \
--model_type qwen \
--model_pt $OLD_CKPT/model \
--tokenizer_pt Qwen/Qwen2-1.5B
echo "Getting logprobs using the ref model"
CUDA_VISIBLE_DEVICES=$GPU_IDS python get_logprobs.py \
--input_dir $FDIR/data/train.jsonl \
--gpuids $GPU_LIST \
--output_dir $FDIR/data/train.jsonl \
--model_type qwen \
--model_pt $REF_CKPT/model \
--tokenizer_pt Qwen/Qwen2-1.5B \
--mode nash
CUDA_VISIBLE_DEVICES=$GPU_IDS python get_logprobs.py \
--input_dir $FDIR/data/test.jsonl \
--gpuids $GPU_LIST \
--output_dir $FDIR/data/test.jsonl \
--model_type qwen \
--model_pt $REF_CKPT/model \
--tokenizer_pt Qwen/Qwen2-1.5B \
--mode nash
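# Both get_logprobs.py phases write into the same files under $FDIR/data:
# the first caches the current policy's log-probabilities, and the second
# (--mode nash) augments each record with the reference model's. Precomputing
# both here keeps the training step itself cheap, since the loss needs policy
# and reference log-probabilities for every pair (the augment-in-place reading
# follows from input_dir and output_dir pointing at the same file, and is
# otherwise an assumption).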
# training
echo "Training"
CUDA_VISIBLE_DEVICES=$GPU_IDS accelerate launch --config_file fsdp_config.yaml \
--main_process_port $PORT \
inpo.py \
--epoch $EPOCHS \
--eta $ETA \
--tau_eta_ratio $RATIO \
--dataset $FDIR/data \
--pretrained $OLD_CKPT/model \
--exp_name $FDIR/ckpts \
--accumulate_step $ACCUMULATE_STEP \
--batch_size $BATCH_SIZE \
-l
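# inpo.py trains the policy on the cached pairs with FSDP via accelerate,
# using eta as the step-size parameter and the tau/eta ratio to set the
# KL-regularization strength toward the reference model (this reading of
# the hyperparameters is an assumption based on their names; the exact
# INPO objective lives in inpo.py).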
CKPT=$FDIR/ckpts
# evaluate
echo "generating evaluation samples"
CUDA_VISIBLE_DEVICES=$GPU_IDS python eval.py \
--num_gpus 4 \
--model_type qwen \
--model_pt $CKPT/model \
--tokenizer_pt Qwen/Qwen2-1.5B \
--num_samples 1 \
--temperature 0.7 \
--top_p 0.95 \
--output_dir $CKPT/alpacaeval_output.jsonl \
--task gen_alpaca
echo "Comparing with sft on alpaca"
CUDA_VISIBLE_DEVICES=$GPU_IDS python eval.py \
--output_dir $CKPT/alpacaeval_vs_sft.json \
--sys1_dir ckpts/qwen_mle/alpacaeval_output.jsonl \
--sys2_dir $CKPT/alpacaeval_output.jsonl \
--num_gpus $NUM_GPUS \
--batch_size 16 \
--task eval_alpaca
echo "Comparing with previous ckpt on alpaca"
CUDA_VISIBLE_DEVICES=$GPU_IDS python eval.py \
--output_dir $CKPT/alpacaeval_vs_previous.json \
--sys1_dir $OLD_CKPT/alpacaeval_output.jsonl \
--sys2_dir $CKPT/alpacaeval_output.jsonl \
--num_gpus $NUM_GPUS \
--batch_size 16 \
--task eval_alpaca
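# Evaluation: eval.py first generates one response per AlpacaEval prompt
# (temperature 0.7, top_p 0.95) from the new checkpoint, then runs pairwise
# comparisons against (a) the fixed SFT baseline and (b) the previous
# iteration's checkpoint, so both absolute and per-iteration progress are
# tracked.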
OLD_CKPT=$CKPT
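# Self-play update: the freshly trained checkpoint becomes the sampling
# policy for the next iteration, while the reference model below is only
# refreshed every UPDATE_INTERVAL = 12 iterations (twice over the default
# 24 iterations), matching COMAL's periodic reference update.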
if (( (ITER + 1) % UPDATE_INTERVAL == 0 )); then
echo "Updating the reference model"
REF_CKPT=$CKPT
fi
done