-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_chosen_hyps.sh
executable file
·148 lines (134 loc) · 3.85 KB
/
run_chosen_hyps.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env bash
# Main driver script: runs all experiments in succession for each dataset
# (embed -> generate -> evaluate). Requires the project's `scripts` package
# on PYTHONPATH and the models/datasets referenced below.
#
# Fail fast: abort on any failed step, unset variable, or failed pipeline
# stage, so later steps never run against missing/partial outputs.
set -euo pipefail

# Experiment-wide constants.
readonly NUM_GPUS=4          # GPUs used by embedding/generation jobs
readonly N_TOTAL=8000        # sample count for summarization datasets (chat datasets use 40000 inline)
readonly NUM_HYPOTHESES=20   # hypotheses per example for summarization (chat datasets use 2 inline)
# ---- red_team_chat experiments ----
# Step 1: embed the dataset with a sentence-transformer encoder.
echo "Embedding red_team_chat"
python -u -m scripts.generate_outputs \
  --datasets red_team_chat \
  --model-name-or-path sentence-transformers/multi-qa-mpnet-base-dot-v1 \
  --num-gpus "$NUM_GPUS" \
  --n-total 40000 \
  --batch-size 1000 \
  --seed 42 \
  --embed
# Step 2: generate outputs with flan-t5-xxl using the chosen hypotheses.
# NOTE(review): uses 2 hypotheses (not $NUM_HYPOTHESES) — presumably
# intentional for the chat datasets; confirm before changing.
echo "Generating red_team_chat with flan-t5-xxl"
python -u -m scripts.generate_outputs \
  --datasets red_team_chat \
  --model-name-or-path google/flan-t5-xxl \
  --num-gpus "$NUM_GPUS" \
  --print-container-logs \
  --n-total 40000 \
  --num-hypotheses 2 \
  --seed 42 \
  --use-chosen-hypotheses
# Step 3: score the generations with an RLHF reward model.
echo "Evaluating red_team_chat"
python -u -m scripts.compute_loss \
  --output-dir output \
  --datasets red_team_chat \
  --loss-fn weqweasdas/hh_rlhf_rm_open_llama_3b \
  --batch-size 5 \
  --eval-models google/flan-t5-xxl
# ---- full_chat experiments ----
# Step 1: embed the dataset with a sentence-transformer encoder.
echo "Embedding full_chat"
python -u -m scripts.generate_outputs \
  --datasets full_chat \
  --model-name-or-path sentence-transformers/multi-qa-mpnet-base-dot-v1 \
  --num-gpus "$NUM_GPUS" \
  --n-total 40000 \
  --batch-size 1000 \
  --seed 42 \
  --embed
# Step 2: generate outputs with flan-t5-xxl using the chosen hypotheses.
# NOTE(review): uses 2 hypotheses (not $NUM_HYPOTHESES) — presumably
# intentional for the chat datasets; confirm before changing.
echo "Generating full_chat with flan-t5-xxl"
python -u -m scripts.generate_outputs \
  --datasets full_chat \
  --model-name-or-path google/flan-t5-xxl \
  --num-gpus "$NUM_GPUS" \
  --print-container-logs \
  --n-total 40000 \
  --num-hypotheses 2 \
  --seed 42 \
  --use-chosen-hypotheses
# Step 3: score the generations with an RLHF reward model.
echo "Evaluating full_chat"
python -u -m scripts.compute_loss \
  --output-dir output \
  --datasets full_chat \
  --loss-fn weqweasdas/hh_rlhf_rm_open_llama_3b \
  --batch-size 5 \
  --eval-models google/flan-t5-xxl
# ---- cnn_dailymail experiments ----
# Step 1: embed the dataset with a sentence-transformer encoder.
echo "Embedding cnn_dailymail"
python -u -m scripts.generate_outputs \
  --datasets cnn_dailymail \
  --model-name-or-path sentence-transformers/multi-qa-mpnet-base-dot-v1 \
  --num-gpus "$NUM_GPUS" \
  --n-total "$N_TOTAL" \
  --batch-size 200 \
  --seed 42 \
  --embed
# Step 2: generate summaries with Llama 2 using the chosen hypotheses.
echo "Generating cnn_dailymail with meta-llama/Llama-2-7b-chat-hf"
python -u -m scripts.generate_outputs \
  --datasets cnn_dailymail \
  --model-name-or-path meta-llama/Llama-2-7b-chat-hf \
  --num-gpus "$NUM_GPUS" \
  --print-container-logs \
  --n-total "$N_TOTAL" \
  --num-hypotheses "$NUM_HYPOTHESES" \
  --seed 42 \
  --use-chosen-hypotheses
# Step 3: evaluate with ROUGE.
echo "Evaluating cnn_dailymail"
python -u -m scripts.compute_loss \
  --output-dir output \
  --datasets cnn_dailymail \
  --loss-fn rouge
# Step 4: evaluate with BERTScore as a second metric.
echo "Evaluating cnn_dailymail with bertscore"
python -u -m scripts.compute_loss \
  --output-dir output \
  --datasets cnn_dailymail \
  --loss-fn bertscore \
  --batch-size 400
# ---- xsum experiments ----
# Step 1: embed the dataset with a sentence-transformer encoder.
echo "Embedding xsum"
python -u -m scripts.generate_outputs \
  --datasets xsum \
  --model-name-or-path sentence-transformers/multi-qa-mpnet-base-dot-v1 \
  --num-gpus "$NUM_GPUS" \
  --n-total "$N_TOTAL" \
  --batch-size 200 \
  --seed 42 \
  --embed
# Step 2: generate summaries with Llama 2 using the chosen hypotheses.
echo "Generating xsum with meta-llama/Llama-2-7b-chat-hf"
python -u -m scripts.generate_outputs \
  --datasets xsum \
  --model-name-or-path meta-llama/Llama-2-7b-chat-hf \
  --num-gpus "$NUM_GPUS" \
  --print-container-logs \
  --n-total "$N_TOTAL" \
  --num-hypotheses "$NUM_HYPOTHESES" \
  --seed 42 \
  --use-chosen-hypotheses
# Step 3: evaluate with ROUGE.
echo "Evaluating xsum"
python -u -m scripts.compute_loss \
  --output-dir output \
  --datasets xsum \
  --loss-fn rouge
# Step 4: evaluate with BERTScore as a second metric.
# NOTE(review): batch size 300 here vs 400 for cnn_dailymail — presumably
# tuned per dataset (xsum documents differ in length); confirm if unifying.
echo "Evaluating xsum with bertscore"
python -u -m scripts.compute_loss \
  --output-dir output \
  --datasets xsum \
  --loss-fn bertscore \
  --batch-size 300