Update training scripts of step2 DPO in DeepSpeed-Chat.
stceum committed Jan 27, 2024
1 parent 27a8782 commit b563ff9
Showing 17 changed files with 65 additions and 326 deletions.
@@ -1,6 +1,6 @@
### 💁For each folder, the bash scripts are examples of "facebook/opt" family.

If you want to change your model such as EleutherAI/gpt-j-6b, you may simply replace
`` --model_name_or_path facebook/opt-1.3b`` to ``--model_name_or_path EleutherAI/gpt-j-6b ``.
`` --model_name_or_path facebook/opt-350m`` to ``--model_name_or_path EleutherAI/gpt-neo-125m ``.

For the models we support, please see [our landing page](./../../../README.md#-supported-models-)
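As a minimal illustration of that substitution (the surrounding flags below are placeholders for this sketch, not taken from any particular script in this commit):
<pre>
# Before: the facebook/opt example shipped with the scripts
deepspeed main.py --model_name_or_path facebook/opt-350m --output_dir ./output ...

# After: the same launch command pointed at another supported model
deepspeed main.py --model_name_or_path EleutherAI/gpt-neo-125m --output_dir ./output ...
</pre>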
@@ -6,29 +6,30 @@
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
OUTPUT=./output_step1_llama2_7b
OUTPUT=./output_step2_llama_7b_epoch1_lr9.65e-6
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=3
fi
mkdir -p $OUTPUT

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_path Dahoas/rm-static \
--data_split 2,4,4 \
--model_name_or_path meta-llama/Llama-2-7b-hf \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--per_device_train_batch_size 8 \
--per_device_eval_batch_size 8 \
--max_seq_len 512 \
--learning_rate 9.65e-6 \
--weight_decay 0. \
--num_train_epochs 4 \
--weight_decay 0.1 \
--num_train_epochs 1 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--gradient_checkpointing \
--zero_stage $ZERO_STAGE \
--deepspeed \
--offload \
--output_dir $OUTPUT \
&> $OUTPUT/training.log
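For reference, both positional arguments of this script are optional; a minimal sketch of an invocation follows (the script path is an assumption, since file names are not shown on this page):
<pre>
# Hypothetical invocation: $1 overrides the output directory, $2 the ZeRO stage.
# With no arguments, the defaults above apply
# (./output_step2_llama_7b_epoch1_lr9.65e-6 and ZeRO stage 3).
bash training_scripts/llama2/run_llama2_7b.sh ./my_dpo_output 3
</pre>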
@@ -6,30 +6,31 @@
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
OUTPUT=./output_step1_llama2_7b_lora
OUTPUT=./output_step2_llama_7b_epoch1_lr9.65e-6
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=3
fi
mkdir -p $OUTPUT

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_path Dahoas/rm-static \
--data_split 2,4,4 \
--model_name_or_path meta-llama/Llama-2-7b-hf \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--per_device_train_batch_size 8 \
--per_device_eval_batch_size 8 \
--max_seq_len 512 \
--learning_rate 9.65e-6 \
--weight_decay 0. \
--num_train_epochs 4 \
--weight_decay 0.1 \
--num_train_epochs 1 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--gradient_checkpointing \
--zero_stage $ZERO_STAGE \
--deepspeed \
--offload \
--lora_dim 128 \
--lora_module_name "layers." \
--output_dir $OUTPUT \
@@ -9,25 +9,25 @@ if [ "$OUTPUT" == "" ]; then
OUTPUT=./output
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=3
ZERO_STAGE=0
fi
mkdir -p $OUTPUT

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path facebook/opt-6.7b \
--per_device_train_batch_size 6 \
--per_device_eval_batch_size 6 \
--model_name_or_path facebook/opt-350m \
--per_device_train_batch_size 2 \
--per_device_eval_batch_size 2 \
--max_seq_len 512 \
--learning_rate 9.65e-6 \
--weight_decay 0. \
--num_train_epochs 16 \
--learning_rate 5e-5 \
--weight_decay 0.1 \
--dropout 0.0 \
--num_train_epochs 1 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--gradient_checkpointing \
--zero_stage $ZERO_STAGE \
--deepspeed \
--output_dir $OUTPUT \
@@ -3,8 +3,6 @@
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Note that usually LoRA needs to use larger learning rate
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
@@ -15,8 +13,8 @@ if [ "$ZERO_STAGE" == "" ]; then
fi
mkdir -p $OUTPUT

deepspeed --num_gpus 1 main.py --model_name_or_path facebook/opt-1.3b \
--gradient_accumulation_steps 8 --lora_dim 128 --zero_stage $ZERO_STAGE \
deepspeed --num_gpus 1 main.py --model_name_or_path facebook/opt-350m \
--weight_decay 0.1 --dropout 0.0 --gradient_accumulation_steps 4 --zero_stage $ZERO_STAGE \
--enable_tensorboard \
--tensorboard_path $OUTPUT \
--deepspeed --output_dir $OUTPUT &> $OUTPUT/training.log
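Since this script passes --enable_tensorboard and --tensorboard_path $OUTPUT, the run can be watched with TensorBoard; a minimal sketch, assuming the event files are written somewhere under the output directory:
<pre>
# Assumption: DeepSpeed places its TensorBoard event files under $OUTPUT.
# Point TensorBoard at the output directory and let it discover them.
tensorboard --logdir ./output --port 6006
</pre>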

Five files were deleted by this commit (their contents are not shown here).

@@ -9,28 +9,26 @@ if [ "$OUTPUT" == "" ]; then
OUTPUT=./output
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=3
ZERO_STAGE=0
fi
mkdir -p $OUTPUT

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path facebook/opt-66b \
--model_name_or_path facebook/opt-350m \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--max_seq_len 512 \
--learning_rate 1e-4 \
--learning_rate 5e-5 \
--weight_decay 0.1 \
--num_train_epochs 2 \
--num_train_epochs 1 \
--dropout 0.0 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--gradient_checkpointing \
--zero_stage $ZERO_STAGE \
--lora_dim 128 \
--lora_module_name decoder.layers. \
--deepspeed \
--output_dir $OUTPUT \
&> $OUTPUT/training.log
@@ -5,17 +5,16 @@
* [Usage](#usage)

# Introduction
The step 1 characterization script sweeps across various training parameters. Currently, the following parameters are swept:
The step 2 characterization script sweeps across various training parameters. Currently, the following parameters are swept:
<pre>
Zero Stage: 2, 3
Offload: True, False
Lora: True, False
</pre>

The `run_step1_sweep.sh` script passes configuration arguments to `run_single.sh`, which can be extended to sweep beyond the parameters listed above (e.g. learning rate, weight decay, etc).
The `run_step2_sweep.sh` script passes configuration arguments to `run_single.sh`, which can be extended to sweep beyond the parameters listed above (e.g. learning rate, weight decay, etc).

# Usage
The sweep script can be run as follows:
<pre>
DeepSpeedExamples/applications/DeepSpeed-Chat/training/step1_supervised_finetuning$ bash training_scripts/opt/single_node/sweep/run_step1_sweep.sh
DeepSpeedExamples/applications/DeepSpeed-Chat/training/step2_reward_model_finetuning$ bash training_scripts/opt/single_node/sweep/run_step2_sweep.sh
</pre>
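For orientation, a driver that sweeps the three parameters listed above could be as simple as three nested loops. This is a sketch only; the contents of run_step2_sweep.sh and the argument order expected by run_single.sh are assumptions, not shown in this commit:
<pre>
#!/bin/bash
# Hypothetical sweep driver: enumerate every combination of the swept
# parameters and hand each one to run_single.sh with a distinct output dir.
for zero_stage in 2 3; do
    for offload in true false; do
        for lora in true false; do
            bash run_single.sh "$zero_stage" "$offload" "$lora" \
                ./output_z${zero_stage}_offload_${offload}_lora_${lora}
        done
    done
done
</pre>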