From b563ff971d870f7b839e08faf833b286c4cf0fdb Mon Sep 17 00:00:00 2001
From: stceum <50257864+stceum@users.noreply.github.com>
Date: Fri, 8 Dec 2023 22:15:40 +0800
Subject: [PATCH] Update training scripts of step2 DPO in DeepSpeed-Chat.

---
 .../training_scripts/README.md                |  2 +-
 .../training_scripts/llama2/run_llama2_7b.sh  | 13 +++---
 .../llama2/run_llama2_7b_lora.sh              | 13 +++---
 .../run_6.7b.sh => multi_node/run_350m.sh}    | 16 +++----
 .../single_gpu/{run_1.3b.sh => run_350m.sh}   |  6 +--
 .../opt/single_gpu/run_6.7b_lora.sh           | 31 -------------
 .../opt/single_node/run_1.3b.sh               | 35 ---------------
 .../opt/single_node/run_1.3b_lora.sh          | 31 -------------
 .../opt/single_node/run_13b.sh                | 36 ----------------
 .../opt/single_node/run_30b_lora.sh           | 28 ------------
 .../run_66b.sh => single_node/run_350m.sh}    | 12 +++---
 .../opt/single_node/sweep/README.md           |  7 ++-
 .../opt/single_node/sweep/run_single.sh       | 43 +++++--------------
 .../opt/single_node/sweep/run_step1_sweep.sh  | 25 -----------
 .../opt/single_node/sweep/run_step2_sweep.sh  | 21 +++++++++
 .../other_language/run_chinese.sh             | 36 ----------------
 .../other_language/run_japanese.sh            | 36 ----------------
 17 files changed, 65 insertions(+), 326 deletions(-)
 rename applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/{single_node/run_6.7b.sh => multi_node/run_350m.sh} (72%)
 rename applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/{run_1.3b.sh => run_350m.sh} (75%)
 delete mode 100644 applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_6.7b_lora.sh
 delete mode 100644 applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_1.3b.sh
 delete mode 100644 applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_1.3b_lora.sh
 delete mode 100644 applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_13b.sh
 delete mode 100644 applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_30b_lora.sh
 rename applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/{multi_node/run_66b.sh => single_node/run_350m.sh} (78%)
 delete mode 100644 applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_step1_sweep.sh
 create mode 100644 applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_step2_sweep.sh
 delete mode 100644 applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/other_language/run_chinese.sh
 delete mode 100644 applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/other_language/run_japanese.sh

diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/README.md b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/README.md
index f680397af..ca2d5eb70 100644
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/README.md
+++ b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/README.md
@@ -1,6 +1,6 @@
 ### 💁For each folder, the bash scripts are examples of "facebook/opt" family.
 If you want to change your model such as EleutherAI/gpt-j-6b, you may simply replace
-`` --model_name_or_path facebook/opt-1.3b`` to ``--model_name_or_path EleutherAI/gpt-j-6b ``.
+`` --model_name_or_path facebook/opt-350m`` to ``--model_name_or_path EleutherAI/gpt-neo-125m ``.
 For the models we support, please see [our landing page](./../../../README.md#-supported-models-)
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/llama2/run_llama2_7b.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/llama2/run_llama2_7b.sh
index 2fe70be13..aaa059bbe 100755
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/llama2/run_llama2_7b.sh
+++ b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/llama2/run_llama2_7b.sh
@@ -6,7 +6,7 @@
 OUTPUT=$1
 ZERO_STAGE=$2
 if [ "$OUTPUT" == "" ]; then
-    OUTPUT=./output_step1_llama2_7b
+    OUTPUT=./output_step2_llama_7b_epoch1_lr9.65e-6
 fi
 if [ "$ZERO_STAGE" == "" ]; then
     ZERO_STAGE=3
@@ -14,15 +14,15 @@ fi
 mkdir -p $OUTPUT
 
 deepspeed main.py \
-   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
+   --data_path Dahoas/rm-static \
    --data_split 2,4,4 \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
-   --per_device_train_batch_size 4 \
-   --per_device_eval_batch_size 4 \
+   --per_device_train_batch_size 8 \
+   --per_device_eval_batch_size 8 \
    --max_seq_len 512 \
    --learning_rate 9.65e-6 \
-   --weight_decay 0. \
-   --num_train_epochs 4 \
+   --weight_decay 0.1 \
+   --num_train_epochs 1 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --num_warmup_steps 0 \
@@ -30,5 +30,6 @@ deepspeed main.py \
    --gradient_checkpointing \
    --zero_stage $ZERO_STAGE \
    --deepspeed \
+   --offload \
    --output_dir $OUTPUT \
    &> $OUTPUT/training.log
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/llama2/run_llama2_7b_lora.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/llama2/run_llama2_7b_lora.sh
index 7689266ee..ec48de78a 100755
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/llama2/run_llama2_7b_lora.sh
+++ b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/llama2/run_llama2_7b_lora.sh
@@ -6,7 +6,7 @@
 OUTPUT=$1
 ZERO_STAGE=$2
 if [ "$OUTPUT" == "" ]; then
-    OUTPUT=./output_step1_llama2_7b_lora
+    OUTPUT=./output_step2_llama_7b_epoch1_lr9.65e-6
 fi
 if [ "$ZERO_STAGE" == "" ]; then
     ZERO_STAGE=3
@@ -14,15 +14,15 @@ fi
 mkdir -p $OUTPUT
 
 deepspeed main.py \
-   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
+   --data_path Dahoas/rm-static \
    --data_split 2,4,4 \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
-   --per_device_train_batch_size 4 \
-   --per_device_eval_batch_size 4 \
+   --per_device_train_batch_size 8 \
+   --per_device_eval_batch_size 8 \
    --max_seq_len 512 \
    --learning_rate 9.65e-6 \
-   --weight_decay 0. \
-   --num_train_epochs 4 \
+   --weight_decay 0.1 \
+   --num_train_epochs 1 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --num_warmup_steps 0 \
@@ -30,6 +30,7 @@ deepspeed main.py \
    --gradient_checkpointing \
    --zero_stage $ZERO_STAGE \
    --deepspeed \
+   --offload \
    --lora_dim 128 \
    --lora_module_name "layers." \
    --output_dir $OUTPUT \
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_6.7b.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/multi_node/run_350m.sh
similarity index 72%
rename from applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_6.7b.sh
rename to applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/multi_node/run_350m.sh
index 126f8892f..b55beef98 100644
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_6.7b.sh
+++ b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/multi_node/run_350m.sh
@@ -9,25 +9,25 @@ if [ "$OUTPUT" == "" ]; then
     OUTPUT=./output
 fi
 if [ "$ZERO_STAGE" == "" ]; then
-    ZERO_STAGE=3
+    ZERO_STAGE=0
 fi
 mkdir -p $OUTPUT
 
 deepspeed main.py \
    --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
    --data_split 2,4,4 \
-   --model_name_or_path facebook/opt-6.7b \
-   --per_device_train_batch_size 6 \
-   --per_device_eval_batch_size 6 \
+   --model_name_or_path facebook/opt-350m \
+   --per_device_train_batch_size 2 \
+   --per_device_eval_batch_size 2 \
    --max_seq_len 512 \
-   --learning_rate 9.65e-6 \
-   --weight_decay 0. \
-   --num_train_epochs 16 \
+   --learning_rate 5e-5 \
+   --weight_decay 0.1 \
+   --dropout 0.0 \
+   --num_train_epochs 1 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --num_warmup_steps 0 \
    --seed 1234 \
-   --gradient_checkpointing \
    --zero_stage $ZERO_STAGE \
    --deepspeed \
    --output_dir $OUTPUT \
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_1.3b.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_350m.sh
similarity index 75%
rename from applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_1.3b.sh
rename to applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_350m.sh
index a0a2fddc9..8157865a5 100644
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_1.3b.sh
+++ b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_350m.sh
@@ -3,8 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # DeepSpeed Team
-
-# Note that usually LoRA needs to use larger learning rate
 OUTPUT=$1
 ZERO_STAGE=$2
 if [ "$OUTPUT" == "" ]; then
@@ -15,8 +13,8 @@ if [ "$ZERO_STAGE" == "" ]; then
 fi
 mkdir -p $OUTPUT
 
-deepspeed --num_gpus 1 main.py --model_name_or_path facebook/opt-1.3b \
-   --gradient_accumulation_steps 8 --lora_dim 128 --zero_stage $ZERO_STAGE \
+deepspeed --num_gpus 1 main.py --model_name_or_path facebook/opt-350m \
+   --weight_decay 0.1 --dropout 0.0 --gradient_accumulation_steps 4 --zero_stage $ZERO_STAGE \
    --enable_tensorboard \
    --tensorboard_path $OUTPUT \
    --deepspeed --output_dir $OUTPUT &> $OUTPUT/training.log
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_6.7b_lora.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_6.7b_lora.sh
deleted file mode 100644
index d4189bb1e..000000000
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_6.7b_lora.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-# Copyright (c) Microsoft Corporation.
-# SPDX-License-Identifier: Apache-2.0
-
-# DeepSpeed Team
-
-# Note that usually LoRA needs to use larger learning rate
-OUTPUT_PATH=./output
-mkdir -p $OUTPUT_PATH
-
-deepspeed --num_gpus 1 main.py \
-   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
-   --data_split 2,4,4 \
-   --model_name_or_path facebook/opt-6.7b \
-   --per_device_train_batch_size 8 \
-   --per_device_eval_batch_size 8 \
-   --max_seq_len 512 \
-   --learning_rate 1e-3 \
-   --weight_decay 0. \
-   --num_train_epochs 16 \
-   --gradient_accumulation_steps 16 \
-   --lr_scheduler_type cosine \
-   --num_warmup_steps 0 \
-   --seed 1234 \
-   --gradient_checkpointing \
-   --zero_stage 0 \
-   --lora_dim 128 \
-   --lora_module_name decoder.layers. \
-   --deepspeed \
-   --output_dir $OUTPUT_PATH \
-   &> $OUTPUT_PATH/training.log
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_1.3b.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_1.3b.sh
deleted file mode 100644
index 3eeeefe02..000000000
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_1.3b.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-# Copyright (c) Microsoft Corporation.
-# SPDX-License-Identifier: Apache-2.0
-
-# DeepSpeed Team
-OUTPUT=$1
-ZERO_STAGE=$2
-if [ "$OUTPUT" == "" ]; then
-    OUTPUT=./output
-fi
-if [ "$ZERO_STAGE" == "" ]; then
-    ZERO_STAGE=2
-fi
-mkdir -p $OUTPUT
-
-deepspeed main.py \
-   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
-   --data_split 2,4,4 \
-   --model_name_or_path facebook/opt-1.3b \
-   --per_device_train_batch_size 8 \
-   --per_device_eval_batch_size 8 \
-   --max_seq_len 512 \
-   --learning_rate 9.65e-6 \
-   --weight_decay 0. \
-   --num_train_epochs 16 \
-   --gradient_accumulation_steps 1 \
-   --lr_scheduler_type cosine \
-   --num_warmup_steps 0 \
-   --seed 1234 \
-   --zero_stage $ZERO_STAGE \
-   --deepspeed \
-   --enable_tensorboard \
-   --tensorboard_path $OUTPUT \
-   --output_dir $OUTPUT \
-   &> $OUTPUT/training.log
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_1.3b_lora.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_1.3b_lora.sh
deleted file mode 100644
index c0057812c..000000000
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_1.3b_lora.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-# Copyright (c) Microsoft Corporation.
-# SPDX-License-Identifier: Apache-2.0
-
-# DeepSpeed Team
-
-# Note that usually LoRA needs to use larger learning rate
-OUTPUT_PATH=./output
-mkdir -p $OUTPUT_PATH
-
-deepspeed main.py \
-   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
-   --data_split 2,4,4 \
-   --model_name_or_path facebook/opt-1.3b \
-   --per_device_train_batch_size 8 \
-   --per_device_eval_batch_size 8 \
-   --max_seq_len 512 \
-   --learning_rate 1e-3 \
-   --weight_decay 0.1 \
-   --num_train_epochs 16 \
-   --gradient_accumulation_steps 1 \
-   --lr_scheduler_type cosine \
-   --num_warmup_steps 0 \
-   --seed 1234 \
-   --zero_stage 0 \
-   --lora_dim 128 \
-   --lora_module_name decoder.layers. \
-   --only_optimize_lora \
-   --deepspeed \
-   --output_dir $OUTPUT_PATH \
-   &> $OUTPUT_PATH/training.log
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_13b.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_13b.sh
deleted file mode 100644
index f93f1f9f7..000000000
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_13b.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-# Copyright (c) Microsoft Corporation.
-# SPDX-License-Identifier: Apache-2.0
-
-# DeepSpeed Team
-OUTPUT=$1
-ZERO_STAGE=$2
-if [ "$OUTPUT" == "" ]; then
-    OUTPUT=./output
-fi
-if [ "$ZERO_STAGE" == "" ]; then
-    ZERO_STAGE=3
-fi
-mkdir -p $OUTPUT
-
-deepspeed main.py \
-   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
-   --data_split 2,4,4 \
-   --model_name_or_path facebook/opt-13b \
-   --per_device_train_batch_size 4 \
-   --per_device_eval_batch_size 4 \
-   --max_seq_len 512 \
-   --learning_rate 1e-4 \
-   --weight_decay 0. \
-   --num_train_epochs 16 \
-   --gradient_accumulation_steps 1 \
-   --lr_scheduler_type cosine \
-   --num_warmup_steps 0 \
-   --seed 1234 \
-   --gradient_checkpointing \
-   --zero_stage $ZERO_STAGE \
-   --lora_dim 128 \
-   --lora_module_name decoder.layers. \
-   --deepspeed \
-   --output_dir $OUTPUT \
-   &> $OUTPUT/training.log
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_30b_lora.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_30b_lora.sh
deleted file mode 100644
index 661279c5f..000000000
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_30b_lora.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# Copyright (c) Microsoft Corporation.
-# SPDX-License-Identifier: Apache-2.0
-
-# DeepSpeed Team
-OUTPUT_PATH=./output
-mkdir -p $OUTPUT_PATH
-
-deepspeed main.py \
-   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
-   --data_split 2,4,4 \
-   --model_name_or_path facebook/opt-30b \
-   --per_device_train_batch_size 4 \
-   --per_device_eval_batch_size 4 \
-   --max_seq_len 512 \
-   --learning_rate 9.65e-6 \
-   --weight_decay 0. \
-   --num_train_epochs 16 \
-   --gradient_accumulation_steps 1 \
-   --lr_scheduler_type cosine \
-   --num_warmup_steps 0 \
-   --seed 1234 \
-   --lora_dim 128 \
-   --gradient_checkpointing \
-   --zero_stage 3 \
-   --deepspeed \
-   --output_dir $OUTPUT_PATH \
-   &> $OUTPUT_PATH/training.log
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/multi_node/run_66b.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_350m.sh
similarity index 78%
rename from applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/multi_node/run_66b.sh
rename to applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_350m.sh
index 4df99382a..16aed6a42 100644
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/multi_node/run_66b.sh
+++ b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_350m.sh
@@ -9,28 +9,26 @@ if [ "$OUTPUT" == "" ]; then
     OUTPUT=./output
 fi
 if [ "$ZERO_STAGE" == "" ]; then
-    ZERO_STAGE=3
+    ZERO_STAGE=0
 fi
 mkdir -p $OUTPUT
 
 deepspeed main.py \
    --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
    --data_split 2,4,4 \
-   --model_name_or_path facebook/opt-66b \
+   --model_name_or_path facebook/opt-350m \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 4 \
    --max_seq_len 512 \
-   --learning_rate 1e-4 \
+   --learning_rate 5e-5 \
    --weight_decay 0.1 \
-   --num_train_epochs 2 \
+   --num_train_epochs 1 \
+   --dropout 0.0 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --num_warmup_steps 0 \
    --seed 1234 \
-   --gradient_checkpointing \
    --zero_stage $ZERO_STAGE \
-   --lora_dim 128 \
-   --lora_module_name decoder.layers. \
    --deepspeed \
    --output_dir $OUTPUT \
    &> $OUTPUT/training.log
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/README.md b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/README.md
index 254442faf..1f90b9f65 100644
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/README.md
+++ b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/README.md
@@ -5,17 +5,16 @@
   * [Usage](#usage)
 
 # Introduction
-The step 1 characterization script sweeps across various training parameters. Currently, the following parameters are swept:
+The step 2 characterization script sweeps across various training parameters. Currently, the following parameters are swept:
 <pre>
 Zero Stage: 2, 3
 Offload: True, False
-Lora: True, False
 </pre>
 
-The `run_step1_sweep.sh` script passes configuration arguments to `run_single.sh`, which can be extended to sweep beyond the parameters listed above (e.g. learning rate, weight decay, etc).
+The `run_step2_sweep.sh` script passes configuration arguments to `run_single.sh`, which can be extended to sweep beyond the parameters listed above (e.g. learning rate, weight decay, etc).
 
 # Usage
 The sweep script can be run as follows:
 <pre>
-DeepSpeedExamples/applications/DeepSpeed-Chat/training/step1_supervised_finetuning$ bash training_scripts/opt/single_node/sweep/run_step1_sweep.sh
+DeepSpeedExamples/applications/DeepSpeed-Chat/training/step2_dpo_finetuning$ bash training_scripts/opt/single_node/sweep/run_step2_sweep.sh
 </pre>
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_single.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_single.sh
index 1590128bb..6f5453af1 100644
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_single.sh
+++ b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_single.sh
@@ -2,55 +2,35 @@
 # Copyright (c) Microsoft Corporation.
 # SPDX-License-Identifier: Apache-2.0
 
-# DeepSpeed Team
-
-# Note that usually LoRA needs to use larger learning rate
 # DeepSpeed Team
 ZERO_STAGE=$1
 OFFLOAD=$2
-LORA=$3
-OUTPUT=$4
-
+OUTPUT=$3
 if [ "$ZERO_STAGE" == "" ]; then
-    ZERO_STAGE=2
+    ZERO_STAGE=0
 fi
-
 if [ "$OFFLOAD" == true ]; then
     OFFLOAD="--offload"
 else
     OFFLOAD=""
 fi
-
-if [ "$LORA" == true ]; then
-    LORA_DIM="--lora_dim 128"
-    LORA_MODULE_NAME="--lora_module_name decoder.layers."
-    ONLY_OPTIMIZE_LORA="--only_optimize_lora"
-    LEARNING_RATE="1e-3"
-    WEIGHT_DECAY="0.1"
-else
-    LORA_DIM="--lora_dim 0"
-    LORA_MODULE_NAME=""
-    ONLY_OPTIMIZE_LORA=""
-    LEARNING_RATE="9.65e-6"
-    WEIGHT_DECAY="0."
-fi
-
 if [ "$OUTPUT" == "" ]; then
     OUTPUT=./output
 fi
-
 mkdir -p $OUTPUT
 
 cmd="deepspeed main.py \
    --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
    --data_split 2,4,4 \
-   --model_name_or_path facebook/opt-1.3b \
-   --per_device_train_batch_size 8 \
-   --per_device_eval_batch_size 8 \
+   --model_name_or_path facebook/opt-350m \
+   --num_padding_at_beginning 1 \
+   --per_device_train_batch_size 4 \
+   --per_device_eval_batch_size 4 \
    --max_seq_len 512 \
-   --learning_rate ${LEARNING_RATE} \
-   --weight_decay ${WEIGHT_DECAY} \
-   --num_train_epochs 16 \
+   --learning_rate 5e-5 \
+   --weight_decay 0.1 \
+   --num_train_epochs 1 \
+   --dropout 0.0 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --num_warmup_steps 0 \
@@ -58,8 +38,7 @@ cmd="deepspeed main.py \
    --zero_stage $ZERO_STAGE \
    --deepspeed \
    --output_dir $OUTPUT \
-   $OFFLOAD $LORA_DIM $LORA_MODULE_NAME \
-   $ONLY_OPTIMIZE_LORA"
+   $OFFLOAD"
 
 echo "----------------------------- DS COMMAND -----------------------------"
 echo $cmd
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_step1_sweep.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_step1_sweep.sh
deleted file mode 100644
index 7b6e57823..000000000
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_step1_sweep.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# Copyright (c) Microsoft Corporation.
-# SPDX-License-Identifier: Apache-2.0
-
-# DeepSpeed Team
-for z in {2..3}
-do
-    for offload in true false
-    do
-        for lora in true false
-        do
-            cmd="bash training_scripts/opt/single_node/sweep/run_single.sh \
-                ${z} \
-                ${offload} \
-                ${lora} \
-                z${z}_offload_${offload}_lora_${lora}"
-            echo "----------------------------- CALLING SHELL SCRIPT -----------------------------"
-            echo $cmd
-            $cmd
-            pkill -9 python
-            sleep 60
-            echo ""
-        done
-    done
-done
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_step2_sweep.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_step2_sweep.sh
new file mode 100644
index 000000000..ad9849e38
--- /dev/null
+++ b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/sweep/run_step2_sweep.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+for z in {2..3}
+do
+    for offload in true false
+    do
+        cmd="bash training_scripts/opt/single_node/sweep/run_single.sh \
+            ${z} \
+            ${offload} \
+            z${z}_offload_${offload}"
+        echo "----------------------------- CALLING SHELL SCRIPT -----------------------------"
+        echo $cmd
+        $cmd
+        pkill -9 python
+        sleep 60
+        echo ""
+    done
+done
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/other_language/run_chinese.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/other_language/run_chinese.sh
deleted file mode 100644
index 88d424100..000000000
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/other_language/run_chinese.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-# Copyright (c) Microsoft Corporation.
-# SPDX-License-Identifier: Apache-2.0
-
-# DeepSpeed Team
-OUTPUT=$1
-ZERO_STAGE=$2
-if [ "$OUTPUT" == "" ]; then
-    OUTPUT=./output
-fi
-if [ "$ZERO_STAGE" == "" ]; then
-    ZERO_STAGE=2
-fi
-mkdir -p $OUTPUT
-
-# The Chinese data we found mostly only contain one response without another
-# "rejected" response. Thus we only test the step 1 finetuning and use
-# a data_split of 10,0,0 (keep all data for step 1).
-deepspeed main.py \
-   --data_path wangrui6/Zhihu-KOL Cohere/miracl-zh-queries-22-12 Hello-SimpleAI/HC3-Chinese mkqa-Chinese \
-   --data_split 10,0,0 \
-   --model_name_or_path bigscience/bloom-1b1 \
-   --per_device_train_batch_size 8 \
-   --per_device_eval_batch_size 8 \
-   --max_seq_len 512 \
-   --learning_rate 9.65e-6 \
-   --weight_decay 0. \
-   --num_train_epochs 16 \
-   --gradient_accumulation_steps 1 \
-   --lr_scheduler_type cosine \
-   --num_warmup_steps 0 \
-   --seed 1234 \
-   --zero_stage $ZERO_STAGE \
-   --deepspeed \
-   --output_dir $OUTPUT \
-   &> $OUTPUT/training.log
diff --git a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/other_language/run_japanese.sh b/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/other_language/run_japanese.sh
deleted file mode 100644
index 7b0646244..000000000
--- a/applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/other_language/run_japanese.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-# Copyright (c) Microsoft Corporation.
-# SPDX-License-Identifier: Apache-2.0
-
-# DeepSpeed Team
-OUTPUT=$1
-ZERO_STAGE=$2
-if [ "$OUTPUT" == "" ]; then
-    OUTPUT=./output
-fi
-if [ "$ZERO_STAGE" == "" ]; then
-    ZERO_STAGE=2
-fi
-mkdir -p $OUTPUT
-
-# The Japanese data we found mostly only contain one response without another
-# "rejected" response. Thus we only test the step 1 finetuning and use
-# a data_split of 10,0,0 (keep all data for step 1).
-deepspeed main.py \
-   --data_path mkqa-Japanese Cohere/miracl-ja-queries-22-12 lmqg/qg_jaquad lmqg/qag_jaquad \
-   --data_split 10,0,0 \
-   --model_name_or_path sberbank-ai/mGPT \
-   --per_device_train_batch_size 8 \
-   --per_device_eval_batch_size 8 \
-   --max_seq_len 512 \
-   --learning_rate 9.65e-6 \
-   --weight_decay 0. \
-   --num_train_epochs 16 \
-   --gradient_accumulation_steps 1 \
-   --lr_scheduler_type cosine \
-   --num_warmup_steps 0 \
-   --seed 1234 \
-   --zero_stage $ZERO_STAGE \
-   --deepspeed \
-   --output_dir $OUTPUT \
-   &> $OUTPUT/training.log
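
The scripts kept by this patch all follow the same calling convention shown in the diffs above: an optional output directory as the first argument and an optional ZeRO stage as the second, with each run logging to training.log inside the output directory. A minimal sketch of how the updated step-2 DPO scripts might be invoked (the working directory and output paths below are illustrative assumptions, not part of the patch):

    # run from applications/DeepSpeed-Chat/training/step2_dpo_finetuning
    bash training_scripts/opt/single_gpu/run_350m.sh ./output 0
    bash training_scripts/llama2/run_llama2_7b.sh ./output_step2_llama_7b_epoch1_lr9.65e-6 3
    # sweep ZeRO stage {2,3} x offload {true,false} via the new sweep driver
    bash training_scripts/opt/single_node/sweep/run_step2_sweep.sh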