Read GPT setting from env variable; Change the path of saved parameter. #25

Merged
merged 3 commits on Apr 9, 2024
35 changes: 24 additions & 11 deletions RecLM-gen/README.md
@@ -181,7 +181,7 @@ To use a static dataset instead, specify the `--train_data_file` and `--val_data

### 2.4. SFT model merge

Merge the trained models using the script found at [scripts/sft_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/sft_merge.sh). The merged model will be saved to `snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/`.
Merge the trained models using the script found at [scripts/sft_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/sft_merge.sh). The merged model will be saved to `snap/ICR_SubMovie/SFT_Epoch27/`.

**Note: Use `CUDA_VISIBLE_DEVICES=x` to select a GPU. Do not set the `--gpu` command parameter.**
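
For reference, the core command inside that script looks roughly like the following (a sketch mirroring `scripts/sft_merge.sh`; keep the LoRA settings identical to those used during SFT training):

```shell
# Merge the SFT LoRA adapter into the backbone and save the full model under --output_path
CUDA_VISIBLE_DEVICES=0 python main.py \
  --backbone snap/Llama-2-7b-hf-chat/ \
  --train_stage SFT_Merge \
  --SFT_actor_lora_r 16 \
  --SFT_actor_lora_a 8 \
  --output_path snap/ICR_SubMovie/ \
  --SFT_load snap/ICR_SubMovie/Epoch27_SFT
```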

@@ -245,27 +245,40 @@ Single-GPU training is supported for the RL stage as well. See [scripts/single_g


### 3.4. RL model merge
Merge the RL-trained models using the script provided at [scripts/rl_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/rl_merge.sh). The merged model will be saved in an appropriately named directory within the `snap/` folder, such as `snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/RLHF_Step7000/`
Merge the RL-trained models using the script provided at [scripts/rl_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/rl_merge.sh). The merged model will be saved in an appropriately named directory within the `snap/` folder, such as `snap/ICR_SubMovie/SFT_Epoch27/RL/RLHF_Step3000/`.
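
For reference, the core command inside that script is roughly as follows (a sketch mirroring `scripts/rl_merge.sh`; the LoRA and `lm_head_full_tune` settings must match the RL training run):

```shell
# Merge the RL actor adapter into the SFT backbone and save the full model under --output_path
CUDA_VISIBLE_DEVICES=0 python main.py \
  --output_path snap/ICR_SubMovie/SFT_Epoch27/RL/ \
  --backbone snap/ICR_SubMovie/SFT_Epoch27/ \
  --train_stage RL_Merge \
  --RL_actor_lora_r 4 \
  --RL_actor_lora_a 2 \
  --RL_critic_lora_r 4 \
  --RL_critic_lora_a 2 \
  --RL_load snap/ICR_SubMovie/SFT_Epoch27/RL/3000step_RL \
  --lm_head_full_tune \
  --FA2
```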


## 4. Test stage

### 4.1. VLLM deploy

Deploy the VLLM using the following command, making sure to specify the correct model directory:
### 4.1. Llama2 deploy and test
```shell
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/Llama-2-7b-hf-chat/
./scripts/tasks_test.sh snap/Llama-2-7b-hf-chat/ 13579 sub_movie
```
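
Once the server is up, a quick sanity check (a sketch assuming vLLM's standard OpenAI-compatible routes) is to query the model list before running the test script:

```shell
# Should return a JSON listing that includes the served model path
curl http://localhost:13579/v1/models
```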

### 4.2. SFT model deploy and test
```shell
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/RLHF_Step7000/
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie/SFT_Epoch27/
./scripts/tasks_test.sh snap/ICR_SubMovie/SFT_Epoch27/ 13579 sub_movie
```

### 4.2. VLLM test
### 4.3. RL model deploy and test
```shell
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie/SFT_Epoch27/RL/RLHF_Step3000/
./scripts/tasks_test.sh snap/ICR_SubMovie/SFT_Epoch27/RL/RLHF_Step3000/ 13579 sub_movie
```

Run the test scripts by specifying the path to the model directory and the server port:
### 4.4. ChatGPT test
If you want to test ChatGPT's capability, first set the following environment variables. If you are not using the Azure OpenAI API (i.e. `OPENAI_API_TYPE` is not `"azure"`), only `OPENAI_API_KEY` and `ENGINE` need to be set.

```shell
./scripts/tasks_test.sh snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/ 13579
./scripts/tasks_test.sh snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/RL_Step7000/ 13579
export OPENAI_API_KEY=xxx
export OPENAI_API_BASE=https://xxx.openai.azure.com/
export OPENAI_API_VERSION=2023-03-15-preview
export OPENAI_API_TYPE=azure
export ENGINE=gpt-3.5-turbo-1106

./scripts/tasks_test.sh gpt-3.5-turbo-1106 0 sub_movie
```
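
For a non-Azure OpenAI endpoint, a minimal setup (a sketch; only the key and engine are required, as noted above) would be:

```shell
export OPENAI_API_KEY=xxx
export ENGINE=gpt-3.5-turbo-1106

./scripts/tasks_test.sh gpt-3.5-turbo-1106 0 sub_movie
```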


4 changes: 1 addition & 3 deletions RecLM-gen/base/model.py
@@ -98,8 +98,6 @@ def __init__(self, args, device, actor_lora_scope='actor', critic_lora_scope='cr
self.model.lm_head.requires_grad_(True)

def save_parameters(self, name='Epoch00'):
if not os.path.isdir(self.args.output):
os.makedirs(self.args.output, exist_ok=True)
params = {}
if self.args.train_stage in ['SFT', 'RL']:
params.update(self.actor_named_parameters)
@@ -108,7 +106,7 @@ def save_parameters(self, name='Epoch00'):
state_dict = {
'params': params,
}
torch.save(state_dict, os.path.join(self.args.output, f"{name}_{self.args.train_stage}.pth"))
torch.save(state_dict, os.path.join(self.args.output_path, f"{name}_{self.args.train_stage}.pth"))

def load_parameters(self, load_file):
# self.args.load: 'xxx/Epoch{xx}_SFT' or 'xxx/{xx}step_RL'
26 changes: 18 additions & 8 deletions RecLM-gen/base/trainer.py
@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import json
import os
from collections import deque
import torch.nn.functional as F
import numpy as np
@@ -19,6 +20,7 @@
from rl.reward import RewardModel
from utils.tools import masked_mean, whiten, eval_decorator, shift, log_prob, Memory, sync_dict
from base.model import BaseModel
from param import Config


# trainer
@@ -30,12 +32,18 @@ def __init__(self, args):
gradient_accumulation_steps=self.args.gradient_accumulation_steps,
kwargs_handlers=[DistributedDataParallelKwargs(find_unused_parameters=(self.args.train_stage == 'RL'))] # need for RL
)
set_seed(self.args.seed)
# Use CUDA_VISIBLE_DEVICES=x to select gpu, do not set the --gpu command param
self.args.gpu = self.accelerator.device
self.args.gpu = self.accelerator.device.__str__()
if self.accelerator.is_main_process:
print(Config(**vars(args)))
if not os.path.exists(args.output_path):
os.makedirs(args.output_path)
if args.train_stage in ['SFT', 'RL']:
with open(os.path.join(args.output_path, 'config.json'), 'w') as f:
json.dump(vars(args), f, indent=4)

self.actor_critic = BaseModel(args=self.args, device=self.args.gpu)
if self.accelerator.is_main_process:
print(args)
self.actor_critic.print_trainable_parameters()

self.warped_actor_critic = None
@@ -370,14 +378,16 @@ def Adapter_merge(self):
if self.args.train_stage == 'SFT_Merge':
train_epoch = self.actor_critic.load_parameters(self.args.SFT_load)
model = self.actor_critic.lora_model.merge_and_unload(progressbar=True)
model.save_pretrained(f'{self.args.output}SFT_Epoch{train_epoch:02d}', safe_serialization=True)
self.tokenizer.save_pretrained(f'{self.args.output}SFT_Epoch{train_epoch:02d}')
save_path = os.path.join(self.args.output_path, f'SFT_Epoch{train_epoch:02d}')
model.save_pretrained(save_path, safe_serialization=True)
self.tokenizer.save_pretrained(save_path)
elif self.args.train_stage == 'RL_Merge':
train_step = self.actor_critic.load_parameters(self.args.RL_load)
self.actor_critic.lora_model.delete_adapter(self.actor_critic.critic_lora_scope)
model = self.actor_critic.lora_model.merge_and_unload(progressbar=True)
model.save_pretrained(f'{self.args.output}RL_Step{train_step}', safe_serialization=True)
self.tokenizer.save_pretrained(f'{self.args.output}RL_Step{train_step}')
save_path = os.path.join(self.args.output_path, f'RL_Step{train_step}')
model.save_pretrained(save_path, safe_serialization=True)
self.tokenizer.save_pretrained(save_path)
else:
raise NotImplementedError

29 changes: 11 additions & 18 deletions RecLM-gen/main.py
@@ -1,41 +1,34 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import json
import os.path
import random
import sys

import numpy as np
import torch
import transformers
from accelerate.utils import set_seed

from base.trainer import BaseTrainer
from rl.trainer import RLTrainer
from sft.trainer import SFTTrainer
from param import Config, get_args

if __name__ == '__main__':
args = get_args()
kwargs = vars(args)
args = Config(**kwargs)
assert args.train_stage in ['SFT', 'RL', 'SFT_Merge', 'RL_Merge']
assert args.output_path is not None

# Set seeds
torch.manual_seed(args.seed)
random.seed(args.seed)
np.random.seed(args.seed)
transformers.set_seed(args.seed)

if args.train_stage in ['RL', 'RL_merge']:
if args.model_name is None:
if args.lr > 0:
args.model_name = f'RL_Total_train_LM-{args.lm_head_full_tune}_VM-{args.vague_mapping}_NR-20.1_SN-{args.sample_num}' \
f'_Q-{args.quantization}_T{len(args.RL_train_tasks.split(","))}' \
f'_FG-{args.fine_grain_reward}_LR-{args.lr}_LDO-{args.lora_dropout}_WD-{args.weight_decay}' \
f'_KLC-{args.kl_coef}_EW-{args.entropy_weight}_RS-{args.reward_scale}_RW-{args.whiten_reward}' \
f'_VFC-{args.vf_coef}_KLT-{args.policy_kl_threshold}_LRP-{args.lr_power}_GAMMA-{args.gamma}' \
f'_GAS-{args.gradient_accumulation_steps}_LB-{args.learn_batch}_RA_{args.reward_alpha}' \
f'_{args.model_name_suffix}'
else:
args.model_name = f'RL_Total_init_LM-{args.lm_head}_VM-{args.vague_mapping}_NR-20.1_SN-{args.sample_num}_Q-{args.quantization}_T{len(args.RL_train_tasks.split(","))}'
args.output = f'{args.output}{args.model_name}/'
set_seed(args.seed)

# if args.log_to_file:
# log_file = open(args.output+f'{time.strftime("%Y-%m-%d %Hh_%Mm_%Ss", time.localtime())} {args.train_stage}.log', 'w')
# log_file = open(args.output_path+f'{time.strftime("%Y-%m-%d %Hh_%Mm_%Ss", time.localtime())} {args.train_stage}.log', 'w')
# sys.stdout = log_file

if args.train_stage == 'SFT':
4 changes: 1 addition & 3 deletions RecLM-gen/param.py
@@ -43,8 +43,6 @@ def add_args_RL(parser):
parser.add_argument("--lr_power", type=float, default=2.0)
parser.add_argument("--learn_batch", type=int, default=2)
parser.add_argument("--reward_alpha", type=float, default=0.5)
parser.add_argument("--model_name", type=str, default=None, help='auto generated while RL, or custom setting')
parser.add_argument("--model_name_suffix", type=str, default="")
parser.add_argument("--val_save_step", type=int, default=100)
return parser

@@ -70,7 +68,7 @@ def add_args(parse=True, **optional_kwargs):
parser.add_argument("--val_num_per_task", type=int, default=320, help='the number of valuation samples')

# Checkpoint
parser.add_argument('--output', type=str, default='snap/', help='path to save model params file, or to save the merged model.')
parser.add_argument('--output_path', type=str, default='snap/', help='path to save model params file, or to save the merged model.')

# Model Config
parser.add_argument('--backbone', type=str, default='google/flan-t5-xl')
8 changes: 3 additions & 5 deletions RecLM-gen/rl/reward.py
@@ -31,11 +31,9 @@ def __init__(self, args, tokenizer):
'list': RunningMoments()
}

def ranking_score_func(self, idx):
if 'NR-9' in self.args.model_name:
return 1.0-idx/len(self.metas) # NR-9
else:
return 1.0/math.log2(idx+2) # NR-8
@staticmethod
def ranking_score_func(idx):
return 1.0/math.log2(idx+2) # NR-8

def reward_calculate(self, task, input_field_data, title_list):
ranking_score_frac, task_score_frac = self.args.reward_alpha, 1.0-self.args.reward_alpha # NR-13
6 changes: 3 additions & 3 deletions RecLM-gen/rl/trainer.py
@@ -23,7 +23,7 @@ def __init__(self, args):

self.writer = None
if self.accelerator.is_main_process:
self.writer = SummaryWriter(log_dir=f'logs/RL_train/{self.args.model_name}', flush_secs=30)
self.writer = SummaryWriter(log_dir=os.path.join('logs', self.args.output_path), flush_secs=30)

self.actor_critic.load_parameters(self.args.RL_load)
self.dataset_prepare()
@@ -236,10 +236,10 @@ def RL_val(self, step: int):

def RL_val_path(self):
val_steps = {}
for params_file in os.listdir(self.args.output):
for params_file in os.listdir(self.args.output_path):
step = re.findall(r'^(\d+)step_RL\.pth', params_file) # matching the train step from file name
if len(step) > 0:
val_steps[step[0]] = os.path.join(self.args.output, params_file[:-4])
val_steps[step[0]] = os.path.join(self.args.output_path, params_file[:-4])
if self.args.dry:
val_steps[0] = None
val_steps = {_: val_steps[_] for _ in sorted(val_steps, key=lambda k: k) if _ >= 0}
14 changes: 9 additions & 5 deletions RecLM-gen/scripts/rl_merge.sh
@@ -1,17 +1,21 @@
#!/bin/bash


# --RL_load is the file saved in RL.
BACKBONE="snap/ICR_SubMovie/SFT_Epoch27/"
OUTPUT_PATH_SUFFIX="RL/"
RL_LOAD="3000step_RL"

# --RL_load is the parameter file saved during RL training: snap/ICR_SubMovie/SFT_Epoch27/RL/3000step_RL.pth
# Keep the model-parameter settings (such as RL_actor_lora_r, RL_actor_lora_a, RL_critic_lora_r, RL_critic_lora_a and lm_head_full_tune) the same as in training.
CUDA_VISIBLE_DEVICES=8 python main.py \
--output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/ \
--backbone snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch27/ \
CUDA_VISIBLE_DEVICES=0 python main.py \
--output_path ${BACKBONE}${OUTPUT_PATH_SUFFIX} \
--backbone $BACKBONE \
--train_stage RL_Merge \
--RL_actor_lora_r 4 \
--RL_actor_lora_a 2 \
--RL_critic_lora_r 4 \
--RL_critic_lora_a 2 \
--RL_load snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/4800step_RL \
--RL_load ${BACKBONE}${OUTPUT_PATH_SUFFIX}${RL_LOAD} \
--lm_head_full_tune \
--FA2

10 changes: 7 additions & 3 deletions RecLM-gen/scripts/rl_train.sh
@@ -1,10 +1,14 @@
#!/bin/bash

CUDA_VISIBLE_DEVICES=4,5 accelerate launch --num_processes 2 --gpu_ids all main.py \

BACKBONE="snap/ICR_SubMovie/SFT_Epoch27/"
OUTPUT_PATH_SUFFIX="RL/"

CUDA_VISIBLE_DEVICES=0,1 accelerate launch --num_processes 2 --gpu_ids all main.py \
--seed 0 \
--data_path data/dataset/sub_movie/ \
--output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/ \
--backbone snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch27/ \
--output_path ${BACKBONE}${OUTPUT_PATH_SUFFIX} \
--backbone ${BACKBONE} \
--item_index title64_t \
--batch_size 8 \
--gradient_accumulation_steps 2 \
14 changes: 9 additions & 5 deletions RecLM-gen/scripts/sft_merge.sh
@@ -1,13 +1,17 @@
#!/bin/bash


# --SFT_load is the model parameter file saved in SFT: snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/Epoch37_SFT.pth
OUTPUT_PATH="snap/ICR_SubMovie/"
BACKBONE="snap/Llama-2-7b-hf-chat/"
SFT_LOAD="Epoch27_SFT"

# --SFT_load is the model parameter file saved in SFT: snap/ICR_SubMovie/Epoch27_SFT.pth
# Keep the model-parameter settings (such as SFT_actor_lora_r and SFT_actor_lora_a) the same as in training.
# The parameters saved in the file must fully cover the trainable parameters of BaseModel.
CUDA_VISIBLE_DEVICES=8 python main.py \
--backbone snap/Llama-2-7b-hf-chat/ \
CUDA_VISIBLE_DEVICES=0 python main.py \
--backbone $BACKBONE \
--train_stage SFT_Merge \
--SFT_actor_lora_r 16 \
--SFT_actor_lora_a 8 \
--output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/ \
--SFT_load snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/Epoch27_SFT
--output_path ${OUTPUT_PATH} \
--SFT_load ${OUTPUT_PATH}${SFT_LOAD}
10 changes: 7 additions & 3 deletions RecLM-gen/scripts/sft_train.sh
@@ -1,10 +1,14 @@
#!/bin/bash

CUDA_VISIBLE_DEVICES=0,1,2,4 accelerate launch --num_processes 4 --gpu_ids all main.py \

OUTPUT_PATH="snap/ICR_SubMovie/"
BACKBONE="snap/Llama-2-7b-hf-chat/"

CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch --num_processes 4 --gpu_ids all main.py \
--seed 0 \
--data_path data/dataset/sub_movie/ \
--output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/ \
--backbone snap/Llama-2-7b-hf-chat/ \
--output_path ${OUTPUT_PATH} \
--backbone ${BACKBONE} \
--item_index title64_t \
--batch_size 1 \
--topk 10 \
11 changes: 8 additions & 3 deletions RecLM-gen/scripts/single_gpu_rl_train.sh
@@ -1,10 +1,14 @@
#!/bin/bash


BACKBONE="snap/ICR_SubMovie/SFT_Epoch27/"
OUTPUT_PATH_SUFFIX="RL/"

CUDA_VISIBLE_DEVICES=0 python main.py \
--seed 0 \
--data_path data/dataset/sub_movie/ \
--output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/ \
--backbone snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/ \
--output_path ${BACKBONE}${OUTPUT_PATH_SUFFIX} \
--backbone ${BACKBONE} \
--item_index title64_t \
--batch_size 8 \
--gradient_accumulation_steps 4 \
@@ -35,4 +39,5 @@ CUDA_VISIBLE_DEVICES=0 python main.py \
--whiten_reward \
--num_episodes 2 \
--reward_alpha 0.5 \
--fine_grain_reward
--fine_grain_reward \
--teacher_port 12621
13 changes: 9 additions & 4 deletions RecLM-gen/scripts/single_gpu_sft_train.sh
@@ -1,18 +1,22 @@
#!/bin/bash


OUTPUT_PATH="snap/ICR_SubMovie/"
BACKBONE="snap/Llama-2-7b-hf-chat/"

CUDA_VISIBLE_DEVICES=0 python main.py \
--seed 0 \
--data_path data/dataset/sub_movie/ \
--output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/ \
--backbone snap/Llama-2-7b-hf-chat/ \
--output_path ${OUTPUT_PATH} \
--backbone ${BACKBONE} \
--item_index title64_t \
--batch_size 1 \
--topk 10 \
--clip_grad_norm 1.0 \
--epoch 40 \
--gen_max_length 512 \
--lr 0.001 \
--gradient_accumulation_steps 16 \
--gradient_accumulation_steps 64 \
--train_stage SFT \
--SFT_actor_lora_r 16 \
--SFT_actor_lora_a 8 \
@@ -25,4 +29,5 @@ CUDA_VISIBLE_DEVICES=0 python main.py \
--share_chat_gpt_ratio 0.5 \
--FA2 \
--llama2_chat_template \
--idx
--idx \
--teacher_port 12621