Dev (#37)
* Read GPT setting from env variable; Change the path of saved parameter. (#25)

* read GPT setting from env variable

* change setting of parameter path

* edit readme; fix bug in RL.

* changes in return value (#36)

* add data preprocess script; edit readme. (#33)

* add data preprocess script; edit readme.

* fix scripts; fix unirec server bug; fix data preprocess.

* edit readme.

---------

Co-authored-by: Luuuk12321 <[email protected]>
Co-authored-by: v-yihuang1 <[email protected]>
3 people authored Apr 19, 2024
1 parent 5d9ab63 commit 44a6043
Showing 23 changed files with 524 additions and 157 deletions.
24 changes: 12 additions & 12 deletions RecLM-eval/evaluates/metrics4rec.py
@@ -275,12 +275,12 @@ def evaluate_all(model, predicted_items, groudtruths, topk=10, sim_threshold=0.6
msg += "\n{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}".format(avg_ndcg, avg_recall, avg_hit, avg_prec, map_, mrr)
print(msg)
res = {
'ndcg': avg_ndcg,
'map': map_,
'recall': avg_recall,
'precision': avg_prec,
'mrr': mrr,
'hit': avg_hit,
f'ndcg@{topk}': avg_ndcg,
f'map@{topk}': map_,
f'recall@{topk}': avg_recall,
f'precision@{topk}': avg_prec,
f'mrr@{topk}': mrr,
f'hit@{topk}': avg_hit,
}
return msg, res

@@ -330,11 +330,11 @@ def evaluate_all_id(predicted_items, groudtruths, topk=10):
msg += "\n{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}".format(avg_ndcg, avg_recall, avg_hit, avg_prec, map_, mrr)
print(msg)
res = {
'ndcg': avg_ndcg,
'map': map_,
'recall': avg_recall,
'precision': avg_prec,
'mrr': mrr,
'hit': avg_hit,
f'ndcg@{topk}': avg_ndcg,
f'map@{topk}': map_,
f'recall@{topk}': avg_recall,
f'precision@{topk}': avg_prec,
f'mrr@{topk}': mrr,
f'hit@{topk}': avg_hit,
}
return msg, res
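
The change above folds the ranking cutoff into the metric names, so any downstream code that indexed the plain keys needs updating. A small illustrative sketch of the resulting dict (dummy numbers, not real results):

```python
# Illustrative only: shape of the result dict returned after this change, for topk=10.
topk = 10
res = {
    f'ndcg@{topk}': 0.1234,
    f'map@{topk}': 0.0567,
    f'recall@{topk}': 0.2345,
    f'precision@{topk}': 0.0456,
    f'mrr@{topk}': 0.1111,
    f'hit@{topk}': 0.3456,
}
assert 'ndcg@10' in res and 'ndcg' not in res  # callers keyed on 'ndcg' must switch to 'ndcg@10'
```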
48 changes: 34 additions & 14 deletions RecLM-gen/README.md
@@ -8,9 +8,9 @@ Welcome to the repository for [Aligning Large Language Models for Controllable
Our implementation leverages the [`transformers`](https://github.com/huggingface/transformers) library by Hugging Face.


## Raw dataset format
## Intermediate dataset format

To use this repo, you'll need a raw dataset comprising at least three files located in `data_path`: `category.pickle`, `meta.pickle`, and `sequential.pickle`. Additionally, `ranking_candidate.pickle` is required for reranking task tests.
To use this repo, you'll need an intermediate dataset comprising at least three files located in `data_path`: `category.pickle`, `meta.pickle`, and `sequential.pickle`. Additionally, `ranking_candidate.pickle` is required for reranking task tests.

**A volunteer has prepared a copy of the data for reproducing the experiments. You can download it from this [Google Drive link](https://drive.google.com/file/d/1cfw-KSqEwGF0eB_hm1PUWhUTdloT04Le/view?usp=drive_link). Thanks, [Luuuk12321](https://github.com/Luuuk12321)!**
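
For a quick sanity check before training, the pickle files listed above can be loaded directly. A minimal sketch (the `data_path` value below is an assumption; point it at wherever the files were actually placed):

```python
# Minimal sanity check of an intermediate dataset directory (sketch).
import os
import pickle

data_path = "data/dataset/sub_movie/"  # hypothetical location

def load_pickle(name):
    with open(os.path.join(data_path, name), "rb") as f:
        return pickle.load(f)

category = load_pickle("category.pickle")
meta = load_pickle("meta.pickle")
sequential = load_pickle("sequential.pickle")

print(f"{len(category)} categories, {len(meta)} items, {len(sequential)} users")
```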

@@ -57,6 +57,13 @@ This file contains a dictionary where the keys are user IDs, and the values are
}
```

### Raw dataset preprocess
We provide the script `preprocess/data_preprocess_amazon.py` to automatically generate the intermediate dataset in the above format from the downloaded raw dataset.

First, download `Movies_and_TV_5.json.gz` and `meta_Movies_and_TV.json.gz` from [Amazon](https://cseweb.ucsd.edu/~jmcauley/datasets/amazon_v2/), place them in `data/dataset/sub_movie/`, and run the following command.
```shell
./scripts/data_preprocess_amazon.sh data/dataset/sub_movie/
```

## 1. SASRec Server
We utilize the [UniRec](https://github.com/microsoft/UniRec) library to implement the SASRec teacher model and deploy it as a server.
@@ -91,7 +98,7 @@ pip install dist/unirec-*.whl
### 1.2. SASRec dataset and model
Model parameters and weights are saved in `unirec/output/`.

The dataset files `train.pkl`, `valid.pkl`, `test.pkl`, `user_history.pkl`, `map.pkl`, and `category.pickle` (as described in the raw dataset format) should be placed in `unirec/data/sub_movie/`.
The dataset files `train.pkl`, `valid.pkl`, `test.pkl`, `user_history.pkl`, `map.pkl`, and `category.pickle` (as described in the intermediate dataset format) should be placed in `unirec/data/sub_movie/`.

Use these files to train the SASRec model with the UniRec library.

@@ -181,7 +188,7 @@ To use a static dataset instead, specify the `--train_data_file` and `--val_data

### 2.4. SFT model merge

Merge the trained models using the script found at [scripts/sft_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/sft_merge.sh). The merged model will be saved to `snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/`.
Merge the trained models using the script found at [scripts/sft_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/sft_merge.sh). The merged model will be saved to `snap/ICR_SubMovie/SFT_Epoch27/`.

**Note: Use `CUDA_VISIBLE_DEVICES=x` to select a GPU. Do not set the `--gpu` command parameter.**

@@ -245,27 +252,40 @@ Single-GPU training is supported for the RL stage as well. See [scripts/single_g


### 3.4. RL model merge
Merge the RL-trained models using the script provided at [scripts/rl_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/rl_merge.sh). The merged model will be saved in an appropriately named directory within the `snap/` folder, such as `snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/RLHF_Step7000/`
Merge the RL-trained models using the script provided at [scripts/rl_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/rl_merge.sh). The merged model will be saved in an appropriately named directory within the `snap/` folder, such as `snap/ICR_SubMovie/SFT_Epoch27/RL/RLHF_Step3000/`


## 4. Test stage

### 4.1. VLLM deploy

Deploy the VLLM using the following command, making sure to specify the correct model directory:
### 4.1. Llama2 deploy and test
```shell
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/Llama-2-7b-hf-chat/
./scripts/tasks_test.sh snap/Llama-2-7b-hf-chat/ 13579 sub_movie
```

### 4.2. SFT model deploy and test
```shell
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/RLHF_Step7000/
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie/SFT_Epoch27/
./scripts/tasks_test.sh snap/ICR_SubMovie/SFT_Epoch27/ 13579 sub_movie
```

### 4.2. VLLM test
### 4.3. RL model deploy and test
```shell
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie/SFT_Epoch27/RL/RLHF_Step3000/
./scripts/tasks_test.sh snap/ICR_SubMovie/SFT_Epoch27/RL/RLHF_Step3000/ 13579 sub_movie
```
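
The deploy commands above expose an OpenAI-compatible HTTP API on port 13579. A minimal smoke-test sketch, assuming vLLM's standard `/v1/completions` route and that the `--model` path doubles as the served model name (the prompt is only an example):

```python
# Smoke test against the vLLM OpenAI-compatible server started above (sketch).
import requests

resp = requests.post(
    "http://localhost:13579/v1/completions",
    json={
        "model": "snap/ICR_SubMovie/SFT_Epoch27/RL/RLHF_Step3000/",
        "prompt": "Recommend a movie similar to The Matrix.",
        "max_tokens": 64,
        "temperature": 0.0,
    },
    timeout=60,
)
print(resp.json()["choices"][0]["text"])
```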

Run the test scripts by specifying the path to the model directory and the server port:
### 4.4. ChatGPT test
To test the capability of ChatGPT, first set the following environment variables. If you are not using the Azure OpenAI API (i.e., OPENAI_API_TYPE is not "azure"), you only need to set OPENAI_API_KEY and ENGINE.

```shell
./scripts/tasks_test.sh snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/ 13579
./scripts/tasks_test.sh snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/RL_Step7000/ 13579
export OPENAI_API_KEY=xxx
export OPENAI_API_BASE=https://xxx.openai.azure.com/
export OPENAI_API_VERSION=2023-03-15-preview
export OPENAI_API_TYPE=azure
export ENGINE=gpt-3.5-turbo-1106

./scripts/tasks_test.sh gpt-3.5-turbo-1106 0 sub_movie
```
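
For reference, a hedged sketch of how a client might consume these variables, assuming the legacy `openai<1.0` Python client; the repo's evaluation scripts may wire this up differently:

```python
# Sketch: reading the exported variables and issuing one chat completion.
import os
import openai

openai.api_key = os.environ["OPENAI_API_KEY"]
if os.environ.get("OPENAI_API_TYPE") == "azure":
    openai.api_type = "azure"
    openai.api_base = os.environ["OPENAI_API_BASE"]
    openai.api_version = os.environ["OPENAI_API_VERSION"]

engine = os.environ.get("ENGINE", "gpt-3.5-turbo-1106")
# Azure deployments are addressed by engine=, the public API by model=.
name_kwarg = {"engine": engine} if openai.api_type == "azure" else {"model": engine}
reply = openai.ChatCompletion.create(
    messages=[{"role": "user", "content": "Recommend three sci-fi movies."}],
    **name_kwarg,
)
print(reply["choices"][0]["message"]["content"])
```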


4 changes: 1 addition & 3 deletions RecLM-gen/base/model.py
@@ -98,8 +98,6 @@ def __init__(self, args, device, actor_lora_scope='actor', critic_lora_scope='cr
self.model.lm_head.requires_grad_(True)

def save_parameters(self, name='Epoch00'):
if not os.path.isdir(self.args.output):
os.makedirs(self.args.output, exist_ok=True)
params = {}
if self.args.train_stage in ['SFT', 'RL']:
params.update(self.actor_named_parameters)
@@ -108,7 +106,7 @@ def save_parameters(self, name='Epoch00'):
state_dict = {
'params': params,
}
torch.save(state_dict, os.path.join(self.args.output, f"{name}_{self.args.train_stage}.pth"))
torch.save(state_dict, os.path.join(self.args.output_path, f"{name}_{self.args.train_stage}.pth"))

def load_parameters(self, load_file):
# self.args.load: 'xxx/Epoch{xx}_SFT' or 'xxx/{xx}step_RL'
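
A checkpoint written by `save_parameters` above holds only a `params` dict. A sketch of inspecting one, with a hypothetical file name that follows the `f"{name}_{train_stage}.pth"` pattern used in the save call:

```python
# Sketch: inspecting a checkpoint written by save_parameters above.
import torch

state_dict = torch.load("snap/ICR_SubMovie/Epoch27_SFT.pth", map_location="cpu")
params = state_dict["params"]        # the LoRA / lm_head tensors saved above
print(sorted(params.keys())[:5])     # peek at the first few parameter names
```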
26 changes: 18 additions & 8 deletions RecLM-gen/base/trainer.py
@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import json
import os
from collections import deque
import torch.nn.functional as F
import numpy as np
@@ -19,6 +20,7 @@
from rl.reward import RewardModel
from utils.tools import masked_mean, whiten, eval_decorator, shift, log_prob, Memory, sync_dict
from base.model import BaseModel
from param import Config


# trainer
@@ -30,12 +32,18 @@ def __init__(self, args):
gradient_accumulation_steps=self.args.gradient_accumulation_steps,
kwargs_handlers=[DistributedDataParallelKwargs(find_unused_parameters=(self.args.train_stage == 'RL'))] # need for RL
)
set_seed(self.args.seed)
# Use CUDA_VISIBLE_DEVICES=x to select gpu, do not set the --gpu command param
self.args.gpu = self.accelerator.device
self.args.gpu = self.accelerator.device.__str__()
if self.accelerator.is_main_process:
print(Config(**vars(args)))
if not os.path.exists(args.output_path):
os.makedirs(args.output_path)
if args.train_stage in ['SFT', 'RL']:
with open(os.path.join(args.output_path, 'config.json'), 'w') as f:
json.dump(vars(args), f, indent=4)

self.actor_critic = BaseModel(args=self.args, device=self.args.gpu)
if self.accelerator.is_main_process:
print(args)
self.actor_critic.print_trainable_parameters()

self.warped_actor_critic = None
@@ -370,14 +378,16 @@ def Adapter_merge(self):
if self.args.train_stage == 'SFT_Merge':
train_epoch = self.actor_critic.load_parameters(self.args.SFT_load)
model = self.actor_critic.lora_model.merge_and_unload(progressbar=True)
model.save_pretrained(f'{self.args.output}SFT_Epoch{train_epoch:02d}', safe_serialization=True)
self.tokenizer.save_pretrained(f'{self.args.output}SFT_Epoch{train_epoch:02d}')
save_path = os.path.join(self.args.output_path, f'SFT_Epoch{train_epoch:02d}')
model.save_pretrained(save_path, safe_serialization=True)
self.tokenizer.save_pretrained(save_path)
elif self.args.train_stage == 'RL_Merge':
train_step = self.actor_critic.load_parameters(self.args.RL_load)
self.actor_critic.lora_model.delete_adapter(self.actor_critic.critic_lora_scope)
model = self.actor_critic.lora_model.merge_and_unload(progressbar=True)
model.save_pretrained(f'{self.args.output}RL_Step{train_step}', safe_serialization=True)
self.tokenizer.save_pretrained(f'{self.args.output}RL_Step{train_step}')
save_path = os.path.join(self.args.output_path, f'RL_Step{train_step}')
model.save_pretrained(save_path, safe_serialization=True)
self.tokenizer.save_pretrained(save_path)
else:
raise NotImplementedError

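
Because `save_pretrained` writes a standard Hugging Face checkpoint, the merged model produced by `Adapter_merge` can be reloaded directly. A brief sketch (the path is an example output directory, not a fixed name):

```python
# Sketch: reloading a merged checkpoint written by Adapter_merge via save_pretrained.
from transformers import AutoModelForCausalLM, AutoTokenizer

save_path = "snap/ICR_SubMovie/SFT_Epoch27/"
tokenizer = AutoTokenizer.from_pretrained(save_path)
model = AutoModelForCausalLM.from_pretrained(save_path)
print(model.config.model_type)
```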
2 changes: 0 additions & 2 deletions RecLM-gen/data_process.py
@@ -14,8 +14,6 @@

if __name__ == '__main__':
args = get_args()
kwargs = vars(args)
args = Config(**kwargs)
# Set seeds
torch.manual_seed(args.seed)
random.seed(args.seed)
29 changes: 11 additions & 18 deletions RecLM-gen/main.py
@@ -1,41 +1,34 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import json
import os.path
import random
import sys

import numpy as np
import torch
import transformers
from accelerate.utils import set_seed

from base.trainer import BaseTrainer
from rl.trainer import RLTrainer
from sft.trainer import SFTTrainer
from param import Config, get_args

if __name__ == '__main__':
args = get_args()
kwargs = vars(args)
args = Config(**kwargs)
assert args.train_stage in ['SFT', 'RL', 'SFT_Merge', 'RL_Merge']
assert args.output_path is not None

# Set seeds
torch.manual_seed(args.seed)
random.seed(args.seed)
np.random.seed(args.seed)
transformers.set_seed(args.seed)

if args.train_stage in ['RL', 'RL_merge']:
if args.model_name is None:
if args.lr > 0:
args.model_name = f'RL_Total_train_LM-{args.lm_head_full_tune}_VM-{args.vague_mapping}_NR-20.1_SN-{args.sample_num}' \
f'_Q-{args.quantization}_T{len(args.RL_train_tasks.split(","))}' \
f'_FG-{args.fine_grain_reward}_LR-{args.lr}_LDO-{args.lora_dropout}_WD-{args.weight_decay}' \
f'_KLC-{args.kl_coef}_EW-{args.entropy_weight}_RS-{args.reward_scale}_RW-{args.whiten_reward}' \
f'_VFC-{args.vf_coef}_KLT-{args.policy_kl_threshold}_LRP-{args.lr_power}_GAMMA-{args.gamma}' \
f'_GAS-{args.gradient_accumulation_steps}_LB-{args.learn_batch}_RA_{args.reward_alpha}' \
f'_{args.model_name_suffix}'
else:
args.model_name = f'RL_Total_init_LM-{args.lm_head}_VM-{args.vague_mapping}_NR-20.1_SN-{args.sample_num}_Q-{args.quantization}_T{len(args.RL_train_tasks.split(","))}'
args.output = f'{args.output}{args.model_name}/'
set_seed(args.seed)

# if args.log_to_file:
# log_file = open(args.output+f'{time.strftime("%Y-%m-%d %Hh_%Mm_%Ss", time.localtime())} {args.train_stage}.log', 'w')
# log_file = open(args.output_path+f'{time.strftime("%Y-%m-%d %Hh_%Mm_%Ss", time.localtime())} {args.train_stage}.log', 'w')
# sys.stdout = log_file

if args.train_stage == 'SFT':
4 changes: 1 addition & 3 deletions RecLM-gen/param.py
@@ -43,8 +43,6 @@ def add_args_RL(parser):
parser.add_argument("--lr_power", type=float, default=2.0)
parser.add_argument("--learn_batch", type=int, default=2)
parser.add_argument("--reward_alpha", type=float, default=0.5)
parser.add_argument("--model_name", type=str, default=None, help='auto generated while RL, or custom setting')
parser.add_argument("--model_name_suffix", type=str, default="")
parser.add_argument("--val_save_step", type=int, default=100)
return parser

@@ -70,7 +68,7 @@ def add_args(parse=True, **optional_kwargs):
parser.add_argument("--val_num_per_task", type=int, default=320, help='the number of valuation samples')

# Checkpoint
parser.add_argument('--output', type=str, default='snap/', help='path to save model params file, or to save the merged model.')
parser.add_argument('--output_path', type=str, default='snap/', help='path to save model params file, or to save the merged model.')

# Model Config
parser.add_argument('--backbone', type=str, default='google/flan-t5-xl')
