diff --git a/RecLM-eval/evaluates/metrics4rec.py b/RecLM-eval/evaluates/metrics4rec.py
index 67a5a63..8e6195f 100644
--- a/RecLM-eval/evaluates/metrics4rec.py
+++ b/RecLM-eval/evaluates/metrics4rec.py
@@ -275,12 +275,12 @@ def evaluate_all(model, predicted_items, groudtruths, topk=10, sim_threshold=0.6
     msg += "\n{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}".format(avg_ndcg, avg_recall, avg_hit, avg_prec, map_, mrr)
     print(msg)
     res = {
-        'ndcg': avg_ndcg,
-        'map': map_,
-        'recall': avg_recall,
-        'precision': avg_prec,
-        'mrr': mrr,
-        'hit': avg_hit,
+        f'ndcg@{topk}': avg_ndcg,
+        f'map@{topk}': map_,
+        f'recall@{topk}': avg_recall,
+        f'precision@{topk}': avg_prec,
+        f'mrr@{topk}': mrr,
+        f'hit@{topk}': avg_hit,
     }
     return msg, res
 
@@ -330,11 +330,11 @@ def evaluate_all_id(predicted_items, groudtruths, topk=10):
     msg += "\n{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}".format(avg_ndcg, avg_recall, avg_hit, avg_prec, map_, mrr)
     print(msg)
     res = {
-        'ndcg': avg_ndcg,
-        'map': map_,
-        'recall': avg_recall,
-        'precision': avg_prec,
-        'mrr': mrr,
-        'hit': avg_hit,
+        f'ndcg@{topk}': avg_ndcg,
+        f'map@{topk}': map_,
+        f'recall@{topk}': avg_recall,
+        f'precision@{topk}': avg_prec,
+        f'mrr@{topk}': mrr,
+        f'hit@{topk}': avg_hit,
     }
     return msg, res
\ No newline at end of file
diff --git a/RecLM-gen/README.md b/RecLM-gen/README.md
index 20d32b1..2278948 100644
--- a/RecLM-gen/README.md
+++ b/RecLM-gen/README.md
@@ -8,9 +8,9 @@ Welcome to the repository for [Aligning Large Language Models for Controllable
 
 Our implementation leverages the [`transformers`](https://github.com/huggingface/transformers) library by Hugging Face.
 
-## Raw dataset format
+## Intermediate dataset format
 
-To use this repo, you'll need a raw dataset comprising at least three files located in `data_path`: `category.pickle`, `meta.pickle`, and `sequential.pickle`. Additionally, `ranking_candidate.pickle` is required for reranking task tests.
+To use this repo, you'll need an intermediate dataset comprising at least three files located in `data_path`: `category.pickle`, `meta.pickle`, and `sequential.pickle`. Additionally, `ranking_candidate.pickle` is required for reranking task tests.
 
 **A volunteer has prepared a copy of data for reproducing the experiments. You can download it from [Google Drive link](https://drive.google.com/file/d/1cfw-KSqEwGF0eB_hm1PUWhUTdloT04Le/view?usp=drive_link). Thanks [Luuuk12321](https://github.com/Luuuk12321)!**
 
@@ -57,6 +57,13 @@ This file contains a dictionary where the keys are user IDs, and the values are
 }
 ```
+### Raw dataset preprocessing
+We provide the code in `preprocess/data_preprocess_amazon.py` to automatically generate the intermediate dataset in the above format from the downloaded raw dataset.
+
+First, download `Movies_and_TV_5.json.gz` and `meta_Movies_and_TV.json.gz` from [Amazon](https://cseweb.ucsd.edu/~jmcauley/datasets/amazon_v2/), then place them in `data/dataset/sub_movie/` and run the following command.
+```shell
+./scripts/data_preprocess_amazon.sh data/dataset/sub_movie/
+```
 
 ## 1. SASRec Server
 
 We utilize the [UniRec](https://github.com/microsoft/UniRec) library to implement the SASRec teacher model and deploy as a server.
@@ -91,7 +98,7 @@ pip install dist/unirec-*.whl
 
 ### 1.2. SASRec dataset and model
 
 Model parameters and weights are saved in `unirec/output/`.
-The dataset files `train.pkl`, `valid.pkl`, `test.pkl`, `user_history.pkl`, `map.pkl`, and `category.pickle` (as described in the raw dataset format) should be placed in `unirec/data/sub_movie/`.
+The dataset files `train.pkl`, `valid.pkl`, `test.pkl`, `user_history.pkl`, `map.pkl`, and `category.pickle` (as described in the intermediate dataset format) should be placed in `unirec/data/sub_movie/`.
 Use these files to train the SASRec model with the UniRec library.
 
@@ -181,7 +188,7 @@ To use a static dataset instead, specify the `--train_data_file` and `--val_data
 
 ### 2.4. SFT model merge
 
-Merge the trained models using the script found at [scripts/sft_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/sft_merge.sh). The merged model will be saved to `snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/`.
+Merge the trained models using the script found at [scripts/sft_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/sft_merge.sh). The merged model will be saved to `snap/ICR_SubMovie/SFT_Epoch27/`.
 
 **Note: Use `CUDA_VISIBLE_DEVICES=x` to select a GPU. Do not set the `--gpu` command parameter.**
 
@@ -245,27 +252,40 @@ Single-GPU training is supported for the RL stage as well. See [scripts/single_g
 
 ### 3.4. RL model merge
 
-Merge the RL-trained models using the script provided at [scripts/rl_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/rl_merge.sh). The merged model will be saved in an appropriately named directory within the `snap/` folder, such as `snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/RLHF_Step7000/`
+Merge the RL-trained models using the script provided at [scripts/rl_merge.sh](https://github.com/Luuuk12321/RecLM-gen/blob/main/scripts/rl_merge.sh). The merged model will be saved in an appropriately named directory within the `snap/` folder, such as `snap/ICR_SubMovie/SFT_Epoch27/RL/RLHF_Step3000/`
 
 ## 4. Test stage
 
-### 4.1. VLLM deploy
-
-Deploy the VLLM using the following command, making sure to specify the correct model directory:
+### 4.1. Llama2 deploy and test
+```shell
+CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/Llama-2-7b-hf-chat/
+./scripts/tasks_test.sh snap/Llama-2-7b-hf-chat/ 13579 sub_movie
+```
+### 4.2. SFT model deploy and test
 
 ```shell
-CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/
-CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/RLHF_Step7000/
+CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie/SFT_Epoch27/
+./scripts/tasks_test.sh snap/ICR_SubMovie/SFT_Epoch27/ 13579 sub_movie
 ```
 
-### 4.2. VLLM test
+### 4.3. RL model deploy and test
+```shell
+CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --port 13579 --model snap/ICR_SubMovie/SFT_Epoch27/RL/RLHF_Step3000/
+./scripts/tasks_test.sh snap/ICR_SubMovie/SFT_Epoch27/RL/RLHF_Step3000/ 13579 sub_movie
+```
 
-Run the test scripts by specifying the path to the model directory and the server port:
+### 4.4. ChatGPT test
+If you want to test the capability of ChatGPT, you first need to set the following environment variables. If you are not using the Azure OpenAI API (i.e., OPENAI_API_TYPE is not "azure"), you only need to specify OPENAI_API_KEY and ENGINE.
 
 ```shell
-./scripts/tasks_test.sh snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/ 13579
-./scripts/tasks_test.sh snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/RL_Step7000/ 13579
+export OPENAI_API_KEY=xxx
+export OPENAI_API_BASE=https://xxx.openai.azure.com/
+export OPENAI_API_VERSION=2023-03-15-preview
+export OPENAI_API_TYPE=azure
+export ENGINE=gpt-3.5-turbo-1106
+
+./scripts/tasks_test.sh gpt-3.5-turbo-1106 0 sub_movie
 ```
 
diff --git a/RecLM-gen/base/model.py b/RecLM-gen/base/model.py
index 3b1e51e..3119df5 100644
--- a/RecLM-gen/base/model.py
+++ b/RecLM-gen/base/model.py
@@ -98,8 +98,6 @@ def __init__(self, args, device, actor_lora_scope='actor', critic_lora_scope='cr
         self.model.lm_head.requires_grad_(True)
 
     def save_parameters(self, name='Epoch00'):
-        if not os.path.isdir(self.args.output):
-            os.makedirs(self.args.output, exist_ok=True)
         params = {}
         if self.args.train_stage in ['SFT', 'RL']:
             params.update(self.actor_named_parameters)
@@ -108,7 +106,7 @@ def save_parameters(self, name='Epoch00'):
         state_dict = {
             'params': params,
         }
-        torch.save(state_dict, os.path.join(self.args.output, f"{name}_{self.args.train_stage}.pth"))
+        torch.save(state_dict, os.path.join(self.args.output_path, f"{name}_{self.args.train_stage}.pth"))
 
     def load_parameters(self, load_file):
         # self.args.load: 'xxx/Epoch{xx}_SFT' or 'xxx/{xx}step_RL'
diff --git a/RecLM-gen/base/trainer.py b/RecLM-gen/base/trainer.py
index 9b47b97..6576aab 100644
--- a/RecLM-gen/base/trainer.py
+++ b/RecLM-gen/base/trainer.py
@@ -1,6 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
- +import json +import os from collections import deque import torch.nn.functional as F import numpy as np @@ -19,6 +20,7 @@ from rl.reward import RewardModel from utils.tools import masked_mean, whiten, eval_decorator, shift, log_prob, Memory, sync_dict from base.model import BaseModel +from param import Config # trainer @@ -30,12 +32,18 @@ def __init__(self, args): gradient_accumulation_steps=self.args.gradient_accumulation_steps, kwargs_handlers=[DistributedDataParallelKwargs(find_unused_parameters=(self.args.train_stage == 'RL'))] # need for RL ) - set_seed(self.args.seed) # Use CUDA_VISIBLE_DEVICES=x to select gpu, do not set the --gpu command param - self.args.gpu = self.accelerator.device + self.args.gpu = self.accelerator.device.__str__() + if self.accelerator.is_main_process: + print(Config(**vars(args))) + if not os.path.exists(args.output_path): + os.makedirs(args.output_path) + if args.train_stage in ['SFT', 'RL']: + with open(os.path.join(args.output_path, 'config.json'), 'w') as f: + json.dump(vars(args), f, indent=4) + self.actor_critic = BaseModel(args=self.args, device=self.args.gpu) if self.accelerator.is_main_process: - print(args) self.actor_critic.print_trainable_parameters() self.warped_actor_critic = None @@ -370,14 +378,16 @@ def Adapter_merge(self): if self.args.train_stage == 'SFT_Merge': train_epoch = self.actor_critic.load_parameters(self.args.SFT_load) model = self.actor_critic.lora_model.merge_and_unload(progressbar=True) - model.save_pretrained(f'{self.args.output}SFT_Epoch{train_epoch:02d}', safe_serialization=True) - self.tokenizer.save_pretrained(f'{self.args.output}SFT_Epoch{train_epoch:02d}') + save_path = os.path.join(self.args.output_path, f'SFT_Epoch{train_epoch:02d}') + model.save_pretrained(save_path, safe_serialization=True) + self.tokenizer.save_pretrained(save_path) elif self.args.train_stage == 'RL_Merge': train_step = self.actor_critic.load_parameters(self.args.RL_load) self.actor_critic.lora_model.delete_adapter(self.actor_critic.critic_lora_scope) model = self.actor_critic.lora_model.merge_and_unload(progressbar=True) - model.save_pretrained(f'{self.args.output}RL_Step{train_step}', safe_serialization=True) - self.tokenizer.save_pretrained(f'{self.args.output}RL_Step{train_step}') + save_path = os.path.join(self.args.output_path, f'RL_Step{train_step}') + model.save_pretrained(save_path, safe_serialization=True) + self.tokenizer.save_pretrained(save_path) else: raise NotImplementedError diff --git a/RecLM-gen/data_process.py b/RecLM-gen/data_process.py index 9abe75f..8ca9982 100644 --- a/RecLM-gen/data_process.py +++ b/RecLM-gen/data_process.py @@ -14,8 +14,6 @@ if __name__ == '__main__': args = get_args() - kwargs = vars(args) - args = Config(**kwargs) # Set seeds torch.manual_seed(args.seed) random.seed(args.seed) diff --git a/RecLM-gen/main.py b/RecLM-gen/main.py index 78b4229..54a7d94 100644 --- a/RecLM-gen/main.py +++ b/RecLM-gen/main.py @@ -1,10 +1,15 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
- +import json +import os.path import random +import sys + import numpy as np import torch import transformers +from accelerate.utils import set_seed + from base.trainer import BaseTrainer from rl.trainer import RLTrainer from sft.trainer import SFTTrainer @@ -12,30 +17,18 @@ if __name__ == '__main__': args = get_args() - kwargs = vars(args) - args = Config(**kwargs) + assert args.train_stage in ['SFT', 'RL', 'SFT_Merge', 'RL_Merge'] + assert args.output_path is not None + # Set seeds torch.manual_seed(args.seed) random.seed(args.seed) np.random.seed(args.seed) transformers.set_seed(args.seed) - - if args.train_stage in ['RL', 'RL_merge']: - if args.model_name is None: - if args.lr > 0: - args.model_name = f'RL_Total_train_LM-{args.lm_head_full_tune}_VM-{args.vague_mapping}_NR-20.1_SN-{args.sample_num}' \ - f'_Q-{args.quantization}_T{len(args.RL_train_tasks.split(","))}' \ - f'_FG-{args.fine_grain_reward}_LR-{args.lr}_LDO-{args.lora_dropout}_WD-{args.weight_decay}' \ - f'_KLC-{args.kl_coef}_EW-{args.entropy_weight}_RS-{args.reward_scale}_RW-{args.whiten_reward}' \ - f'_VFC-{args.vf_coef}_KLT-{args.policy_kl_threshold}_LRP-{args.lr_power}_GAMMA-{args.gamma}' \ - f'_GAS-{args.gradient_accumulation_steps}_LB-{args.learn_batch}_RA_{args.reward_alpha}' \ - f'_{args.model_name_suffix}' - else: - args.model_name = f'RL_Total_init_LM-{args.lm_head}_VM-{args.vague_mapping}_NR-20.1_SN-{args.sample_num}_Q-{args.quantization}_T{len(args.RL_train_tasks.split(","))}' - args.output = f'{args.output}{args.model_name}/' + set_seed(args.seed) # if args.log_to_file: - # log_file = open(args.output+f'{time.strftime("%Y-%m-%d %Hh_%Mm_%Ss", time.localtime())} {args.train_stage}.log', 'w') + # log_file = open(args.output_path+f'{time.strftime("%Y-%m-%d %Hh_%Mm_%Ss", time.localtime())} {args.train_stage}.log', 'w') # sys.stdout = log_file if args.train_stage == 'SFT': diff --git a/RecLM-gen/param.py b/RecLM-gen/param.py index 096c2c6..6d3f1ce 100644 --- a/RecLM-gen/param.py +++ b/RecLM-gen/param.py @@ -43,8 +43,6 @@ def add_args_RL(parser): parser.add_argument("--lr_power", type=float, default=2.0) parser.add_argument("--learn_batch", type=int, default=2) parser.add_argument("--reward_alpha", type=float, default=0.5) - parser.add_argument("--model_name", type=str, default=None, help='auto generated while RL, or custom setting') - parser.add_argument("--model_name_suffix", type=str, default="") parser.add_argument("--val_save_step", type=int, default=100) return parser @@ -70,7 +68,7 @@ def add_args(parse=True, **optional_kwargs): parser.add_argument("--val_num_per_task", type=int, default=320, help='the number of valuation samples') # Checkpoint - parser.add_argument('--output', type=str, default='snap/', help='path to save model params file, or to save the merged model.') + parser.add_argument('--output_path', type=str, default='snap/', help='path to save model params file, or to save the merged model.') # Model Config parser.add_argument('--backbone', type=str, default='google/flan-t5-xl') diff --git a/RecLM-gen/preprocess/data_preprocess_amazon.py b/RecLM-gen/preprocess/data_preprocess_amazon.py new file mode 100644 index 0000000..f8d9ffe --- /dev/null +++ b/RecLM-gen/preprocess/data_preprocess_amazon.py @@ -0,0 +1,321 @@ +""" +The following code is modified from +https://github.com/aHuiWang/CIKM2020-S3Rec/blob/master/data/data_process.py +""" + +import os +import re +import json +import gzip +import torch +import pickle +import random +import numpy as np +import pandas as pd +from tqdm import tqdm +from 
collections import defaultdict +import copy +import argparse + +''' +Set seeds +''' +seed = 2022 +random.seed(seed) +np.random.seed(seed) +torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser(description="data process") + parser.add_argument("--full_data_name", type=str, help="") + parser.add_argument("--meta_file", type=str, help="") + parser.add_argument("--review_file", type=str, help="") + parser.add_argument("--data_path", type=str, help="") + parser.add_argument("--unirec_data_path", type=str, help="") + return parser.parse_args() + + +def save_pickle(data, filename): + with open(filename, "wb") as f: + pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) + + +def Amazon(rating_score=-1.0, args=None): + ''' + return (user, item, timestamp) sort in get_interaction + + reviewerID - ID of the reviewer, e.g. A2SUAM1J3GNN3B + asin - ID of the product, e.g. 0000013714 + reviewerName - name of the reviewer + helpful - helpfulness rating of the review, e.g. 2/3 + reviewText - text of the review + overall - rating of the product + summary - summary of the review + unixReviewTime - time of the review (unix time) + reviewTime - time of the review (raw) + ''' + items_with_title = {} + with gzip.open(args.meta_file, "r") as fr: + for line in tqdm(fr, desc="load meta data"): + line = json.loads(line) + if "title" not in line or len(line['title']) == 0 or 'category' not in line or len(line['category']) == 0: + continue + items_with_title[line['asin']] = 1 + print(f"items with title and category: {add_comma(len(items_with_title))}") # title: 181781 + datas = [] + data_dict = {} + with gzip.open(args.review_file, "r") as fr: + for line in tqdm(fr, desc="load all interactions"): + # try: + line = json.loads(line) + user = line['reviewerID'] + item = line['asin'] + if float(line['overall']) <= rating_score or item not in items_with_title: # remove low rating + continue + if (user, item) in data_dict: + continue + time = line['unixReviewTime'] + data_dict[(user, item)] = int(time) # merge duplicate interactions, keep the first record + datas.append((user, item, int(time))) + print(f"total interactions: {add_comma(len(datas))}") + return datas + + +def Amazon_meta(datamaps, args): + ''' + asin - ID of the product, e.g. 
0000031852 + title - name of the product --"title": "Girls Ballet Tutu Zebra Hot Pink", + description + price - price in US dollars (at time of crawl) --"price": 3.17, + imUrl - url of the product image (str) --"imUrl": "http://ecx.images-amazon.com/images/I/51fAmVkTbyL._SY300_.jpg", + related - related products (also bought, also viewed, bought together, buy after viewing) + salesRank - sales rank information --"salesRank": {"Toys & Games": 211836} + brand - brand name --"brand": "Coxlures", + categories - list of categories the product belongs to --"categories": [["Sports & Outdoors", "Other Sports", "Dance"]] + ''' + meta_datas = {} + item_ids = set(datamaps['item2id'].keys()) + with gzip.open(args.meta_file, "r") as fr: + for line in tqdm(fr, desc="load meta data"): + line = json.loads(line) + if line['asin'] not in item_ids: + continue + # if "title" in line: + line['title'] = re.sub(r'\n\t', ' ', line['title']).encode('UTF-8', 'ignore').decode('UTF-8') + # line['title'] = line['title'].split(",")[0] + if "description" in line: + if type(line['description']) == str: + line['description'] = re.sub(r'\n\t', ' ', line['description']).encode('UTF-8', 'ignore').decode('UTF-8') + elif type(line['description']) == list: + descs = [] + for desc in line['description']: + desc = re.sub(r'\n\t', ' ', desc).encode('UTF-8', 'ignore').decode('UTF-8') + descs.append(desc) + line['description'] = descs + if 'related' in line: + del line['related'] + if 'imUrl' in line: + del line['imUrl'] + if 'imageURL' in line: + del line['imageURL'] + if 'imageURLHighRes' in line: + del line['imageURLHighRes'] + meta_datas[line['asin']] = line + return meta_datas + + +def Amazon_category(meta_infos): + category_datas = {} + for item in meta_infos: + categories = [] + for c in meta_infos[item]['category'][1:]: # filter the subset category, such as 'Movies $ TV'. 
+ categories.append(c) + temp = ', '.join(categories) + if temp not in category_datas: + category_datas[temp] = [] + category_datas[temp].append(item) + + return category_datas + + +def add_comma(num): # 1000000 -> 1,000,000 + str_num = str(num) + res_num = '' + for i in range(len(str_num)): + res_num += str_num[i] + if (len(str_num) - i - 1) % 3 == 0: + res_num += ',' + return res_num[:-1] + + +def get_interaction(datas): + # get user interaction sequence for sequential recommendation + user_seq = {} + for data in datas: + user, item, time = data + if user in user_seq: + user_seq[user].append((item, time)) + else: + user_seq[user] = [] + user_seq[user].append((item, time)) + + for user, item_time in user_seq.items(): + item_time.sort(key=lambda x: x[1]) + items = [] + for t in item_time: + items.append(t[0]) + user_seq[user] = items + return user_seq + + +def check_Kcore(user_items, user_core, item_core): + # K-core user_core item_core, return False if any user/item < core + user_count = defaultdict(int) + item_count = defaultdict(int) + for user, items in user_items.items(): + user_count[user] += 0 + for item in items: + user_count[user] += 1 + item_count[item] += 1 + + for user, num in user_count.items(): + if num < user_core: + return user_count, item_count, False + for item, num in item_count.items(): + if num < item_core: + return user_count, item_count, False + return user_count, item_count, True # Already guaranteed Kcore + + +def filter_Kcore(user_items, user_core, item_core): + # Loop filter K-core, filter out users and items that do not meet K-core + user_count, item_count, isKcore = check_Kcore(user_items, user_core, item_core) + while not isKcore: + cur_user_items = copy.deepcopy(user_items) + for user, num in user_count.items(): + if user_count[user] < user_core: # Delete the user + cur_user_items.pop(user) + else: + for item in user_items[user]: + if item_count[item] < item_core: + cur_user_items[user].remove(item) + user_items = cur_user_items + user_count, item_count, isKcore = check_Kcore(user_items, user_core, item_core) + total_interactions = 0 + for user, items in user_items.items(): + total_interactions += len(items) + print("interactions: {0} after k-core filter".format(add_comma(total_interactions))) + return user_items + + +def id_map(user_items): # user_items dict + user2id = {'[PAD]': 0} # raw 2 uid + item2id = {'[PAD]': 0} # raw 2 iid + id2user = ['[PAD]'] # uid 2 raw + id2item = ['[PAD]'] # iid 2 raw + user_id = 1 # start from 1 + item_id = 1 + random_user_list = list(user_items.keys()) + # random.shuffle(random_user_list) # user is shuffled and re-encoded + for user in random_user_list: + items = user_items[user] + if user not in user2id: + user2id[user] = user_id + id2user.append(user) + user_id += 1 + for item in items: + if item not in item2id: + item2id[item] = item_id + id2item.append(item) + item_id += 1 + data_maps = { + 'user2id': user2id, + 'item2id': item2id, + 'id2user': id2user, + 'id2item': id2item + } + # return: final_data: {uid: [iid1, iid2, ...], ...}, user_num, item_num, data_maps + return user_id - 1, item_id - 1, data_maps + + +def main_process(data_name, args, data_type='Amazon'): + assert data_type in {'Amazon', 'Yelp', 'Steam'} + rating_score = -0.1 # rating score smaller than this score would be deleted + # user 25-core item 10-core + user_core = 25 + item_core = 10 + attribute_core = 0 + + datas = Amazon(rating_score, args) # list of [user, item, timestamp] + + user_items = get_interaction(datas) # dict of {user: interaction list 
sorted by time} + print(f'{data_name} Raw data has been processed! Lower than {rating_score} are deleted!') + print(f'User Num: {len(user_items)}') + # raw_id user: [item1, item2, item3...] + + user_items = filter_Kcore(user_items, user_core=user_core, item_core=item_core) + print(f'User {user_core}-core complete! Item {item_core}-core complete!') + + user_num, item_num, datamaps = id_map(user_items) # get mapping dicts + user_count, item_count, isKcore = check_Kcore(user_items, user_core=user_core, item_core=item_core) + assert isKcore is True + user_count_list = list(user_count.values()) # user click count + user_avg, user_min, user_max = np.mean(user_count_list), np.min(user_count_list), np.max(user_count_list) + item_count_list = list(item_count.values()) # item click count + item_avg, item_min, item_max = np.mean(item_count_list), np.min(item_count_list), np.max(item_count_list) + interact_num = np.sum([x for x in user_count_list]) + sparsity = (1 - interact_num / (user_num * item_num)) * 100 + show_info = f'Total User: {user_num}, Avg User: {user_avg:.4f}, Min Len: {user_min}, Max Len: {user_max}\n' + \ + f'Total Item: {item_num}, Avg Item: {item_avg:.4f}, Min Inter: {item_min}, Max Inter: {item_max}\n' + \ + f'Interaction Num: {interact_num}, Sparsity: {sparsity:.2f}%' + print(show_info) + + print('Begin extracting meta infos...') + + meta_infos = Amazon_meta(datamaps, args) + + print(f'{data_name} & {add_comma(user_num)} & {add_comma(item_num)} & {user_avg:.1f}' + f'& {item_avg:.1f} & {add_comma(interact_num)} & {sparsity:.2f}%') + + category_infos = Amazon_category(meta_infos) + + # -------------- Save Data --------------- + if not os.path.exists(args.data_path): + os.makedirs(args.data_path) + save_pickle(user_items, os.path.join(args.data_path, 'sequential.pickle')) + save_pickle(meta_infos, os.path.join(args.data_path, 'meta.pickle')) + save_pickle(category_infos, os.path.join(args.data_path, 'category.pickle')) + + train_data = {'user_id': [], 'item_id': []} + valid_data = {'user_id': [], 'item_id': []} + test_data = {'user_id': [], 'item_id': []} + user_history = {'user_id': [], 'item_seq': []} + for u in user_items: + user_history['user_id'].append(datamaps['user2id'][u]) + user_history['item_seq'].append(np.array([datamaps['item2id'][_] for _ in user_items[u][:-1]], dtype=np.int32)) + for i in user_items[u][:-2]: + train_data['user_id'].append(datamaps['user2id'][u]) + train_data['item_id'].append(datamaps['item2id'][i]) + + valid_item, test_item = user_items[u][-2], user_items[u][-1] + valid_data['user_id'].append(datamaps['user2id'][u]) + valid_data['item_id'].append(datamaps['item2id'][valid_item]) + test_data['user_id'].append(datamaps['user2id'][u]) + test_data['item_id'].append(datamaps['item2id'][test_item]) + + if not os.path.exists(args.unirec_data_path): + os.makedirs(args.unirec_data_path) + save_pickle(pd.DataFrame(train_data), os.path.join(args.unirec_data_path, 'train.pkl')) + save_pickle(pd.DataFrame(valid_data), os.path.join(args.unirec_data_path, 'valid.pkl')) + save_pickle(pd.DataFrame(test_data), os.path.join(args.unirec_data_path, 'test.pkl')) + save_pickle(pd.DataFrame(user_history), os.path.join(args.unirec_data_path, 'user_history.pkl')) + save_pickle(datamaps, os.path.join(args.unirec_data_path, 'map.pkl')) + save_pickle(category_infos, os.path.join(args.unirec_data_path, 'category.pickle')) + + print('Done!!!') + + +if __name__ == '__main__': + _args = parse_args() + main_process(_args.full_data_name, args=_args, data_type='Amazon') diff --git 
a/RecLM-gen/rl/reward.py b/RecLM-gen/rl/reward.py index eb34807..5635ffd 100644 --- a/RecLM-gen/rl/reward.py +++ b/RecLM-gen/rl/reward.py @@ -31,11 +31,9 @@ def __init__(self, args, tokenizer): 'list': RunningMoments() } - def ranking_score_func(self, idx): - if 'NR-9' in self.args.model_name: - return 1.0-idx/len(self.metas) # NR-9 - else: - return 1.0/math.log2(idx+2) # NR-8 + @staticmethod + def ranking_score_func(idx): + return 1.0/math.log2(idx+2) # NR-8 def reward_calculate(self, task, input_field_data, title_list): ranking_score_frac, task_score_frac = self.args.reward_alpha, 1.0-self.args.reward_alpha # NR-13 diff --git a/RecLM-gen/rl/trainer.py b/RecLM-gen/rl/trainer.py index 02653e3..f688164 100644 --- a/RecLM-gen/rl/trainer.py +++ b/RecLM-gen/rl/trainer.py @@ -23,7 +23,7 @@ def __init__(self, args): self.writer = None if self.accelerator.is_main_process: - self.writer = SummaryWriter(log_dir=f'logs/RL_train/{self.args.model_name}', flush_secs=30) + self.writer = SummaryWriter(log_dir=os.path.join('logs', self.args.output_path), flush_secs=30) self.actor_critic.load_parameters(self.args.RL_load) self.dataset_prepare() @@ -236,10 +236,10 @@ def RL_val(self, step: int): def RL_val_path(self): val_steps = {} - for params_file in os.listdir(self.args.output): + for params_file in os.listdir(self.args.output_path): step = re.findall(r'^(\d+)step_RL\.pth', params_file) # matching the train step from file name if len(step) > 0: - val_steps[step[0]] = os.path.join(self.args.output, params_file[:-4]) + val_steps[step[0]] = os.path.join(self.args.output_path, params_file[:-4]) if self.args.dry: val_steps[0] = None val_steps = {_: val_steps[_] for _ in sorted(val_steps, key=lambda k: k) if _ >= 0} diff --git a/RecLM-gen/scripts/data_preprocess_amazon.sh b/RecLM-gen/scripts/data_preprocess_amazon.sh new file mode 100644 index 0000000..0d9d700 --- /dev/null +++ b/RecLM-gen/scripts/data_preprocess_amazon.sh @@ -0,0 +1,12 @@ + + +DATA_PATH="data/dataset/sub_movie/" +UNIREC_DATA_PATH="unirec/data/sub_movie/" + +python preprocess/data_preprocess_amazon.py \ + --full_data_name Movies_TV \ + --meta_file ${DATA_PATH}meta_Movies_and_TV.json.gz \ + --review_file ${DATA_PATH}Movies_and_TV_5.json.gz \ + --data_path ${DATA_PATH} \ + --unirec_data_path ${UNIREC_DATA_PATH} \ + diff --git a/RecLM-gen/scripts/rl_merge.sh b/RecLM-gen/scripts/rl_merge.sh index 616f79c..7485c2d 100644 --- a/RecLM-gen/scripts/rl_merge.sh +++ b/RecLM-gen/scripts/rl_merge.sh @@ -1,17 +1,21 @@ #!/bin/bash -# --RL_load is the file saved in RL. +BACKBONE="snap/ICR_SubMovie/SFT_Epoch27/" +OUTPUT_PATH_SUFFIX="RL/" +RL_LOAD="3000step_RL" + +# --RL_load is the file saved in RL: snap/ICR_SubMovie/SFT_Epoch27/RL/3000step_RL.pth # need to keep the setting about model params as same as training, such RL_actor_lora_r, RL_actor_lora_a, RL_critic_lora_r, RL_critic_lora_a and lm_head_full_tune. 
-CUDA_VISIBLE_DEVICES=8 python main.py \ - --output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/ \ - --backbone snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch27/ \ +CUDA_VISIBLE_DEVICES=0 python main.py \ + --output_path ${BACKBONE}${OUTPUT_PATH_SUFFIX} \ + --backbone $BACKBONE \ --train_stage RL_Merge \ --RL_actor_lora_r 4 \ --RL_actor_lora_a 2 \ --RL_critic_lora_r 4 \ --RL_critic_lora_a 2 \ - --RL_load snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/RL_Total_train_LM-True_VM-False_NR-20.1_SN-2_Q-False_T6_FG-True_LR-5e-06_LDO-0.0_WD-0.0_KLC-0.3_EW-0.01_RS-False_RW-True_VFC-0.1_KLT-0.05_LRP-2.0_GAMMA-0.99_GAS-4_LB-1_RA_0.5_/4800step_RL \ + --RL_load ${BACKBONE}${OUTPUT_PATH_SUFFIX}${RL_LOAD} \ --lm_head_full_tune \ --FA2 diff --git a/RecLM-gen/scripts/rl_train.sh b/RecLM-gen/scripts/rl_train.sh index 550c4c8..83000fe 100644 --- a/RecLM-gen/scripts/rl_train.sh +++ b/RecLM-gen/scripts/rl_train.sh @@ -1,10 +1,14 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=4,5 accelerate launch --num_processes 2 --gpu_ids all main.py \ + +BACKBONE="snap/ICR_SubMovie/SFT_Epoch27/" +OUTPUT_PATH_SUFFIX="RL/" + +CUDA_VISIBLE_DEVICES=0,1 accelerate launch --num_processes 2 --gpu_ids all main.py \ --seed 0 \ --data_path data/dataset/sub_movie/ \ - --output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/ \ - --backbone snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch27/ \ + --output_path ${BACKBONE}${OUTPUT_PATH_SUFFIX} \ + --backbone ${BACKBONE} \ --item_index title64_t \ --batch_size 8 \ --gradient_accumulation_steps 2 \ diff --git a/RecLM-gen/scripts/sft_merge.sh b/RecLM-gen/scripts/sft_merge.sh index 558cb27..4678d2e 100644 --- a/RecLM-gen/scripts/sft_merge.sh +++ b/RecLM-gen/scripts/sft_merge.sh @@ -1,13 +1,17 @@ #!/bin/bash -# --SFT_load is the model parameter file saved in SFT: snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/Epoch37_SFT.pth +OUTPUT_PATH="snap/ICR_SubMovie/" +BACKBONE="snap/Llama-2-7b-hf-chat/" +SFT_LOAD="Epoch27_SFT" + +# --SFT_load is the model parameter file saved in SFT: snap/ICR_SubMovie/Epoch27_SFT.pth # need to keep the setting about model params as same as training, such as SFT_actor_lora_r and SFT_actor_lora_a. # You need to ensure all saved parameters in file perfectly cover the trainable parameters of BaseModel. 
-CUDA_VISIBLE_DEVICES=8 python main.py \ - --backbone snap/Llama-2-7b-hf-chat/ \ +CUDA_VISIBLE_DEVICES=0 python main.py \ + --backbone $BACKBONE \ --train_stage SFT_Merge \ --SFT_actor_lora_r 16 \ --SFT_actor_lora_a 8 \ - --output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/ \ - --SFT_load snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/Epoch27_SFT + --output_path ${OUTPUT_PATH} \ + --SFT_load ${OUTPUT_PATH}${SFT_LOAD} diff --git a/RecLM-gen/scripts/sft_train.sh b/RecLM-gen/scripts/sft_train.sh index f624132..67d2af6 100644 --- a/RecLM-gen/scripts/sft_train.sh +++ b/RecLM-gen/scripts/sft_train.sh @@ -1,10 +1,14 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0,1,2,4 accelerate launch --num_processes 4 --gpu_ids all main.py \ + +OUTPUT_PATH="snap/ICR_SubMovie/" +BACKBONE="snap/Llama-2-7b-hf-chat/" + +CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch --num_processes 4 --gpu_ids all main.py \ --seed 0 \ --data_path data/dataset/sub_movie/ \ - --output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/ \ - --backbone snap/Llama-2-7b-hf-chat/ \ + --output_path ${OUTPUT_PATH} \ + --backbone ${BACKBONE} \ --item_index title64_t \ --batch_size 1 \ --topk 10 \ diff --git a/RecLM-gen/scripts/single_gpu_rl_train.sh b/RecLM-gen/scripts/single_gpu_rl_train.sh index 90a382d..e21f8f2 100644 --- a/RecLM-gen/scripts/single_gpu_rl_train.sh +++ b/RecLM-gen/scripts/single_gpu_rl_train.sh @@ -1,10 +1,14 @@ #!/bin/bash + +BACKBONE="snap/ICR_SubMovie/SFT_Epoch27/" +OUTPUT_PATH_SUFFIX="RL/" + CUDA_VISIBLE_DEVICES=0 python main.py \ --seed 0 \ --data_path data/dataset/sub_movie/ \ - --output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/ \ - --backbone snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/SFT_Epoch37/ \ + --output_path ${BACKBONE}${OUTPUT_PATH_SUFFIX} \ + --backbone ${BACKBONE} \ --item_index title64_t \ --batch_size 8 \ --gradient_accumulation_steps 4 \ @@ -35,4 +39,5 @@ CUDA_VISIBLE_DEVICES=0 python main.py \ --whiten_reward \ --num_episodes 2 \ --reward_alpha 0.5 \ - --fine_grain_reward \ No newline at end of file + --fine_grain_reward \ + --teacher_port 12621 \ No newline at end of file diff --git a/RecLM-gen/scripts/single_gpu_sft_train.sh b/RecLM-gen/scripts/single_gpu_sft_train.sh index c5426c6..57f8e41 100644 --- a/RecLM-gen/scripts/single_gpu_sft_train.sh +++ b/RecLM-gen/scripts/single_gpu_sft_train.sh @@ -1,10 +1,14 @@ #!/bin/bash + +OUTPUT_PATH="snap/ICR_SubMovie/" +BACKBONE="snap/Llama-2-7b-hf-chat/" + CUDA_VISIBLE_DEVICES=0 python main.py \ --seed 0 \ --data_path data/dataset/sub_movie/ \ - --output snap/ICR_SubMovie_Title64T_0_Llama7bChat_LCT_E40_CCR2_SCG2-0.5_IDX/ \ - --backbone snap/Llama-2-7b-hf-chat/ \ + --output_path ${OUTPUT_PATH} \ + --backbone ${BACKBONE} \ --item_index title64_t \ --batch_size 1 \ --topk 10 \ @@ -12,7 +16,7 @@ CUDA_VISIBLE_DEVICES=0 python main.py \ --epoch 40 \ --gen_max_length 512 \ --lr 0.001 \ - --gradient_accumulation_steps 16 \ + --gradient_accumulation_steps 64 \ --train_stage SFT \ --SFT_actor_lora_r 16 \ --SFT_actor_lora_a 8 \ @@ -25,4 +29,5 @@ CUDA_VISIBLE_DEVICES=0 python main.py \ --share_chat_gpt_ratio 0.5 \ --FA2 \ --llama2_chat_template \ - --idx \ No newline at end of file + --idx \ + --teacher_port 12621 \ No newline at end of file diff --git a/RecLM-gen/scripts/tasks_test.sh b/RecLM-gen/scripts/tasks_test.sh index f0afd06..4bafea3 100644 --- a/RecLM-gen/scripts/tasks_test.sh +++ b/RecLM-gen/scripts/tasks_test.sh @@ -2,16 +2,27 @@ MODEL_NAME=$1 PORT=$2 +DATASET=$3 
-CHECK=$(echo "$MODEL_NAME" | grep "Steam") -if [ "$CHECK" != "" ]; then +if [ "$DATASET" = "steam" ]; then ITEM_INDEX='title' - DATASET='steam' else ITEM_INDEX='title64_t' - DATASET='sub_movie' fi +GENERAL_LLM="" +OUTPUT_PATH=${MODEL_NAME} +if [ "$MODEL_NAME" = "gpt-3.5-turbo-1106" ]; then + GENERAL_LLM="--general_llm" + OUTPUT_PATH=snap/${MODEL_NAME}/${DATASET}/ +elif [ "$MODEL_NAME" = "snap/Llama-2-7b-hf-chat/" ]; then + GENERAL_LLM="--general_llm" + OUTPUT_PATH=${MODEL_NAME}${DATASET}/ +else + OUTPUT_PATH=${MODEL_NAME} +fi + + tasks=( "SFTTestSeqRec" "SFT+TestPersonalControlRec" @@ -21,7 +32,7 @@ tasks=( "SFTTestPersonalCategoryRateMP_30" "SFTTestItemCount" ) -for t in "${tasks[@]}"; +for task in "${tasks[@]}"; do - python task_test.py --data_path data/dataset/${DATASET}/ --item_index ${ITEM_INDEX} --SFT_test_task ${t} --model_name ${MODEL_NAME} --llama2_chat_template --idx --topk 10 --vllm_port ${PORT} + python task_test.py --data_path data/dataset/${DATASET}/ --item_index ${ITEM_INDEX} --SFT_test_task ${task} --model_name ${MODEL_NAME} --output_path ${OUTPUT_PATH} --llama2_chat_template --idx --topk 10 --vllm_port ${PORT} ${GENERAL_LLM} done diff --git a/RecLM-gen/scripts/unirec_serve.sh b/RecLM-gen/scripts/unirec_serve.sh index 9439c8c..1a3a7cb 100644 --- a/RecLM-gen/scripts/unirec_serve.sh +++ b/RecLM-gen/scripts/unirec_serve.sh @@ -23,7 +23,7 @@ DATA_TYPE='SeqRecDataset' # BaseDataset SeqRecDataset export PYTHONPATH=$PWD CUDA_VISIBLE_DEVICES=0 python unirec/asyc_server.py \ - --config_dir="unirec/unirec/config" \ + --config_dir="unirec/config" \ --model=$MODEL_NAME \ --dataloader=$DATA_TYPE \ --dataset=$DATASET_NAME \ diff --git a/RecLM-gen/sft/trainer.py b/RecLM-gen/sft/trainer.py index 05f50ba..c317794 100644 --- a/RecLM-gen/sft/trainer.py +++ b/RecLM-gen/sft/trainer.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
+import os.path import torch from torch.utils.data import DataLoader @@ -23,8 +24,7 @@ def __init__(self, args): self.writer = None if self.accelerator.is_main_process: - name = self.args.output.split('snap/')[-1] - self.writer = SummaryWriter(log_dir=f'logs/SFT_train/{self.args.SFT_train_tasks}/{name}', flush_secs=30) + self.writer = SummaryWriter(log_dir=os.path.join('logs', self.args.output_path), flush_secs=30) self.start_epoch = self.actor_critic.load_parameters(self.args.SFT_load) self.dataset_prepare() diff --git a/RecLM-gen/task_test.py b/RecLM-gen/task_test.py index 856787c..cabeaa1 100644 --- a/RecLM-gen/task_test.py +++ b/RecLM-gen/task_test.py @@ -17,33 +17,6 @@ headers = {"User-Agent": "Test Client"} -def quary_vllm(input_text, args): - for ii in range(args.try_num): - pload_search = { - # "model": args.model_name, - "prompt": input_text, - "n": 1, - "temperature": 0.0, - "max_tokens": args.gen_max_length, - } - - pload_sample = { - # "model": args.model_name, - "prompt": input_text, - "n": 1, - "temperature": 0.7, - "max_tokens": args.gen_max_length, - "top_p": 0.2, - "top_k": 5, - } - response = requests.post(f'http://127.0.0.1:{args.vllm_port}/generate', headers=headers, json=pload_sample if args.sample else pload_search, stream=False) - output_data = json.loads(response.content) - if 'text' not in output_data: - continue - output_text = output_data["text"][0][len(input_text):] - return output_text - - def quary_vllm_openai(input_text, args): for ii in range(args.try_num): pload = { @@ -75,7 +48,7 @@ def quary_api(d, args): try: if f'{args.model_name}_output' not in d: input_text = d['input_text'] - if args.model_name in ['snap/Llama-2-7b-hf-chat/', 'snap/gpt-3.5-turbo-1106/']: + if args.general_llm: input_text = d['input_text'].split('\n') sub_text1 = input_text[1].strip() sub_text2 = input_text[4].split('[/INST]')[0].strip() @@ -87,12 +60,10 @@ def quary_api(d, args): input_text = f'{sub_text1} {sub_text2} {sub_text3} \n{sub_text4}' d['raw_input_text'] = input_text - if args.model_name in ['snap/Llama-2-7b-hf-chat/', 'snap/gpt-3.5-turbo-1106/']: - # if args.model_name in ['snap/gpt-3.5-turbo-1106/']: - d[f'{args.model_name}_output'] = quary_openai(input_text, args) - else: + if args.vllm_port > 0: d[f'{args.model_name}_output'] = quary_vllm_openai(input_text, args) - # d[f'{args.model_name}_output'] = quary_vllm(input_text, args) + else: + d[f'{args.model_name}_output'] = quary_openai(input_text, args) assert f'{args.model_name}_output' in d, f'no {args.model_name}_output' wrongtime = 0 @@ -138,6 +109,7 @@ def process_api_output(d): parser.add_argument('--SFT_test_task', type=str, default='', help='in {SFTTestSeqRec, SFTTestRanking, SFT+TestPersonalControlRec, SFT-TestPersonalControlRec, SFTTestPersonalCategoryRate_xx%, SFTTestItemCount}') parser.add_argument("--num_process", type=int, default=80) parser.add_argument("--model_name", type=str, default='Llama-2-7b-hf-chat', help="openai model") + parser.add_argument("--output_path", type=str, default=None) parser.add_argument("--try_num", type=int, default=2, help="The number of attempts to call the API") parser.add_argument("--max_item_length", type=int, default=10) parser.add_argument("--max_token_length", type=int, default=512, help="The max length of input text to gpt") @@ -152,12 +124,14 @@ def process_api_output(d): parser.add_argument("--reprocess", action='store_true') parser.add_argument("--teacher_port", type=int, default=12621) parser.add_argument("--vllm_port", type=int, default=13579) + 
parser.add_argument("--general_llm", action='store_true') args = parser.parse_args() args.is_main_process = True - kwargs = vars(args) - args = Config(**kwargs) - print(args) - gpt = GPT(model_name=args.model_name, port=args.vllm_port) + print(Config(**vars(args))) + assert args.output_path is not None + if not os.path.exists(args.output_path): + os.makedirs(args.output_path) + gpt = GPT() category2item = load_pickle(args.data_path + 'category.pickle') metas = load_pickle(args.data_path + 'meta.pickle') @@ -183,17 +157,12 @@ def process_api_output(d): } TestTaskTemplate = {args.SFT_test_task: Test_task_group_mapping[args.SFT_test_task.split('_')[0]]} TestTaskNum = {args.SFT_test_task: 1} - args.output_path = args.model_name - if args.model_name in ['snap/Llama-2-7b-hf-chat/', 'snap/gpt-3.5-turbo-1106/']: - args.output_path = f'{args.model_name}{args.data_path.split("/")[-2]}/' - if not os.path.exists(args.output_path): - os.mkdir(args.output_path) if args.SFT_test_task in ['SFT+TestPersonalControlRec', 'SFT-TestPersonalControlRec'] or args.SFT_test_task.startswith('SFTTestPersonalCategoryRate'): - TestSeqRec_Result_file = f'{args.output_path}SFTTestSeqRec_Top{args.topk}_Result.pickle' + TestSeqRec_Result_file = os.path.join(args.output_path, f'SFTTestSeqRec_Top{args.topk}_Result.pickle') data['SFTTestSeqRec_Result'] = load_pickle(TestSeqRec_Result_file) test_data = SFTDataset(args, TestTaskTemplate, TestTaskNum, data, None, 'test') metrics_dict = Metrics([args.SFT_test_task], args.topk, test_data.category2item, test_data.title2item) - result_file = f'{args.output_path}{args.SFT_test_task}_Top{args.topk}_Result{"_Sample" if args.sample else ""}.pickle' + result_file = os.path.join(args.output_path, f'{args.SFT_test_task}_Top{args.topk}_Result{"_Sample" if args.sample else ""}.pickle') test_data_list = load_pickle(result_file) _test_data_list = [_ for _ in test_data] @@ -215,9 +184,9 @@ def process_api_output(d): if len(remain_test_data_list) > 0: save_pickle(test_data_list, result_file) - if args.model_name not in ['snap/Llama-2-7b-hf-chat/', 'snap/gpt-3.5-turbo-1106/']: + if not args.general_llm: for step_i, example in tqdm(enumerate(test_data_list)): - if f'{args.model_name}_output' not in example or (f'{args.SFT_test_task}_output_title_list' in example and args.reprocess): + if f'{args.model_name}_output' not in example or (f'{args.SFT_test_task}_output_title_list' in example and not args.reprocess): continue output_title = example[f'{args.model_name}_output'] output_title_list = [_.strip() for _ in output_title.strip().split('\n')] diff --git a/RecLM-gen/unirec/asyc_server.py b/RecLM-gen/unirec/asyc_server.py index b5cd19c..ffca0a4 100644 --- a/RecLM-gen/unirec/asyc_server.py +++ b/RecLM-gen/unirec/asyc_server.py @@ -42,6 +42,7 @@ def __init__(self, msg, code=500): } print(' '.join(sys.argv)) config = argument_parser.parse_arguments() +config['port'] = [_.split('=')[1] for _ in sys.argv if _.startswith('--port=')][0] config['device'] = torch.device('cuda:0') dataset = config['dataset'] config['dataset_path'] = f'data/{dataset}' @@ -127,12 +128,12 @@ def __init__(self, model_name): self.model.load_state_dict(checkpoint["state_dict"]) self.model.eval() self.model.requires_grad_(False) - _, scores, _, _ = self.model.forward(item_seq=torch.tensor([[0, 0, 0, 1, 2, 3, 4]], device=config['device'])) self.needs_processing = None self.needs_processing_timer = None self.all_item_id = torch.arange(len(map_dict['id2item']), device=config['device'], dtype=torch.int64) + _, scores, _, _ = 
self.model.forward(item_seq=torch.tensor([[0, 0, 0, 1, 2, 3, 4]], device=config['device']), item_id=self.all_item_id) def schedule_processing_if_needed(self): if len(self.queue) >= MAX_BATCH_SIZE: diff --git a/RecLM-gen/utils/tools.py b/RecLM-gen/utils/tools.py index 8a71116..e18c41b 100644 --- a/RecLM-gen/utils/tools.py +++ b/RecLM-gen/utils/tools.py @@ -8,6 +8,7 @@ from Levenshtein import distance from einops import rearrange from openai import OpenAI +from openai.lib.azure import AzureOpenAI from torch.nn.utils.rnn import pad_sequence from collections import namedtuple @@ -219,20 +220,31 @@ def get_complete_text(input_text: str, output_titles: str): class GPT: - def __init__(self, model_name='', port=8000) -> None: + def __init__(self) -> None: self.client = None self.max_wrong_time = 2 - self.port = port - self.model_name = 'gpt-3.5' if 'gpt-3.5' in model_name else model_name + self.api_base = os.environ['OPENAI_API_BASE'] if 'OPENAI_API_BASE' in os.environ else None + self.api_version = os.environ['OPENAI_API_VERSION'] if 'OPENAI_API_VERSION' in os.environ else None + self.api_type = os.environ['OPENAI_API_TYPE'] if 'OPENAI_API_TYPE' in os.environ else None + self.api_key = os.environ['OPENAI_API_KEY'] if 'OPENAI_API_KEY' in os.environ else 'Empty' + self.engine = os.environ['ENGINE'] if 'ENGINE' in os.environ else None self.init_client() - print(f'use model of {self.model_name}') + print(f'use model of {self.engine}') def init_client(self): - self.client = OpenAI( - api_key='xxx' if self.model_name == 'gpt-3.5' else 'EMPTY', - max_retries=self.max_wrong_time, - base_url='https://xxx.xxx/v1' if self.model_name == 'gpt-3.5' else f'http://127.0.0.1:{self.port}/v1' - ) + if self.api_type == "azure": + self.client = AzureOpenAI( + api_key=self.api_key, + api_version=self.api_version, + azure_endpoint=self.api_base, + max_retries=self.max_wrong_time, + ) + else: + self.client = OpenAI( + api_key=self.api_key, + base_url=self.api_base, + max_retries=self.max_wrong_time, + ) def call(self, content, t=0.0): chat_completion = self.client.chat.completions.create( @@ -245,7 +257,7 @@ def call(self, content, t=0.0): temperature=t, # top_p=0.2, max_tokens=2048, - model=self.model_name, + model=self.engine, ) response = chat_completion.choices[0].message.content return response
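Review note (not part of the patch): with `utils/tools.py` now reading its OpenAI configuration from the environment, the client that `GPT.init_client()` builds is decided entirely by `OPENAI_API_TYPE`. The sketch below condenses that selection logic into a standalone script so the variables listed in README section 4.4 can be checked before running `./scripts/tasks_test.sh`; the helper name `make_client` and the fallback engine value are illustrative assumptions, not names taken from the repository.

```python
# Minimal sketch (assumption-labelled, not part of the patch): mirrors the
# env-var-driven client selection performed by the updated GPT.init_client()
# in utils/tools.py. Requires openai>=1.x.
import os

from openai import OpenAI
from openai.lib.azure import AzureOpenAI


def make_client(max_retries: int = 2):
    # Hypothetical helper; only the OPENAI_* / ENGINE variables from
    # README section 4.4 are assumed to be set.
    api_key = os.environ.get('OPENAI_API_KEY', 'Empty')
    api_base = os.environ.get('OPENAI_API_BASE')

    if os.environ.get('OPENAI_API_TYPE') == 'azure':
        # Azure OpenAI: endpoint and api_version are required in addition to the key.
        return AzureOpenAI(
            api_key=api_key,
            api_version=os.environ.get('OPENAI_API_VERSION'),
            azure_endpoint=api_base,
            max_retries=max_retries,
        )
    # Plain OpenAI-compatible endpoint (including a local vLLM server when
    # OPENAI_API_BASE points at it); only OPENAI_API_KEY and ENGINE are needed.
    return OpenAI(api_key=api_key, base_url=api_base, max_retries=max_retries)


if __name__ == '__main__':
    client = make_client()
    engine = os.environ.get('ENGINE', 'gpt-3.5-turbo-1106')  # fallback value is an assumption
    reply = client.chat.completions.create(
        model=engine,
        messages=[{'role': 'user', 'content': 'Recommend 10 movies.'}],
        temperature=0.0,
        max_tokens=64,
    )
    print(reply.choices[0].message.content)
```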