diff --git a/configs/models/chatglm/lmdeploy_glm4_9b_chat.py b/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
index 2f8218a62..c5cb8c4d5 100644
--- a/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
+++ b/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
@@ -6,9 +6,9 @@
         abbr='glm-4-9b-chat-turbomind',
         path='THUDM/glm-4-9b-chat',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=8192,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py b/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py
index e369e6e12..67624eb89 100644
--- a/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py
+++ b/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py
@@ -7,8 +7,8 @@
         path='deepseek-ai/deepseek-llm-67b-chat',
         engine_config=dict(max_batch_size=16, tp=4),
         gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
-        max_seq_len=7168,
-        max_out_len=1024,
+        max_seq_len=8192,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
     )
diff --git a/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py b/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py
index 26aa2afce..2c108cc13 100644
--- a/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py
+++ b/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py
@@ -7,8 +7,8 @@
         path='deepseek-ai/deepseek-llm-7b-chat',
         engine_config=dict(max_batch_size=16, tp=1),
         gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
-        max_seq_len=7168,
-        max_out_len=1024,
+        max_seq_len=8192,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py b/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py
index 5d5c257b1..cf4691f16 100644
--- a/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py
+++ b/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2_5-1_8b-chat-turbomind',
         path='internlm/internlm2_5-1_8b-chat',
-        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
-        max_seq_len=8192,
-        max_out_len=2048,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py b/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py
index f1bb1b081..7fb521618 100644
--- a/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py
+++ b/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2_5-20b-chat-turbomind',
         path='internlm/internlm2_5-20b-chat',
-        engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
-        max_seq_len=8192,
-        max_out_len=2048,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
     )
diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py b/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py
index 75fb93713..8dce26843 100644
--- a/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py
+++ b/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2_5-7b-chat-turbomind',
         path='internlm/internlm2_5-7b-chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py b/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py
index 9c358d5a6..f5df7926d 100644
--- a/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py
+++ b/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2-chat-1.8b-turbomind',
         path='internlm/internlm2-chat-1_8b',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=8192,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py b/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py
index 443715494..23f35636c 100644
--- a/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py
+++ b/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2-chat-20b-turbomind',
         path='internlm/internlm2-chat-20b',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=8192,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
     )
diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py b/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py
index 82ad2e46a..60097e373 100644
--- a/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py
+++ b/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2-chat-7b-turbomind',
         path='internlm/internlm2-chat-7b',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=8192,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py b/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py
index 8718a6cfc..e9af5578b 100644
--- a/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py
+++ b/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py
@@ -6,9 +6,9 @@
         abbr='internlm-chat-20b-turbomind',
         path='internlm/internlm-chat-20b',
         engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
     )
diff --git a/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py b/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py
index ea61313af..50656a5f8 100644
--- a/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py
+++ b/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py
@@ -6,9 +6,9 @@
         abbr='internlm-chat-7b-turbomind',
         path='internlm/internlm-chat-7b',
         engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/hf_llama/hf_llama3_1_70b_instruct.py b/configs/models/hf_llama/hf_llama3_1_70b_instruct.py
index 4a17de935..c7527bb53 100644
--- a/configs/models/hf_llama/hf_llama3_1_70b_instruct.py
+++ b/configs/models/hf_llama/hf_llama3_1_70b_instruct.py
@@ -5,7 +5,7 @@
         type=HuggingFacewithChatTemplate,
         abbr='llama-3_1-70b-instruct-hf',
         path='meta-llama/Meta-Llama-3.1-70B-Instruct',
-        max_out_len=1024,
+        max_out_len=4096,
         batch_size=8,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],
diff --git a/configs/models/hf_llama/hf_llama3_1_8b.py b/configs/models/hf_llama/hf_llama3_1_8b.py
new file mode 100644
index 000000000..a41e1ddfc
--- /dev/null
+++ b/configs/models/hf_llama/hf_llama3_1_8b.py
@@ -0,0 +1,12 @@
+from opencompass.models import HuggingFaceBaseModel
+
+models = [
+    dict(
+        type=HuggingFaceBaseModel,
+        abbr='llama-3_1-8b-hf',
+        path='meta-llama/Meta-Llama-3.1-8B',
+        max_out_len=1024,
+        batch_size=8,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py b/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py
index cb42cb294..cacdec9a5 100644
--- a/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py
+++ b/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py
@@ -6,9 +6,9 @@
         abbr='llama-2-13b-chat-turbomind',
         path='meta-llama/Llama-2-13b-chat-hf',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py b/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py
index d6c69c6f9..b850106b3 100644
--- a/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py
+++ b/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py
@@ -6,9 +6,9 @@
         abbr='llama-2-70b-chat-turbomind',
         path='meta-llama/Llama-2-70b-chat-hf',
         engine_config=dict(max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
     )
diff --git a/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py b/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py
index f520ce8b3..aa3452488 100644
--- a/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py
+++ b/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py
@@ -6,9 +6,9 @@
         abbr='llama-2-7b-chat-turbomind',
         path='meta-llama/Llama-2-7b-chat-hf',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py
index 23f9bc2a1..9674169f5 100644
--- a/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py
+++ b/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py
@@ -6,9 +6,9 @@
         abbr='llama-3_1-70b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3.1-70B-Instruct',
         engine_config=dict(max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],
diff --git a/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
index 429dfec72..2754eb835 100644
--- a/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
+++ b/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
@@ -6,9 +6,9 @@
         abbr='llama-3_1-8b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3.1-8B-Instruct',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],
diff --git a/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py
index 333dc0153..12fc944c7 100644
--- a/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py
+++ b/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py
@@ -6,9 +6,9 @@
         abbr='llama-3-70b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3-70B-Instruct',
         engine_config=dict(max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],
diff --git a/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py
index cc5b3bd45..5a6431b7a 100644
--- a/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py
+++ b/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py
@@ -6,9 +6,9 @@
         abbr='llama-3-8b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3-8B-Instruct',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],
diff --git a/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py b/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py
new file mode 100644
index 000000000..4c867b602
--- /dev/null
+++ b/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='mistral-7b-instruct-v0.3-turbomind',
+        path='mistralai/Mistral-7B-Instruct-v0.3',
+        engine_config=dict(session_len=32768, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=32768,
+        max_out_len=4096,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py b/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py
new file mode 100644
index 000000000..e79a1f73a
--- /dev/null
+++ b/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='mixtral-large-instruct-2407-turbomind',
+        path='mistralai/Mistral-Large-Instruct-2407',
+        engine_config=dict(session_len=32768, max_batch_size=16, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=32768,
+        max_out_len=4096,
+        batch_size=16,
+        run_cfg=dict(num_gpus=4),
+    )
+]
diff --git a/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py
index 9b92b8140..bc123b405 100644
--- a/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py
+++ b/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-110b-chat-turbomind',
         path='Qwen/Qwen1.5-110B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=8, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=8, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=8,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py
index d2b85c2aa..5f0d54b96 100644
--- a/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py
+++ b/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-14b-chat-turbomind',
         path='Qwen/Qwen1.5-14B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py
index ff28ac0be..803ff3336 100644
--- a/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py
+++ b/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-1.8b-chat-turbomind',
         path='Qwen/Qwen1.5-1.8B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py
index 1196548a0..96fd1e43c 100644
--- a/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py
+++ b/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-32b-chat-turbomind',
         path='Qwen/Qwen1.5-32B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py
index bde14a295..f9fcc3fb9 100644
--- a/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py
+++ b/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-4b-chat-turbomind',
         path='Qwen/Qwen1.5-4B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py
index 38175eaf3..64a5f7cb6 100644
--- a/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py
+++ b/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-72b-chat-turbomind',
         path='Qwen/Qwen1.5-72B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py
index ca733c0b2..1ab393036 100644
--- a/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py
+++ b/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-7b-chat-turbomind',
         path='Qwen/Qwen1.5-7B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py b/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py
index 502de1876..f050ca382 100644
--- a/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py
+++ b/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen2-1.5b-instruct-turbomind',
         path='Qwen/Qwen2-1.5B-Instruct',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py b/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py
index 69ecb7981..c29482b5b 100644
--- a/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py
+++ b/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen2-72b-instruct-turbomind',
         path='Qwen/Qwen2-72B-Instruct',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
     )
diff --git a/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py b/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py
index 4dff85e06..05fa25c5e 100644
--- a/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py
+++ b/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen2-7b-instruct-turbomind',
         path='Qwen/Qwen2-7B-Instruct',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py b/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py
new file mode 100644
index 000000000..d296a1008
--- /dev/null
+++ b/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-1.5-34b-chat-turbomind',
+        path='01-ai/Yi-1.5-34B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=2),
+    )
+]
diff --git a/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py b/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py
new file mode 100644
index 000000000..eeaf8ea25
--- /dev/null
+++ b/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-1.5-6b-chat-turbomind',
+        path='01-ai/Yi-1.5-6B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py b/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py
new file mode 100644
index 000000000..4e33ba232
--- /dev/null
+++ b/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-1.5-9b-chat-turbomind',
+        path='01-ai/Yi-1.5-9B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/configs/models/yi/lmdeploy_yi_34b_chat.py b/configs/models/yi/lmdeploy_yi_34b_chat.py
new file mode 100644
index 000000000..5ed603a6d
--- /dev/null
+++ b/configs/models/yi/lmdeploy_yi_34b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-34b-chat-turbomind',
+        path='01-ai/Yi-34B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=2),
+    )
+]
diff --git a/configs/models/yi/lmdeploy_yi_6b_chat.py b/configs/models/yi/lmdeploy_yi_6b_chat.py
new file mode 100644
index 000000000..5c75bfa50
--- /dev/null
+++ b/configs/models/yi/lmdeploy_yi_6b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-6b-chat-turbomind',
+        path='01-ai/Yi-6B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py b/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
index 2f8218a62..c5cb8c4d5 100644
--- a/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
+++ b/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
@@ -6,9 +6,9 @@
         abbr='glm-4-9b-chat-turbomind',
         path='THUDM/glm-4-9b-chat',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=8192,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py b/opencompass/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py
index e369e6e12..67624eb89 100644
--- a/opencompass/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py
+++ b/opencompass/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py
@@ -7,8 +7,8 @@
         path='deepseek-ai/deepseek-llm-67b-chat',
         engine_config=dict(max_batch_size=16, tp=4),
         gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
-        max_seq_len=7168,
-        max_out_len=1024,
+        max_seq_len=8192,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
     )
diff --git a/opencompass/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py b/opencompass/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py
index 26aa2afce..2c108cc13 100644
--- a/opencompass/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py
+++ b/opencompass/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py
@@ -7,8 +7,8 @@
         path='deepseek-ai/deepseek-llm-7b-chat',
         engine_config=dict(max_batch_size=16, tp=1),
         gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
-        max_seq_len=7168,
-        max_out_len=1024,
+        max_seq_len=8192,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py
index 5d5c257b1..cf4691f16 100644
--- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py
+++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2_5-1_8b-chat-turbomind',
         path='internlm/internlm2_5-1_8b-chat',
-        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
-        max_seq_len=8192,
-        max_out_len=2048,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py
index f1bb1b081..7fb521618 100644
--- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py
+++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2_5-20b-chat-turbomind',
         path='internlm/internlm2_5-20b-chat',
-        engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
-        max_seq_len=8192,
-        max_out_len=2048,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
     )
diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py
index 75fb93713..8dce26843 100644
--- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py
+++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2_5-7b-chat-turbomind',
         path='internlm/internlm2_5-7b-chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py
index 9c358d5a6..f5df7926d 100644
--- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py
+++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2-chat-1.8b-turbomind',
         path='internlm/internlm2-chat-1_8b',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=8192,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py
index 443715494..23f35636c 100644
--- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py
+++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2-chat-20b-turbomind',
         path='internlm/internlm2-chat-20b',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=8192,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
     )
diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py
index 82ad2e46a..60097e373 100644
--- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py
+++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2-chat-7b-turbomind',
         path='internlm/internlm2-chat-7b',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=8192,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py
index 8718a6cfc..e9af5578b 100644
--- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py
+++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py
@@ -6,9 +6,9 @@
         abbr='internlm-chat-20b-turbomind',
         path='internlm/internlm-chat-20b',
         engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
     )
diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py
index ea61313af..50656a5f8 100644
--- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py
+++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py
@@ -6,9 +6,9 @@
         abbr='internlm-chat-7b-turbomind',
         path='internlm/internlm-chat-7b',
         engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py b/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py
index 4a17de935..c7527bb53 100644
--- a/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py
+++ b/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py
@@ -5,7 +5,7 @@
         type=HuggingFacewithChatTemplate,
         abbr='llama-3_1-70b-instruct-hf',
         path='meta-llama/Meta-Llama-3.1-70B-Instruct',
-        max_out_len=1024,
+        max_out_len=4096,
         batch_size=8,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],
diff --git a/opencompass/configs/models/hf_llama/hf_llama3_1_8b.py b/opencompass/configs/models/hf_llama/hf_llama3_1_8b.py
new file mode 100644
index 000000000..a41e1ddfc
--- /dev/null
+++ b/opencompass/configs/models/hf_llama/hf_llama3_1_8b.py
@@ -0,0 +1,12 @@
+from opencompass.models import HuggingFaceBaseModel
+
+models = [
+    dict(
+        type=HuggingFaceBaseModel,
+        abbr='llama-3_1-8b-hf',
+        path='meta-llama/Meta-Llama-3.1-8B',
+        max_out_len=1024,
+        batch_size=8,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py b/opencompass/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py
index cb42cb294..cacdec9a5 100644
--- a/opencompass/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py
+++ b/opencompass/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py
@@ -6,9 +6,9 @@
         abbr='llama-2-13b-chat-turbomind',
         path='meta-llama/Llama-2-13b-chat-hf',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py b/opencompass/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py
index d6c69c6f9..b850106b3 100644
--- a/opencompass/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py
+++ b/opencompass/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py
@@ -6,9 +6,9 @@
         abbr='llama-2-70b-chat-turbomind',
         path='meta-llama/Llama-2-70b-chat-hf',
         engine_config=dict(max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
     )
diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py b/opencompass/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py
index f520ce8b3..aa3452488 100644
--- a/opencompass/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py
+++ b/opencompass/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py
@@ -6,9 +6,9 @@
         abbr='llama-2-7b-chat-turbomind',
         path='meta-llama/Llama-2-7b-chat-hf',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py
index 23f9bc2a1..9674169f5 100644
--- a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py
+++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py
@@ -6,9 +6,9 @@
         abbr='llama-3_1-70b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3.1-70B-Instruct',
         engine_config=dict(max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],
diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
index 429dfec72..2754eb835 100644
--- a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
+++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
@@ -6,9 +6,9 @@
         abbr='llama-3_1-8b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3.1-8B-Instruct',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],
diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py
index 333dc0153..12fc944c7 100644
--- a/opencompass/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py
+++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py
@@ -6,9 +6,9 @@
         abbr='llama-3-70b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3-70B-Instruct',
         engine_config=dict(max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],
diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py
index cc5b3bd45..5a6431b7a 100644
--- a/opencompass/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py
+++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py
@@ -6,9 +6,9 @@
         abbr='llama-3-8b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3-8B-Instruct',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],
diff --git a/opencompass/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py b/opencompass/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py
new file mode 100644
index 000000000..4c867b602
--- /dev/null
+++ b/opencompass/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='mistral-7b-instruct-v0.3-turbomind',
+        path='mistralai/Mistral-7B-Instruct-v0.3',
+        engine_config=dict(session_len=32768, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=32768,
+        max_out_len=4096,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/opencompass/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py b/opencompass/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py
new file mode 100644
index 000000000..e79a1f73a
--- /dev/null
+++ b/opencompass/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='mixtral-large-instruct-2407-turbomind',
+        path='mistralai/Mistral-Large-Instruct-2407',
+        engine_config=dict(session_len=32768, max_batch_size=16, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=32768,
+        max_out_len=4096,
+        batch_size=16,
+        run_cfg=dict(num_gpus=4),
+    )
+]
diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py
index 9b92b8140..bc123b405 100644
--- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py
+++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-110b-chat-turbomind',
         path='Qwen/Qwen1.5-110B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=8, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=8, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=8,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py
index d2b85c2aa..5f0d54b96 100644
--- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py
+++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-14b-chat-turbomind',
         path='Qwen/Qwen1.5-14B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py
index ff28ac0be..803ff3336 100644
--- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py
+++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-1.8b-chat-turbomind',
         path='Qwen/Qwen1.5-1.8B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py
index 1196548a0..96fd1e43c 100644
--- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py
+++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-32b-chat-turbomind',
         path='Qwen/Qwen1.5-32B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py
index bde14a295..f9fcc3fb9 100644
--- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py
+++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-4b-chat-turbomind',
         path='Qwen/Qwen1.5-4B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py
index 38175eaf3..64a5f7cb6 100644
--- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py
+++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-72b-chat-turbomind',
         path='Qwen/Qwen1.5-72B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py
index ca733c0b2..1ab393036 100644
--- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py
+++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-7b-chat-turbomind',
         path='Qwen/Qwen1.5-7B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],
diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py b/opencompass/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py
index 502de1876..f050ca382 100644
--- a/opencompass/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py
+++ b/opencompass/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen2-1.5b-instruct-turbomind',
         path='Qwen/Qwen2-1.5B-Instruct',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py b/opencompass/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py
index 69ecb7981..c29482b5b 100644
--- a/opencompass/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py
+++ b/opencompass/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen2-72b-instruct-turbomind',
         path='Qwen/Qwen2-72B-Instruct',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
     )
diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py b/opencompass/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py
index 4dff85e06..05fa25c5e 100644
--- a/opencompass/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py
+++ b/opencompass/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py
@@ -5,10 +5,10 @@
         type=TurboMindModelwithChatTemplate,
         abbr='qwen2-7b-instruct-turbomind',
         path='Qwen/Qwen2-7B-Instruct',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )
diff --git a/opencompass/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py b/opencompass/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py
new file mode 100644
index 000000000..d296a1008
--- /dev/null
+++ b/opencompass/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-1.5-34b-chat-turbomind',
+        path='01-ai/Yi-1.5-34B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=2),
+    )
+]
diff --git a/opencompass/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py b/opencompass/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py
new file mode 100644
index 000000000..eeaf8ea25
--- /dev/null
+++ b/opencompass/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-1.5-6b-chat-turbomind',
+        path='01-ai/Yi-1.5-6B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/opencompass/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py b/opencompass/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py
new file mode 100644
index 000000000..4e33ba232
--- /dev/null
+++ b/opencompass/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-1.5-9b-chat-turbomind',
+        path='01-ai/Yi-1.5-9B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/opencompass/configs/models/yi/lmdeploy_yi_34b_chat.py b/opencompass/configs/models/yi/lmdeploy_yi_34b_chat.py
new file mode 100644
index 000000000..5ed603a6d
--- /dev/null
+++ b/opencompass/configs/models/yi/lmdeploy_yi_34b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-34b-chat-turbomind',
+        path='01-ai/Yi-34B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=2),
+    )
+]
diff --git a/opencompass/configs/models/yi/lmdeploy_yi_6b_chat.py b/opencompass/configs/models/yi/lmdeploy_yi_6b_chat.py
new file mode 100644
index 000000000..5c75bfa50
--- /dev/null
+++ b/opencompass/configs/models/yi/lmdeploy_yi_6b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-6b-chat-turbomind',
+        path='01-ai/Yi-6B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]