Skip to content

Commit

Permalink
Merge branch 'open-compass:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
bittersweet1999 authored Sep 25, 2024
2 parents 6751e16 + e768194 commit 22685e9
Show file tree
Hide file tree
Showing 74 changed files with 420 additions and 186 deletions.
4 changes: 2 additions & 2 deletions configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
abbr='glm-4-9b-chat-turbomind',
path='THUDM/glm-4-9b-chat',
engine_config=dict(max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
max_seq_len=8192,
max_out_len=1024,
max_out_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=1),
)
Expand Down
4 changes: 2 additions & 2 deletions configs/models/deepseek/lmdeploy_deepseek_67b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
path='deepseek-ai/deepseek-llm-67b-chat',
engine_config=dict(max_batch_size=16, tp=4),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
max_seq_len=7168,
max_out_len=1024,
max_seq_len=8192,
max_out_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=4),
)
Expand Down
4 changes: 2 additions & 2 deletions configs/models/deepseek/lmdeploy_deepseek_7b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
path='deepseek-ai/deepseek-llm-7b-chat',
engine_config=dict(max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
max_seq_len=7168,
max_out_len=1024,
max_seq_len=8192,
max_out_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=1),
)
Expand Down
8 changes: 4 additions & 4 deletions configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='internlm2_5-1_8b-chat-turbomind',
path='internlm/internlm2_5-1_8b-chat',
engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
max_seq_len=8192,
max_out_len=2048,
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
)
Expand Down
8 changes: 4 additions & 4 deletions configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='internlm2_5-20b-chat-turbomind',
path='internlm/internlm2_5-20b-chat',
engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
max_seq_len=8192,
max_out_len=2048,
engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=2),
)
Expand Down
8 changes: 4 additions & 4 deletions configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='internlm2_5-7b-chat-turbomind',
path='internlm/internlm2_5-7b-chat',
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
)
Expand Down
8 changes: 4 additions & 4 deletions configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='internlm2-chat-1.8b-turbomind',
path='internlm/internlm2-chat-1_8b',
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=8192,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
)
Expand Down
8 changes: 4 additions & 4 deletions configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='internlm2-chat-20b-turbomind',
path='internlm/internlm2-chat-20b',
engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=8192,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=2),
)
Expand Down
8 changes: 4 additions & 4 deletions configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='internlm2-chat-7b-turbomind',
path='internlm/internlm2-chat-7b',
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=8192,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
)
Expand Down
4 changes: 2 additions & 2 deletions configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
abbr='internlm-chat-20b-turbomind',
path='internlm/internlm-chat-20b',
engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
max_seq_len=4096,
max_out_len=1024,
max_out_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=2),
)
Expand Down
4 changes: 2 additions & 2 deletions configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
abbr='internlm-chat-7b-turbomind',
path='internlm/internlm-chat-7b',
engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
max_seq_len=4096,
max_out_len=1024,
max_out_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=1),
)
Expand Down
2 changes: 1 addition & 1 deletion configs/models/hf_llama/hf_llama3_1_70b_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
type=HuggingFacewithChatTemplate,
abbr='llama-3_1-70b-instruct-hf',
path='meta-llama/Meta-Llama-3.1-70B-Instruct',
max_out_len=1024,
max_out_len=4096,
batch_size=8,
run_cfg=dict(num_gpus=4),
stop_words=['<|end_of_text|>', '<|eot_id|>'],
Expand Down
12 changes: 12 additions & 0 deletions configs/models/hf_llama/hf_llama3_1_8b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from opencompass.models import HuggingFaceBaseModel

models = [
dict(
type=HuggingFaceBaseModel,
abbr='llama-3_1-8b-hf',
path='meta-llama/Meta-Llama-3.1-8B-Instruct',
max_out_len=1024,
batch_size=8,
run_cfg=dict(num_gpus=1),
)
]
4 changes: 2 additions & 2 deletions configs/models/hf_llama/lmdeploy_llama2_13b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
abbr='llama-2-13b-chat-turbomind',
path='meta-llama/Llama-2-13b-chat-hf',
engine_config=dict(max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
max_seq_len=4096,
max_out_len=1024,
max_out_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=1),
)
Expand Down
4 changes: 2 additions & 2 deletions configs/models/hf_llama/lmdeploy_llama2_70b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
abbr='llama-2-70b-chat-turbomind',
path='meta-llama/Llama-2-70b-chat-hf',
engine_config=dict(max_batch_size=16, tp=4),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
max_seq_len=4096,
max_out_len=1024,
max_out_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=4),
)
Expand Down
4 changes: 2 additions & 2 deletions configs/models/hf_llama/lmdeploy_llama2_7b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
abbr='llama-2-7b-chat-turbomind',
path='meta-llama/Llama-2-7b-chat-hf',
engine_config=dict(max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
max_seq_len=4096,
max_out_len=1024,
max_out_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=1),
)
Expand Down
6 changes: 3 additions & 3 deletions configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
abbr='llama-3_1-70b-instruct-turbomind',
path='meta-llama/Meta-Llama-3.1-70B-Instruct',
engine_config=dict(max_batch_size=16, tp=4),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=4),
stop_words=['<|end_of_text|>', '<|eot_id|>'],
Expand Down
6 changes: 3 additions & 3 deletions configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
abbr='llama-3_1-8b-instruct-turbomind',
path='meta-llama/Meta-Llama-3.1-8B-Instruct',
engine_config=dict(max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
stop_words=['<|end_of_text|>', '<|eot_id|>'],
Expand Down
6 changes: 3 additions & 3 deletions configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
abbr='llama-3-70b-instruct-turbomind',
path='meta-llama/Meta-Llama-3-70B-Instruct',
engine_config=dict(max_batch_size=16, tp=4),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=4),
stop_words=['<|end_of_text|>', '<|eot_id|>'],
Expand Down
6 changes: 3 additions & 3 deletions configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
abbr='llama-3-8b-instruct-turbomind',
path='meta-llama/Meta-Llama-3-8B-Instruct',
engine_config=dict(max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
stop_words=['<|end_of_text|>', '<|eot_id|>'],
Expand Down
15 changes: 15 additions & 0 deletions configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from opencompass.models import TurboMindModelwithChatTemplate

models = [
dict(
type=TurboMindModelwithChatTemplate,
abbr='mistral-7b-instruct-v0.3-turbomind',
path='mistralai/Mistral-7B-Instruct-v0.3',
engine_config=dict(session_len=32768, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=32768,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
)
]
15 changes: 15 additions & 0 deletions configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from opencompass.models import TurboMindModelwithChatTemplate

models = [
dict(
type=TurboMindModelwithChatTemplate,
abbr='mixtral-large-instruct-2407-turbomind',
path='mistralai/Mistral-Large-Instruct-2407',
engine_config=dict(session_len=32768, max_batch_size=16, tp=4),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=32768,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=4),
)
]
8 changes: 4 additions & 4 deletions configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='qwen1.5-110b-chat-turbomind',
path='Qwen/Qwen1.5-110B-Chat',
engine_config=dict(session_len=7168, max_batch_size=8, tp=4),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=16384, max_batch_size=8, tp=4),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=8,
run_cfg=dict(num_gpus=4),
stop_words=['<|im_end|>', '<|im_start|>'],
Expand Down
8 changes: 4 additions & 4 deletions configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='qwen1.5-14b-chat-turbomind',
path='Qwen/Qwen1.5-14B-Chat',
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
stop_words=['<|im_end|>', '<|im_start|>'],
Expand Down
8 changes: 4 additions & 4 deletions configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='qwen1.5-1.8b-chat-turbomind',
path='Qwen/Qwen1.5-1.8B-Chat',
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
stop_words=['<|im_end|>', '<|im_start|>'],
Expand Down
8 changes: 4 additions & 4 deletions configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='qwen1.5-32b-chat-turbomind',
path='Qwen/Qwen1.5-32B-Chat',
engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=2),
stop_words=['<|im_end|>', '<|im_start|>'],
Expand Down
8 changes: 4 additions & 4 deletions configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='qwen1.5-4b-chat-turbomind',
path='Qwen/Qwen1.5-4B-Chat',
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
stop_words=['<|im_end|>', '<|im_start|>'],
Expand Down
8 changes: 4 additions & 4 deletions configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='qwen1.5-72b-chat-turbomind',
path='Qwen/Qwen1.5-72B-Chat',
engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=4),
stop_words=['<|im_end|>', '<|im_start|>'],
Expand Down
8 changes: 4 additions & 4 deletions configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
type=TurboMindModelwithChatTemplate,
abbr='qwen1.5-7b-chat-turbomind',
path='Qwen/Qwen1.5-7B-Chat',
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
max_seq_len=7168,
max_out_len=1024,
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
max_seq_len=16384,
max_out_len=4096,
batch_size=16,
run_cfg=dict(num_gpus=1),
stop_words=['<|im_end|>', '<|im_start|>'],
Expand Down
Loading

0 comments on commit 22685e9

Please sign in to comment.