diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml
index 98fab56cbc4..b0bd5f30c20 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml
@@ -1,17 +1,17 @@
 repo_id:
-  #- 'THUDM/chatglm3-6b'
-  #- 'THUDM/glm-4-9b-chat'
-  #- 'baichuan-inc/Baichuan2-7B-Chat'
-  #- 'baichuan-inc/Baichuan2-13B-Chat'
-  #- 'meta-llama/Llama-2-7b-chat-hf'
-  #- 'meta-llama/Llama-2-13b-chat-hf'
-  #- 'meta-llama/Meta-Llama-3-8B-Instruct'
-  #- 'mistralai/Mistral-7B-Instruct-v0.2'
-  #- 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  #- 'RWKV/v5-Eagle-7B-HF'
-  #- '01-ai/Yi-6B-Chat'
-  #- 'Qwen/Qwen-VL-Chat'
-  #- 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - 'RWKV/v5-Eagle-7B-HF'
+  - '01-ai/Yi-6B-Chat'
+  - 'Qwen/Qwen-VL-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
   - 'openbmb/MiniCPM-2B-sft-bf16'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml
index cc0a24821e1..b87e6c2c865 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml
@@ -1,9 +1,9 @@
 repo_id:
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
   - 'microsoft/phi-3-vision-128k-instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
index 60b3bffc61c..39d575680ab 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
@@ -1,16 +1,16 @@
 repo_id:
-  #- 'THUDM/chatglm3-6b'
-  #- 'THUDM/glm-4-9b-chat'
-  #- 'baichuan-inc/Baichuan2-7B-Chat'
-  #- 'baichuan-inc/Baichuan2-13B-Chat'
-  #- 'meta-llama/Llama-2-7b-chat-hf'
-  #- 'meta-llama/Llama-2-13b-chat-hf'
-  #- 'meta-llama/Meta-Llama-3-8B-Instruct'
-  #- 'mistralai/Mistral-7B-Instruct-v0.2'
-  #- 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  #- '01-ai/Yi-6B-Chat'
-  #- 'Qwen/Qwen-VL-Chat'
-  #- 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'Qwen/Qwen-VL-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
   - 'openbmb/MiniCPM-2B-sft-bf16'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml
index ff58a625bbb..d7172186bb3 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml
@@ -1,9 +1,9 @@
 repo_id:
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
   - 'microsoft/phi-3-vision-128k-instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
index dd5304c6695..2730e465d47 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
@@ -1,16 +1,16 @@
 repo_id:
-  #- 'THUDM/chatglm3-6b'
-  #- 'THUDM/glm-4-9b-chat'
-  #- 'baichuan-inc/Baichuan2-7B-Chat'
-  #- 'baichuan-inc/Baichuan2-13B-Chat'
-  #- 'meta-llama/Llama-2-7b-chat-hf'
-  #- 'meta-llama/Llama-2-13b-chat-hf'
-  #- 'meta-llama/Meta-Llama-3-8B-Instruct'
-  #- 'mistralai/Mistral-7B-Instruct-v0.2'
-  #- 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  #- '01-ai/Yi-6B-Chat'
-  #- 'Qwen/Qwen-VL-Chat'
-  #- 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'Qwen/Qwen-VL-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
   - 'openbmb/MiniCPM-2B-sft-bf16'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_437.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_437.yaml
index 3d48cf9b9f3..3839d0d2951 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_437.yaml
@@ -1,9 +1,9 @@
 repo_id:
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
   - 'microsoft/phi-3-vision-128k-instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
index 850912f7a5f..c53e6283919 100644
--- a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
@@ -1,16 +1,16 @@
 repo_id:
-  #- 'THUDM/chatglm3-6b'
-  #- 'THUDM/glm-4-9b-chat'
-  #- 'baichuan-inc/Baichuan2-7B-Chat'
-  #- 'baichuan-inc/Baichuan2-13B-Chat'
-  #- 'meta-llama/Llama-2-7b-chat-hf'
-  #- 'meta-llama/Llama-2-13b-chat-hf'
-  #- 'meta-llama/Meta-Llama-3-8B-Instruct'
-  #- 'mistralai/Mistral-7B-Instruct-v0.2'
-  #- 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  #- '01-ai/Yi-6B-Chat'
-  #- 'Qwen/Qwen-VL-Chat'
-  #- 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'Qwen/Qwen-VL-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
   - 'openbmb/MiniCPM-2B-sft-bf16'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_437.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_437.yaml
index 229d8ec35f9..d6ee670ea29 100644
--- a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_437.yaml
@@ -1,9 +1,9 @@
 repo_id:
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
   - 'microsoft/phi-3-vision-128k-instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
index 14adf06075b..47b9839a789 100644
--- a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
@@ -1,15 +1,15 @@
 repo_id:
-  #- 'THUDM/chatglm3-6b'
-  #- 'THUDM/glm-4-9b-chat'
-  #- 'baichuan-inc/Baichuan2-7B-Chat'
-  #- 'meta-llama/Llama-2-7b-chat-hf'
-  #- 'meta-llama/Llama-2-13b-chat-hf'
-  #- 'meta-llama/Meta-Llama-3-8B-Instruct'
-  #- 'mistralai/Mistral-7B-Instruct-v0.2'
-  #- 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  #- '01-ai/Yi-6B-Chat'
-  #- 'Qwen/Qwen-VL-Chat'
-  #- 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'Qwen/Qwen-VL-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
   - 'openbmb/MiniCPM-2B-sft-bf16'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_437.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_437.yaml
index ea08a7a120f..ddbf1cf4d19 100644
--- a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_437.yaml
@@ -1,9 +1,9 @@
 repo_id:
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
   - 'microsoft/phi-3-vision-128k-instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
index 23d46b79201..39115e0231b 100644
--- a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
@@ -1,16 +1,16 @@
 repo_id:
-  #- 'THUDM/chatglm3-6b'
-  #- 'THUDM/glm-4-9b-chat'
-  #- 'baichuan-inc/Baichuan2-7B-Chat'
-  #- 'baichuan-inc/Baichuan2-13B-Chat'
-  #- 'meta-llama/Llama-2-7b-chat-hf'
-  #- 'meta-llama/Llama-2-13b-chat-hf'
-  #- 'meta-llama/Meta-Llama-3-8B-Instruct'
-  #- 'mistralai/Mistral-7B-Instruct-v0.2'
-  #- 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  #- '01-ai/Yi-6B-Chat'
-  #- 'Qwen/Qwen-VL-Chat'
-  #- 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'Qwen/Qwen-VL-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
   - 'openbmb/MiniCPM-2B-sft-bf16'
 local_model_hub: 'path to your local model hub'
 warm_up: 3
diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_437.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_437.yaml
index e7f6d4673fd..1a969165267 100644
--- a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_437.yaml
@@ -1,9 +1,9 @@
 repo_id:
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
   - 'microsoft/phi-3-vision-128k-instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 3
diff --git a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
index 04e22f983c0..26e128a564c 100644
--- a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
@@ -1,14 +1,14 @@
 repo_id:
-  #- 'THUDM/chatglm3-6b'
-  #- 'THUDM/glm-4-9b-chat'
-  #- 'baichuan-inc/Baichuan2-7B-Chat'
-  #- 'meta-llama/Llama-2-7b-chat-hf'
-  #- 'meta-llama/Llama-2-13b-chat-hf'
-  #- 'meta-llama/Meta-Llama-3-8B-Instruct'
-  #- 'mistralai/Mistral-7B-Instruct-v0.2'
-  #- 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  #- '01-ai/Yi-6B-Chat'
-  #- 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
   - 'openbmb/MiniCPM-2B-sft-bf16'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_437.yaml b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_437.yaml
index e08cd4ec921..c85da867aa2 100644
--- a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_437.yaml
@@ -1,9 +1,9 @@
 repo_id:
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
   - 'microsoft/phi-3-vision-128k-instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
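
Note: every hunk above touches only the repo_id list at the top of its config file; the diff re-enables the commented-out models so they run again in the igpu-perf benchmark. For context, a minimal sketch of what one complete config of this kind might look like, assuming it follows the ipex-llm all-in-one benchmark format — the fields below warm_up (num_trials, num_beams, low_bit, batch_size, in_out_pairs, test_api, cpu_embedding) are assumptions and are not shown in this diff:

# Hypothetical full config sketch; only repo_id, local_model_hub, and
# warm_up appear in the hunks above. All other fields are assumed from
# the all-in-one benchmark config format and may differ in the real files.
repo_id:
  - 'THUDM/chatglm3-6b'
local_model_hub: 'path to your local model hub'
warm_up: 1                # untimed runs before measurement (3 for the 32-32 configs)
num_trials: 3             # assumed: timed runs averaged per model
num_beams: 1              # assumed: greedy decoding
low_bit: 'sym_int4'       # assumed: quantization used by the int4_fp16 variants
batch_size: 1             # assumed
in_out_pairs:
  - '1024-128'            # assumed: input/output token lengths matching the file name
test_api:
  - 'transformer_int4_gpu_win'  # assumed: the iGPU (Windows) test path
cpu_embedding: True       # assumed: keep the embedding layer on CPU for iGPU runs

The _437 variants presumably split out models that need transformers 4.37+ (Qwen1.5/2, Phi-3) so they can run in a separate environment from the models pinned to an older transformers version; that reading is inferred from the file names, not stated in the diff.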