diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md index 351311b3283..aa5740d3da5 100644 --- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md +++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md @@ -97,7 +97,7 @@ python llama.py --repo-id-or-model-path meta-llama/Meta-Llama-3-8B-Instruct python qwen2.py # to run Qwen2-7B-Instruct -python qwen2.py --repo-id-or-model-path Qwen/Qwen2-7B-Instruct --inter-pp 4 +python qwen2.py --repo-id-or-model-path Qwen/Qwen2-7B-Instruct # to run MiniCPM-1B-sft-bf16 python minicpm.py diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen2.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen2.py index fd0f1482147..2e4d195f80c 100644 --- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen2.py +++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen2.py @@ -43,8 +43,8 @@ parser.add_argument("--max-output-len", type=int, default=1024) parser.add_argument("--max-prompt-len", type=int, default=512) parser.add_argument("--disable-transpose-value-cache", action="store_true", default=False) - parser.add_argument("--intra-pp", type=int, default=2) - parser.add_argument("--inter-pp", type=int, default=1) + parser.add_argument("--intra-pp", type=int, default=None) + parser.add_argument("--inter-pp", type=int, default=None) args = parser.parse_args() model_path = args.repo_id_or_model_path