diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index d0edf0a75b710..1ba424c4eeb14 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -341,9 +341,9 @@ def add_cli_args( help='Maximum context length covered by CUDA ' 'graphs. When a sequence has context length ' 'larger than this, we fall back to eager mode. ' - '(DEPRECATED. Use --max-seq_len-to-capture instead' + '(DEPRECATED. Use --max-seq-len-to-capture instead' ')') - parser.add_argument('--max-seq_len-to-capture', + parser.add_argument('--max-seq-len-to-capture', type=int, default=EngineArgs.max_seq_len_to_capture, help='Maximum sequence length covered by CUDA '