[Frontend] Add --logprobs argument to benchmark_serving.py #8191

Merged
7 changes: 2 additions & 5 deletions benchmarks/benchmark_serving.py
@@ -21,10 +21,6 @@
 when using tgi backend, add
     --endpoint /generate_stream
 to the end of the command above.
-
-Use --logprobs <num logprobs> to specify the number of logprobs-per-token
-to return as part of the request (or leave the argument unspecified
-to default to 1 logprob-per-token).
 """
 import argparse
 import asyncio
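The paragraph removed above duplicated the argparse help text updated below; the flag itself is unchanged. For illustration, an invocation might look like this (backend, model, and dataset values are placeholders, not taken from this PR):

    python benchmarks/benchmark_serving.py \
        --backend vllm \
        --model meta-llama/Llama-2-7b-hf \
        --dataset-name sharegpt \
        --dataset-path ./ShareGPT_V3_unfiltered_cleaned_split.json \
        --logprobs 5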
@@ -735,7 +731,8 @@ def main(args: argparse.Namespace):
         "--logprobs",
         type=int,
         default=None,
-        help="Number of logprobs-per-token to return as part of the request.",
+        help=("Number of logprobs-per-token to return as part of the request "
+              "(default 1)"),
     )
     parser.add_argument(
         "--sonnet-prefix-len",
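Note the interplay between `default=None` and the "(default 1)" wording: the script does not substitute 1 itself; leaving the value unset lets the serving endpoint fall back to one logprob per token. A minimal sketch of that pattern (hypothetical helper, not code from this PR):

    # Hypothetical sketch: forward `logprobs` only when the user set it;
    # omitting the key lets the server apply its own default of one
    # logprob per token.
    from typing import Optional

    def build_payload(prompt: str, logprobs: Optional[int]) -> dict:
        payload = {"prompt": prompt, "max_tokens": 128, "stream": True}
        if logprobs is not None:
            payload["logprobs"] = logprobs
        return payload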
2 changes: 1 addition & 1 deletion tests/multi_step/test_correctness_llm.py
@@ -57,7 +57,7 @@ def test_multi_step_llm(
                       GPU -> CPU output transfer
         num_prompts: number of example prompts under test
         num_logprobs: corresponds to the `logprobs` argument to the OpenAI
-                      completions endpoint; `None` -> no logprobs
+                      completions endpoint; `None` -> 1 logprob returned.
     """

     prompts = example_prompts
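For readers without the test file open, a condensed sketch of what `test_multi_step_llm` verifies: single-step and multi-step scheduling should agree on sampled tokens (and, when requested, logprobs). The model name and parameters below are illustrative, not the test's actual fixtures:

    # Illustrative sketch of the multi-step correctness check.
    from vllm import LLM, SamplingParams

    prompts = ["Hello, my name is", "The capital of France is"]
    # logprobs=5 requests 5 logprobs per token; logprobs=None still
    # yields the sampled token's logprob (the "1 logprob" default).
    params = SamplingParams(temperature=0.0, max_tokens=32, logprobs=5)

    baseline = LLM(model="JackFram/llama-160m").generate(prompts, params)
    multi = LLM(model="JackFram/llama-160m",
                num_scheduler_steps=8).generate(prompts, params)

    for ref, out in zip(baseline, multi):
        assert ref.outputs[0].token_ids == out.outputs[0].token_ids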