[Frontend] Add --logprobs argument to benchmark_serving.py (vllm-pr…
afeldman-nm authored Sep 6, 2024
1 parent baa5467 commit e5cab71
Showing 3 changed files with 19 additions and 1 deletion.
2 changes: 2 additions & 0 deletions benchmarks/backend_request_func.py
@@ -24,6 +24,7 @@ class RequestFuncInput:
     model: str
     best_of: int = 1
     use_beam_search: bool = False
+    logprobs: Optional[int] = None


 @dataclass
@@ -236,6 +237,7 @@ async def async_request_openai_completions(
             "temperature": 0.0,
             "best_of": request_func_input.best_of,
             "max_tokens": request_func_input.output_len,
+            "logprobs": request_func_input.logprobs,
             "stream": True,
         }
         headers = {
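With this change, any `logprobs` value set on `RequestFuncInput` is forwarded verbatim as the `logprobs` key of the OpenAI-compatible completions payload. A minimal sketch of that flow (the `prompt` field and the example values here are illustrative assumptions; the payload keys match the diff above):

from dataclasses import dataclass
from typing import Optional

@dataclass
class RequestFuncInput:
    model: str
    prompt: str                      # assumed here for illustration
    output_len: int                  # referenced by the diff above
    best_of: int = 1
    use_beam_search: bool = False
    logprobs: Optional[int] = None   # new field added by this commit

req = RequestFuncInput(model="m", prompt="Hello", output_len=16, logprobs=5)
payload = {
    "temperature": 0.0,
    "best_of": req.best_of,
    "max_tokens": req.output_len,
    "logprobs": req.logprobs,  # None whenever --logprobs is not given
    "stream": True,
}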
16 changes: 16 additions & 0 deletions benchmarks/benchmark_serving.py
@@ -318,6 +318,7 @@ async def benchmark(
     model_id: str,
     tokenizer: PreTrainedTokenizerBase,
     input_requests: List[Tuple[str, int, int]],
+    logprobs: Optional[int],
     best_of: int,
     use_beam_search: bool,
     request_rate: float,
@@ -339,6 +340,7 @@ async def benchmark(
         api_url=api_url,
         prompt_len=test_prompt_len,
         output_len=test_output_len,
+        logprobs=logprobs,
         best_of=best_of,
         use_beam_search=use_beam_search,
     )
@@ -358,6 +360,7 @@ async def benchmark(
         api_url=base_url + "/start_profile",
         prompt_len=test_prompt_len,
         output_len=test_output_len,
+        logprobs=logprobs,
         best_of=best_of,
         use_beam_search=use_beam_search,
     )
@@ -379,6 +382,7 @@ async def benchmark(
             api_url=api_url,
             prompt_len=prompt_len,
             output_len=output_len,
+            logprobs=logprobs,
             best_of=best_of,
             use_beam_search=use_beam_search,
         )
@@ -396,6 +400,7 @@ async def benchmark(
         api_url=base_url + "/stop_profile",
         prompt_len=test_prompt_len,
         output_len=test_output_len,
+        logprobs=logprobs,
         best_of=best_of,
         use_beam_search=use_beam_search,
     )
@@ -580,6 +585,7 @@ def main(args: argparse.Namespace):
             model_id=model_id,
             tokenizer=tokenizer,
             input_requests=input_requests,
+            logprobs=args.logprobs,
             best_of=args.best_of,
             use_beam_search=args.use_beam_search,
             request_rate=args.request_rate,
@@ -721,6 +727,16 @@ def main(args: argparse.Namespace):
         help=
         "Number of output tokens per request, used only for sonnet dataset.",
     )
+    parser.add_argument(
+        "--logprobs",
+        type=int,
+        default=None,
+        help=("Number of logprobs-per-token to compute & return as part of "
+              "the request. If unspecified, then either (1) if beam search "
+              "is disabled, no logprobs are computed & a single dummy "
+              "logprob is returned for each token; or (2) if beam search "
+              "is enabled, 1 logprob per token is computed."),
+    )
     parser.add_argument(
         "--sonnet-prefix-len",
         type=int,
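The new flag parses like any optional integer argument; a quick standalone check (hypothetical invocation, with the real parser defining many more options):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--logprobs", type=int, default=None)

# Omitted: logprobs stays None and the server-side default described
# in the help text above applies.
assert parser.parse_args([]).logprobs is None

# Explicit: request 5 logprobs per generated token.
assert parser.parse_args(["--logprobs", "5"]).logprobs == 5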
2 changes: 1 addition & 1 deletion tests/multi_step/test_correctness_llm.py
@@ -57,7 +57,7 @@ def test_multi_step_llm(
                  GPU -> CPU output transfer
     num_prompts: number of example prompts under test
     num_logprobs: corresponds to the `logprobs` argument to the OpenAI
-                  completions endpoint; `None` -> no logprobs
+                  completions endpoint; `None` -> 1 logprob returned.
     """

     prompts = example_prompts
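Taken together, the help text above and this docstring describe the fallback when --logprobs is unspecified. A paraphrase of that documented behavior (not code from the repository):

from typing import Optional

def num_logprobs_computed(logprobs: Optional[int],
                          use_beam_search: bool) -> Optional[int]:
    """How many logprobs the server computes per token, per the help
    text above; None means none are computed, although the response
    still carries a single dummy logprob for each token."""
    if logprobs is not None:
        return logprobs  # an explicit --logprobs value is passed through
    if use_beam_search:
        return 1         # beam search computes 1 logprob per token
    return None          # nothing computed; a dummy logprob is returned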
