diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py
index 6a50ee59ea67a..9717e008972fe 100644
--- a/vllm/model_executor/layers/sampler.py
+++ b/vllm/model_executor/layers/sampler.py
@@ -302,7 +302,9 @@ def _sample_from_prompt(
         # Random sampling.
         # Sample `best_of` tokens for the prompt.
         num_seqs = sampling_params.best_of
-        next_token_ids = torch.multinomial(prob, num_samples=num_seqs)
+        next_token_ids = torch.multinomial(prob,
+                                           num_samples=num_seqs,
+                                           replacement=True)
         next_token_ids = next_token_ids.tolist()
         return next_token_ids
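For context on what the `replacement=True` change does: `torch.multinomial` without replacement forces the `best_of` sampled token ids to be distinct, so they are not independent draws from the distribution (and `num_samples` cannot exceed the number of tokens with nonzero probability). With `replacement=True`, each of the `best_of` tokens is an independent sample from `prob`, which is what random sampling intends here. A minimal sketch of the difference, using a made-up toy distribution (the 4-token `prob` tensor below is illustrative, not from the vLLM code):

```python
import torch

# Toy next-token distribution over a 4-token vocabulary (hypothetical).
prob = torch.tensor([0.7, 0.2, 0.05, 0.05])

# Old behavior: sampling WITHOUT replacement. The returned ids are
# forced to be unique, so the draws are not independent, and asking
# for more samples than there are nonzero-probability tokens errors.
distinct = torch.multinomial(prob, num_samples=3)

# New behavior: sampling WITH replacement. Each sample is an
# independent draw from `prob`; ids may repeat.
independent = torch.multinomial(prob, num_samples=3, replacement=True)

print(distinct.tolist())     # e.g. [0, 1, 3] -- always unique ids
print(independent.tolist())  # e.g. [0, 0, 1] -- repeats are possible
```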