[V1][BugFix] Fix Generator construction in greedy + seed case (vllm-project#10097)

Signed-off-by: Nick Hill <[email protected]>
njhill authored Nov 7, 2024
1 parent e7b84c3 commit 1fa020c
Showing 1 changed file with 3 additions and 2 deletions: vllm/v1/worker/gpu_model_runner.py
@@ -146,7 +146,7 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
         for req_data in scheduler_output.scheduled_new_reqs:
             req_id = req_data.req_id
             sampling_params = req_data.sampling_params
-            if sampling_params.seed is not None:
+            if sampling_params.sampling_type == SamplingType.RANDOM_SEED:
                 generator = torch.Generator(device=self.device)
                 generator.manual_seed(sampling_params.seed)
             else:
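
The predicate change matters because a request can be greedy (zero temperature) and still carry a seed, and in that case no torch.Generator should be constructed. A minimal sketch of the distinction, not vLLM code (the enum values and the temperature-based dispatch are assumptions mirroring what SamplingParams.sampling_type is expected to return):

# Sketch only: assumed semantics of SamplingParams.sampling_type.
# A greedy request (temperature == 0) may still carry a seed; it should not
# get a torch.Generator, which is why the seed-only check was wrong.
from enum import Enum
from typing import Optional

class SamplingType(Enum):
    GREEDY = 0
    RANDOM = 1
    RANDOM_SEED = 2

def sampling_type(temperature: float, seed: Optional[int]) -> SamplingType:
    if temperature == 0.0:
        return SamplingType.GREEDY  # greedy wins even when a seed is set
    if seed is not None:
        return SamplingType.RANDOM_SEED
    return SamplingType.RANDOM

assert sampling_type(0.0, seed=42) is SamplingType.GREEDY       # old check built a Generator here
assert sampling_type(0.8, seed=42) is SamplingType.RANDOM_SEED  # only this case needs one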
@@ -382,7 +382,8 @@ def execute_model(
                 # Rewind the generator state as if the token was not sampled.
                 generator = self.input_batch.generators.get(i)
                 if generator is not None:
-                    generator.set_offset(generator.get_offset() - 1)
+                    # This relies on cuda-specific torch-internal impl details
+                    generator.set_offset(generator.get_offset() - 4)
 
         if sampler_output.logprob_token_ids is None:
             logprob_token_ids = None
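The rewind amount changes from 1 to 4 because a CUDA torch.Generator tracks its position as a Philox offset, and set_offset() only accepts multiples of 4, so subtracting 1 cannot undo one sampled token. A hypothetical demonstration of the rewind-and-replay pattern, not part of the commit (the exact per-sample increment is a torch-internal detail that may vary across PyTorch versions and sampling kernels, which is why the patch comment hedges):

# Hypothetical demonstration; requires a CUDA-enabled PyTorch build.
# set_offset() on a CUDA generator requires a multiple of 4 (the Philox
# engine emits 32-bit values in blocks of four). Rewinding to a saved
# offset and resampling reproduces the same token.
import torch

gen = torch.Generator(device="cuda")
gen.manual_seed(1234)

probs = torch.softmax(torch.randn(32, device="cuda"), dim=-1)
offset_before = gen.get_offset()
first = torch.multinomial(probs, num_samples=1, generator=gen)
offset_after = gen.get_offset()

# Rewind by however much the sample actually consumed, then replay.
gen.set_offset(offset_before)
replay = torch.multinomial(probs, num_samples=1, generator=gen)
assert torch.equal(first, replay)  # same seed + same offset => same token
print(f"offset advanced by {offset_after - offset_before}")

Saving and restoring the offset sidesteps the internal detail; the patch instead subtracts a fixed 4, apparently because the pre-sample offset is not recorded per request at that point in execute_model.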
