Skip to content

Commit

Permalink
add mark_step for hpu
Browse files Browse the repository at this point in the history
Signed-off-by: Kunshang Ji <[email protected]>
  • Loading branch information
jikunshang committed Nov 12, 2024
1 parent eea55cc commit 4cd3598
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions vllm/worker/hpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,20 @@ def precompute_indices_and_offsets(block_size, slot_mapping, is_prompt):
return indices, offsets


def modify_decoder_layer(module: torch.nn.Module, suffix="DecoderLayer"):
    """Recursively wrap matching layers so each forward ends with mark_step.

    Walks ``module`` and all of its descendants (via ``named_children``).
    Every module whose class name ends with ``suffix`` gets its bound
    ``forward`` saved as ``module.original_forward`` and replaced by a
    wrapper that calls the original and then ``htorch.core.mark_step()``
    before returning the original result.

    Args:
        module: Root of the module tree to patch in place.
        suffix: Class-name suffix selecting which modules are wrapped.
            The same suffix is applied to every descendant.

    Returns:
        None. Modules are modified in place.
    """
    # A module that already carries ``original_forward`` has been wrapped
    # before. Wrapping it again would store the wrapper itself as
    # ``original_forward`` and make forward() recurse without end.
    already_wrapped = hasattr(module, "original_forward")

    if module.__class__.__name__.endswith(suffix) and not already_wrapped:
        module.original_forward = module.forward

        def new_forward(self, *args, **kwargs):
            ret = self.original_forward(*args, **kwargs)
            # NOTE(review): presumably this cuts the lazy-mode graph at the
            # layer boundary on HPU -- confirm against the Habana docs.
            htorch.core.mark_step()
            return ret

        # Bind the wrapper to this instance so ``self`` is passed implicitly.
        module.forward = new_forward.__get__(module)

    for _, child_module in module.named_children():
        # Propagate the caller's suffix; otherwise a custom suffix would
        # only ever be tested against the root module.
        modify_decoder_layer(child_module, suffix)


class HpuModelAdapter:

def __init__(self, model, block_size, dtype, enforce_eager):
Expand Down Expand Up @@ -636,6 +650,7 @@ def load_model(self) -> None:
else:
self.model = self.model.to("hpu")
htcore.mark_step()
modify_decoder_layer(self.model)
torch.hpu.synchronize()

with HabanaMemoryProfiler() as m_wrap:
Expand Down

0 comments on commit 4cd3598

Please sign in to comment.