Skip to content

Commit

Permalink
[Model] Remove redundant softmax when using PoolingType.STEP (vllm-pr…
Browse files (browse the repository at this point in the history)
  • Loading branch information
Maybewuss authored Nov 18, 2024
1 parent c7dec92 commit 01aae1c
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions vllm/model_executor/layers/pooler.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -118,14 +118,13 @@ def forward(
if returned_token_ids is not None and len(returned_token_ids) > 0:
hidden_states = hidden_states[:, returned_token_ids]

- logits = hidden_states.softmax(dim=-1)
step_tag_id = self.step_tag_id

offset = 0
pooled_data_lst = []
for prompt_len, seq_data_i in zip(
prompt_lens, pooling_metadata.seq_data.values()):
- pooled_data_i = logits[offset:offset + prompt_len]
+ pooled_data_i = hidden_states[offset:offset + prompt_len]
if step_tag_id is not None:
token_ids = torch.tensor(seq_data_i.prompt_token_ids)
pooled_data_i = pooled_data_i[token_ids == step_tag_id]
Expand Down

0 comments on commit 01aae1c

Please sign in to comment.