use split/squeeze instead of slice for performance (#409)
* use split/squeeze instead of slice for performance

GPU may not show a performance difference, but HPU performance improves with this change

* add copyrights
polisettyvarma authored Jul 8, 2024
1 parent c3a13be commit 330f9f2
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions megatron/model/transformer.py
@@ -1,3 +1,4 @@
+# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company.
 # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
 
 """Transformer."""
@@ -660,9 +661,10 @@ def repeat_kv(self, hidden_states, n_rep):
                                      head_dim)
 
     def split_tensor(self, mixed_x_layer):
-        query_layer = mixed_x_layer[:, :, :, :-2, :].reshape(mixed_x_layer.shape[:2] + (-1, self.hidden_size_per_attention_head))
-        key_layer = mixed_x_layer[:, :, :, -2, :]
-        value_layer = mixed_x_layer[:, :, :, -1, :]
+        query_layer, key_layer, value_layer = torch.split(mixed_x_layer, [self.num_key_value_groups, 1, 1], dim=-2)
+        query_layer = query_layer.reshape(mixed_x_layer.shape[:2] + (self.num_attention_heads_per_partition, self.hidden_size_per_attention_head))
+        key_layer = torch.squeeze(key_layer, -2)
+        value_layer = torch.squeeze(value_layer, -2)
 
         return query_layer, key_layer, value_layer

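For reference, below is a minimal standalone sketch (not the Megatron code itself) that checks the two approaches are functionally equivalent on a dummy tensor. The tensor shapes and the names num_kv_heads and num_query_heads are illustrative assumptions; they stand in for the key/value head count, self.num_attention_heads_per_partition and self.hidden_size_per_attention_head used by the real split_tensor().

import torch

# Illustrative GQA-style sizes (assumptions, not the actual model config).
seq_len, batch, num_kv_heads = 4, 2, 8
num_key_value_groups = 4                 # query heads per KV head
head_dim = 16
num_query_heads = num_kv_heads * num_key_value_groups

# Fused QKV layout that split_tensor() receives:
# [seq_len, batch, num_kv_heads, num_key_value_groups + 2, head_dim]
mixed_x_layer = torch.randn(seq_len, batch, num_kv_heads,
                            num_key_value_groups + 2, head_dim)

# Old approach: slicing along the grouped dimension.
q_old = mixed_x_layer[:, :, :, :-2, :].reshape(
    mixed_x_layer.shape[:2] + (-1, head_dim))
k_old = mixed_x_layer[:, :, :, -2, :]
v_old = mixed_x_layer[:, :, :, -1, :]

# New approach: torch.split yields narrow views without copying,
# and squeeze drops the singleton group dimension for K and V.
q_new, k_new, v_new = torch.split(
    mixed_x_layer, [num_key_value_groups, 1, 1], dim=-2)
q_new = q_new.reshape(mixed_x_layer.shape[:2] + (num_query_heads, head_dim))
k_new = torch.squeeze(k_new, -2)
v_new = torch.squeeze(v_new, -2)

# Both paths produce identical query, key and value tensors.
assert torch.equal(q_old, q_new)
assert torch.equal(k_old, k_new)
assert torch.equal(v_old, v_new)

The commit message reports no expected difference on GPU but a performance improvement on HPU; the sketch only verifies that both code paths return the same tensors.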
