Commit
change kernel allocation for sum layers
liuanji committed Mar 30, 2024
1 parent ca8b641 commit 5373ddc
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/pyjuice/layer/sum_layer.py
@@ -504,9 +504,9 @@ def _fw_triton_block_sparse_tlmm_kernel(node_mars, element_mars, params, nids, c

         if use_fp16 == 1:
             # Built-in matmul kernel of triton + float16
-            epars_fp16 = (epars * (2**12)).to(tl.float16)
+            epars_fp16 = (epars * (2**4)).to(tl.float16)
             emars_fp16 = emars_sub.to(tl.float16)
-            nmars = tl.dot(epars_fp16, emars_fp16).to(tl.float32) / (2**12)
+            nmars = tl.dot(epars_fp16, emars_fp16).to(tl.float32) / (2**4)
         else:
             # Built-in matmul kernel of triton + float32
             nmars = tl.dot(epars, emars_sub)
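
For context, the fp16 path scales the edge parameters by a power of two before casting to float16, runs the matmul, and divides the float32 result by the same factor afterwards; this commit lowers that factor from 2**12 to 2**4. Below is a minimal PyTorch sketch of the same pattern, not the Triton kernel itself; the function name and the `scale` argument are illustrative.

import torch

def scaled_fp16_matmul(epars: torch.Tensor, emars: torch.Tensor, scale: float = 2**4) -> torch.Tensor:
    # Scale the parameters up before the float16 cast, multiply, then undo the
    # scale on the float32 result. Because the factor is a power of two, the
    # rescaling only shifts the exponent and cancels exactly, up to float16
    # rounding of the scaled inputs.
    epars_fp16 = (epars * scale).to(torch.float16)
    emars_fp16 = emars.to(torch.float16)
    return (epars_fp16 @ emars_fp16).to(torch.float32) / scale
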
@@ -1260,7 +1260,7 @@ def _backward(self, node_flows: torch.Tensor, element_flows: torch.Tensor,
         elif num_edges <= 32768:
             mode = self.BLOCK_SPARSE
         else:
-            mode = self.SPARSE
+            mode = self.BLOCK_SPARSE
 
         if mode == self.BLOCK_SPARSE:
             self._backward_block_sparse(
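
The second hunk changes the backward-pass kernel dispatch: edge counts above 32768, which previously fell back to the sparse kernel, now also use the block-sparse kernel. A rough sketch of the selection logic after this change; the earlier branches and the mode constants are paraphrased stand-ins, not copied from the file.

# Illustrative stand-ins for the layer's mode flags; the real values live on the layer class.
BLOCK_SPARSE = "block_sparse"
SPARSE = "sparse"

def choose_backward_mode(num_edges: int) -> str:
    # (Branches for smaller edge counts omitted.)
    if num_edges <= 32768:
        return BLOCK_SPARSE
    else:
        # Changed in this commit: this branch previously selected SPARSE.
        return BLOCK_SPARSE
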
