From 34e2855b513983e14d2548f9c42dbea2bf9917d1 Mon Sep 17 00:00:00 2001 From: Michal Szutenberg <37601244+szutenberg@users.noreply.github.com> Date: Wed, 17 Jul 2024 07:28:17 +0200 Subject: [PATCH] WA for numerically unstable block_softmax (#104) --- vllm/hpu/ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/hpu/ops.py b/vllm/hpu/ops.py index b27e7e712c160..a8aa35dc39df9 100644 --- a/vllm/hpu/ops.py +++ b/vllm/hpu/ops.py @@ -42,10 +42,12 @@ def block2batch(tensor, block_mapping): def block_softmax(batch_size, attn, block_mapping): + attn.sub_(10.0) attn = attn.exp_() sums = attn.sum(dim=-1).unsqueeze(-1) sums = block2batch(sums, block_mapping) sums = batch2block(sums, block_mapping) + sums.add_(1.0e-12) attn.div_(sums) return attn