From 4db04baf2a7f21caf7ca9fc182e5a8ab96fb747c Mon Sep 17 00:00:00 2001 From: Or Sharir Date: Wed, 13 Mar 2024 21:18:25 +0200 Subject: [PATCH] Add missing kernel for CodeLlama-34B on A/H100 (no tensor parallelism) when using Multi-LoRA. (#3350) --- csrc/punica/bgmv/bgmv_config.h | 1 + tests/lora/test_punica.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/csrc/punica/bgmv/bgmv_config.h b/csrc/punica/bgmv/bgmv_config.h index 4dc90de1ab42a..a7415dfc91369 100644 --- a/csrc/punica/bgmv/bgmv_config.h +++ b/csrc/punica/bgmv/bgmv_config.h @@ -43,6 +43,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X, f(in_T, out_T, W_T, narrow, 14336) \ f(in_T, out_T, W_T, narrow, 16384) \ f(in_T, out_T, W_T, narrow, 20480) \ + f(in_T, out_T, W_T, narrow, 22016) \ f(in_T, out_T, W_T, narrow, 24576) \ f(in_T, out_T, W_T, narrow, 28672) \ f(in_T, out_T, W_T, narrow, 32000) \ diff --git a/tests/lora/test_punica.py b/tests/lora/test_punica.py index cbe0f6fa2e851..fd707766c6a30 100644 --- a/tests/lora/test_punica.py +++ b/tests/lora/test_punica.py @@ -45,7 +45,7 @@ def _lora_ref_impl( H1 = H2 = [ 128, 256, 512, 1024, 1280, 2048, 2560, 2752, 3072, 3456, 3584, 4096, 5120, 5504, 5632, 6144, 6912, 7168, 8192, 9216, 10240, 11008, 13824, 14336, - 24576, 32000, 32256, 32512, 32768, 33024 + 22016, 24576, 32000, 32256, 32512, 32768, 33024 ] SEED = [0xabcdabcd987]