[ Bugfix ] Fix AutoFP8 fp8 marlin (vllm-project#6609)
robertgshaw2-neuralmagic authored Jul 20, 2024
1 parent f952bbc commit 082ecd8
Showing 1 changed file with 2 additions and 1 deletion.
@@ -76,7 +76,8 @@ def prepare_fp8_layer_for_marlin(layer: torch.nn.Module) -> None:
     # WEIGHT SCALES
     # Currently Marlin doesn't support per-tensor scales, so we
     # expand it to channelwise
-    is_channelwise = layer.weight_scale.shape[0] == part_size_n
+    is_channelwise = (len(layer.weight_scale.shape) > 0
+                      and layer.weight_scale.shape[0] == part_size_n)
     if is_channelwise:
         scales = layer.weight_scale
     else:
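The change guards against `weight_scale` being a 0-dim tensor (the per-tensor case), whose shape is the empty tuple `()`; indexing `shape[0]` on such a tensor raises `IndexError`. A minimal sketch of the guarded check, using NumPy arrays as a stand-in for `torch` tensors (the helper name `is_channelwise_scale` is illustrative, not from the vLLM codebase):

```python
import numpy as np

def is_channelwise_scale(weight_scale, part_size_n):
    # Per-tensor scales are 0-dim: shape == (), so shape[0] would
    # raise IndexError. Check the rank before indexing, exactly as
    # the fixed line in prepare_fp8_layer_for_marlin does.
    return (len(weight_scale.shape) > 0
            and weight_scale.shape[0] == part_size_n)

per_tensor = np.array(0.5)   # 0-dim scale, shape ()
channelwise = np.ones(128)   # one scale per output channel

print(is_channelwise_scale(per_tensor, 128))   # False, and no crash
print(is_channelwise_scale(channelwise, 128))  # True
```

The short-circuiting `and` is what makes this safe: when the rank check fails, the `shape[0]` index is never evaluated.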
