diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index de062d609fdfa..205a7e19811e8 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -251,7 +251,8 @@ def process_weights_after_loading(self, layer: Module) -> None:
         if envs.VLLM_FP8_PADDING and weight.stride(-1) == 1 \
                 and (weight.stride(-2) * weight.element_size()) % 512 == 0:
             num_pad = 256 // weight.element_size()
-            weight = F.pad(weight, (0, num_pad), "constant", 0)[..., :-num_pad]
+            weight = F.pad(weight, (0, num_pad), "constant",
+                           0)[..., :-num_pad]
             torch.cuda.empty_cache()

         # Update layer with new values.
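
Note (not part of the patch): a minimal standalone sketch of the pad-then-slice trick in the hunk above. The tensor shape and dtype here are illustrative assumptions, not values from vLLM; the point is only to show why the guard tests the row pitch and what the slice leaves behind.

    # F.pad allocates a fresh tensor with num_pad extra elements per row;
    # slicing them off restores the logical shape while the row stride stays
    # padded, so consecutive rows no longer start at 512-byte multiples.
    import torch
    import torch.nn.functional as F

    weight = torch.zeros(8, 256, dtype=torch.float16)  # row pitch = 512 bytes
    num_pad = 256 // weight.element_size()             # 128 elements for fp16

    padded = F.pad(weight, (0, num_pad), "constant", 0)[..., :-num_pad]

    print(padded.shape)    # torch.Size([8, 256]) -- logical shape unchanged
    print(padded.stride()) # (384, 1) -- row stride padded from 256 to 384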