
[Bugfix] Fix spurious "No compiled cutlass_scaled_mm ..." for W8A8 on Turing (vllm-project#9487)

Signed-off-by: NickLucche <[email protected]>
LucasWilkinson authored and NickLucche committed Oct 31, 2024
1 parent 3f1a3a8 commit b81a246
Showing 2 changed files with 7 additions and 5 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -252,7 +252,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     message(STATUS "Building Marlin kernels for archs: ${MARLIN_ARCHS}")
   else()
     message(STATUS "Not building Marlin kernels as no compatible archs found"
-                   "in CUDA target architectures")
+                   " in CUDA target architectures")
   endif()

 #
@@ -432,7 +432,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     message(STATUS "Building Marlin MOE kernels for archs: ${MARLIN_MOE_ARCHS}")
   else()
     message(STATUS "Not building Marlin MOE kernels as no compatible archs found"
-                   "in CUDA target architectures")
+                   " in CUDA target architectures")
   endif()
 endif()

8 changes: 5 additions & 3 deletions csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu
@@ -137,9 +137,11 @@ void cutlass_scaled_mm(torch::Tensor& c, torch::Tensor const& a,
     return;
   }

-  // Turing
-  TORCH_CHECK(version_num >= 75);
-  cutlass_scaled_mm_sm75(c, a, b, a_scales, b_scales, bias);
+  if (version_num >= 75) {
+    // Turing
+    cutlass_scaled_mm_sm75(c, a, b, a_scales, b_scales, bias);
+    return;
+  }
 #endif

   TORCH_CHECK_NOT_IMPLEMENTED(
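The core of the change above is control flow: the hard TORCH_CHECK(version_num >= 75) assertion becomes an ordinary guarded branch that returns early, so anything the compiled kernels do not cover falls through to the TORCH_CHECK_NOT_IMPLEMENTED call below the #endif. A minimal, self-contained C++ sketch of that early-return dispatch shape follows; the stub functions, names, and plain exception are illustrative stand-ins, not the vLLM entry points or its error text.

#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical stand-ins for the compiled kernel entry points
// (not the real cutlass_scaled_mm_sm* symbols).
void scaled_mm_sm80_stub() { std::cout << "sm80 kernel\n"; }
void scaled_mm_sm75_stub() { std::cout << "sm75 (Turing) kernel\n"; }

// Early-return dispatch shape used by the patched function: each supported
// compute capability handles its range and returns; anything left over
// reaches a single "not implemented" error instead of a failed assertion.
void scaled_mm_dispatch(int version_num) {
  if (version_num >= 80) {
    scaled_mm_sm80_stub();
    return;
  }
  if (version_num >= 75) {
    // Turing
    scaled_mm_sm75_stub();
    return;
  }
  throw std::runtime_error(
      "No compiled scaled_mm kernel for compute capability " +
      std::to_string(version_num));
}

int main() {
  scaled_mm_dispatch(75);  // Turing now takes the guarded branch above
  return 0;
}

With the old code, a compute capability below 7.5 tripped the assertion itself; with the guarded branch it reaches the dedicated not-implemented message instead.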
