CUDA: fix tensor core logic for Pascal and HIP
JohannesGaessler committed Dec 29, 2023
1 parent 65e5f6d commit 9517fce
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions ggml-cuda.cu
@@ -134,7 +134,7 @@
 // TODO: improve this to be correct for more hardware
 // for example, currently fails for GeForce GTX 1660 which is TURING arch (> VOLTA) but does not have tensor cores
 // probably other such cases, and not sure what happens on AMD hardware
-#if !defined(GGML_CUDA_FORCE_MMQ)
+#if !defined(GGML_CUDA_FORCE_MMQ) && !defined(GGML_USE_HIPBLAS)
 #define CUDA_USE_TENSOR_CORES
 #endif

@@ -8663,7 +8663,7 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
     }

 #ifdef CUDA_USE_TENSOR_CORES
-    const bool use_tensor_cores = true;
+    const bool use_tensor_cores = min_compute_capability >= CC_VOLTA;
 #else
     const bool use_tensor_cores = false;
 #endif
@@ -8706,7 +8706,7 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1

     // when tensor cores are available, use them for large batch size
     // ref: https://github.com/ggerganov/llama.cpp/pull/3776
-    if (use_tensor_cores && min_compute_capability >= CC_VOLTA && src1->ne[1] > MMQ_MAX_BATCH_SIZE) {
+    if (use_tensor_cores && src1->ne[1] > MMQ_MAX_BATCH_SIZE) {
        use_mul_mat_q = false;
    }
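
Taken together, the three hunks move the tensor core decision to a single consistent gate: HIP builds never define CUDA_USE_TENSOR_CORES, and in CUDA builds the runtime compute-capability check is folded into use_tensor_cores itself, so Pascal (CC 6.x) no longer slips through the CUDA_USE_TENSOR_CORES branch. Below is a minimal sketch of the post-commit logic condensed into one place for illustration; prefer_tensor_cores is a hypothetical helper name, and the CC_VOLTA and MMQ_MAX_BATCH_SIZE values are assumptions based on their definitions in ggml-cuda.cu at this time, not taken from this diff.

#include <cstdint>

// Assumed values, mirroring ggml-cuda.cu at this commit:
#define CC_VOLTA 700             // compute capability 7.0, first arch with tensor cores
#define MMQ_MAX_BATCH_SIZE 32    // above this batch size, the tensor core path is preferred

// HIP builds (GGML_USE_HIPBLAS) now never define CUDA_USE_TENSOR_CORES.
#if !defined(GGML_CUDA_FORCE_MMQ) && !defined(GGML_USE_HIPBLAS)
#define CUDA_USE_TENSOR_CORES
#endif

// Hypothetical helper: returns true when the quantized mul_mat_q kernels
// should be skipped in favor of the tensor core (cuBLAS) path.
static bool prefer_tensor_cores(int min_compute_capability, int64_t batch_size) {
#ifdef CUDA_USE_TENSOR_CORES
    // Pascal compiles with CUDA_USE_TENSOR_CORES defined but has no tensor
    // cores, so the capability check is folded in here ...
    const bool use_tensor_cores = min_compute_capability >= CC_VOLTA;
#else
    // HIP and GGML_CUDA_FORCE_MMQ builds never take the tensor core path.
    const bool use_tensor_cores = false;
#endif
    // ... which lets the batch-size check drop its own CC_VOLTA test.
    return use_tensor_cores && batch_size > MMQ_MAX_BATCH_SIZE;
}

Checking the capability once, where use_tensor_cores is assigned, means every later consumer of the flag is correct by construction instead of each call site repeating the min_compute_capability >= CC_VOLTA test.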

