From 8a25bd41b3c1c9b1bdb983e67a22c5b1be32256d Mon Sep 17 00:00:00 2001 From: Kylin Date: Sun, 20 Aug 2023 00:07:50 +0800 Subject: [PATCH 1/2] ggml: support CUDA's half type for aarch64 (#1455) Support CUDA's half type for aarch64 in the ggml_fp16_t definition. --- ggml.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ggml.h b/ggml.h index bdbd128004332..413bc1de58525 100644 --- a/ggml.h +++ b/ggml.h @@ -255,9 +255,11 @@ extern "C" { #endif -#ifdef __ARM_NEON +#if defined(__ARM_NEON) && !defined(GGML_CUDA_F16) // we use the built-in 16-bit float type typedef __fp16 ggml_fp16_t; +#elif defined(GGML_CUDA_F16) + typedef half ggml_fp16_t; #else typedef uint16_t ggml_fp16_t; #endif From 16ab5f1b184f90552cc944e82b2f25d68d2ce367 Mon Sep 17 00:00:00 2001 From: Kylin Date: Sun, 20 Aug 2023 01:57:24 +0800 Subject: [PATCH 2/2] ggml: use __CUDACC__ to recognise the nvcc compiler --- ggml.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ggml.h b/ggml.h index 413bc1de58525..4fa78a4273d82 100644 --- a/ggml.h +++ b/ggml.h @@ -255,11 +255,10 @@ extern "C" { #endif -#if defined(__ARM_NEON) && !defined(GGML_CUDA_F16) - // we use the built-in 16-bit float type - typedef __fp16 ggml_fp16_t; -#elif defined(GGML_CUDA_F16) +#if defined(__ARM_NEON) && defined(__CUDACC__) typedef half ggml_fp16_t; +#elif defined(__ARM_NEON) + typedef __fp16 ggml_fp16_t; #else typedef uint16_t ggml_fp16_t; #endif