-
Notifications
You must be signed in to change notification settings - Fork 3.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
compilation error on aarch64 #1018
Comments
Referencing ggerganov/llama.cpp#1455 and other issues related to aarch64, I made the following changes: diff --git a/Makefile b/Makefile
index 7bb7e31..837005e 100644
--- a/Makefile
+++ b/Makefile
@@ -160,12 +160,12 @@ ifdef WHISPER_OPENBLAS
endif
ifdef WHISPER_CUBLAS
- CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
- CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
+ CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/aarch64-linux/include
+ CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/aarch64-linux/include
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
WHISPER_OBJ += ggml-cuda.o
NVCC = nvcc
- NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
+ NVCCFLAGS = --forward-unknown-to-host-compiler -arch=sm_53
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
diff --git a/ggml.h b/ggml.h
index 51a616c..dae3b9d 100644
--- a/ggml.h
+++ b/ggml.h
@@ -212,9 +212,11 @@
extern "C" {
#endif
-#ifdef __ARM_NEON
+#if defined(__ARM_NEON) && !defined(__CUDACC__)
// we use the built-in 16-bit float type
typedef __fp16 ggml_fp16_t;
+#elif defined(__ARM_NEON) && defined(__CUDACC__)
+ typedef half ggml_fp16_t;
#else
typedef uint16_t ggml_fp16_t;
#endif However, this leads to new errors:
I spent a long time hamfisting CUDA docs to see if I could add those missing definitions, but the error at line 676 consistently failed (either at compile time or at runtime) even after trying to hack in those definitions. I'm at a loss from here, but I'm pretty sure part of the error is that the Jetson Nano Devkit/Tegra X1 is on CUDA 10.2 but the Orins people are using successfully are on CUDA 11 or later. |
@paulcombe For Jetson Nano, also make following changes: diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 50df20e..50fc308 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -1855,7 +1855,7 @@ void ggml_init_cublas() {
// create cublas handle
CUBLAS_CHECK(cublasCreate(&g_cublas_handles[id]));
- CUBLAS_CHECK(cublasSetMathMode(g_cublas_handles[id], CUBLAS_TF32_TENSOR_OP_MATH));
+ CUBLAS_CHECK(cublasSetMathMode(g_cublas_handles[id], CUBLAS_TENSOR_OP_MATH));
}
// configure logging to stdout
@@ -2375,7 +2375,7 @@ static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggm
// wait for main GPU data if necessary
if (split && id != g_main_device) {
- CUDA_CHECK(cudaStreamWaitEvent(cudaStream_main, src0_extra->events[g_main_device]));
+ CUDA_CHECK(cudaStreamWaitEvent(cudaStream_main, src0_extra->events[g_main_device],0));
}
if (src0_on_device && src0_is_contiguous) {
@@ -2577,7 +2577,7 @@ static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggm
CUDA_CHECK(cudaSetDevice(g_main_device));
for (int id = 0; id < g_device_count; ++id) {
if (id != g_main_device) {
- CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams_main[g_main_device], src0_extra->events[id]));
+ CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams_main[g_main_device], src0_extra->events[id],0));
}
}
} |
I confirmed that with the modifications suggested above, applied to b948361,
We've verified using jtop that
|
@paulcombe Edit: I commented out
and it worked. |
I am trying to compile whisper.cpp on jetson nx device (aarch64).
When I compile the make file I am erroring out with following:
Anything you can suggest to get past this?
The text was updated successfully, but these errors were encountered: