// cuda_shim.cc
// Interposes cudaMemcpyAsync and cudaLaunchKernel: each wrapper logs the call
// and forwards it to the next definition located with dlsym(RTLD_NEXT, ...).
#define _GNU_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <dlfcn.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include "kutrace_lib.h"
static cudaError_t(*real_cudaMemcpyAsync)(void*, const void*, size_t, cudaMemcpyKind, cudaStream_t) = NULL;
static cudaError_t(*real_cudaLaunchKernel)(const void*, dim3, dim3, void**, size_t, cudaStream_t) = NULL;
// Library constructor: runs automatically when this shared object is loaded.
// Looks up the next definitions of cudaMemcpyAsync and cudaLaunchKernel in the
// symbol search order (RTLD_NEXT) and stores them in the real_* pointers so the
// wrappers in this file can forward to them.
//
// A failed lookup leaves the corresponding pointer NULL; the failure is
// reported on stderr instead of being silently ignored.
void __attribute__((constructor)) init(void)
{
    printf("In the shim init function\n");

    // Clear any stale error so that dlerror() below reflects only our lookup.
    dlerror();
    real_cudaMemcpyAsync =
        reinterpret_cast<cudaError_t(*)(void*, const void*, size_t, cudaMemcpyKind, cudaStream_t)>
        (dlsym(RTLD_NEXT, "cudaMemcpyAsync"));
    if (real_cudaMemcpyAsync == NULL) {
        const char* why = dlerror();
        fprintf(stderr, "cuda_shim: cannot resolve cudaMemcpyAsync: %s\n",
                why != NULL ? why : "symbol not found");
    }

    dlerror();
    real_cudaLaunchKernel =
        reinterpret_cast<cudaError_t(*)(const void*, dim3, dim3, void**, size_t, cudaStream_t)>
        (dlsym(RTLD_NEXT, "cudaLaunchKernel"));
    if (real_cudaLaunchKernel == NULL) {
        const char* why = dlerror();
        fprintf(stderr, "cuda_shim: cannot resolve cudaLaunchKernel: %s\n",
                why != NULL ? why : "symbol not found");
    }
}
// Interposed cudaMemcpyAsync: logs the call, then forwards all arguments
// unchanged to the next cudaMemcpyAsync definition and returns its result.
//
// Returns cudaErrorUnknown (without forwarding) if no underlying
// implementation can be resolved.
extern "C" cudaError_t cudaMemcpyAsync ( void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t str )
{
    printf("Inside cudaMemcpyAsync shim\n");

    // The pointer is still NULL if this wrapper is reached before init() has
    // run, or if the lookup there failed; retry here rather than calling
    // through a NULL function pointer.
    if (real_cudaMemcpyAsync == NULL) {
        real_cudaMemcpyAsync =
            reinterpret_cast<cudaError_t(*)(void*, const void*, size_t, cudaMemcpyKind, cudaStream_t)>
            (dlsym(RTLD_NEXT, "cudaMemcpyAsync"));
        if (real_cudaMemcpyAsync == NULL) {
            fprintf(stderr, "cuda_shim: real cudaMemcpyAsync is unavailable\n");
            return cudaErrorUnknown;
        }
    }

    return real_cudaMemcpyAsync( dst, src, count, kind, str );
}
// Interposed cudaLaunchKernel: logs the call, then forwards all arguments
// unchanged to the next cudaLaunchKernel definition and returns its result.
//
// Returns cudaErrorUnknown (without forwarding) if no underlying
// implementation can be resolved.
extern "C" cudaError_t cudaLaunchKernel(const void* func, dim3 gridDim, dim3 blockDim, void** args, size_t sharedMem, cudaStream_t stream)
{
    printf("Inside cudaLaunchKernel\n");

    // The pointer is still NULL if this wrapper is reached before init() has
    // run, or if the lookup there failed; retry here rather than calling
    // through a NULL function pointer.
    if (real_cudaLaunchKernel == NULL) {
        real_cudaLaunchKernel =
            reinterpret_cast<cudaError_t(*)(const void*, dim3, dim3, void**, size_t, cudaStream_t)>
            (dlsym(RTLD_NEXT, "cudaLaunchKernel"));
        if (real_cudaLaunchKernel == NULL) {
            fprintf(stderr, "cuda_shim: real cudaLaunchKernel is unavailable\n");
            return cudaErrorUnknown;
        }
    }

    return real_cudaLaunchKernel(func, gridDim, blockDim, args, sharedMem, stream);
}