-
-
Notifications
You must be signed in to change notification settings - Fork 88
/
cuda_sample.cu
41 lines (29 loc) · 1.15 KB
/
cuda_sample.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// A CUDA app. We will convert this to OpenCL, and run it :-)
#include <iostream>
#include <memory>
using namespace std;
#include <cuda_runtime.h>
// Kernel: stores `value` into data[idx].
//
// Only the thread whose threadIdx.x is 0 performs the store; every other
// thread returns without touching memory. When launched with several blocks,
// each block's thread 0 writes the same value to the same element.
//
// data  - pointer the kernel writes through (must be dereferenceable on the device)
// idx   - element index to overwrite; NOT bounds-checked here, the caller must
//         guarantee it lies inside the allocation
// value - float to store
__global__ void setValue(float *data, int idx, float value) {
    const bool isWriter = (threadIdx.x == 0);
    if (!isWriter) {
        return;
    }
    data[idx] = value;
}
// Returns true when `status` is cudaSuccess. Otherwise prints the failing
// step together with the runtime's error string to stderr and returns false.
static bool checkCuda(cudaError_t status, const char *step) {
    if (status == cudaSuccess) {
        return true;
    }
    cerr << "CUDA failure in " << step << ": " << cudaGetErrorString(status) << endl;
    return false;
}

// Launches setValue with the sample's fixed configuration (32 blocks of 32
// threads) and checks cudaGetLastError(), because a kernel launch does not
// return a status of its own.
static bool launchSetValue(float *gpuFloats, int idx, float value) {
    setValue<<<dim3(32, 1, 1), dim3(32, 1, 1)>>>(gpuFloats, idx, value);
    return checkCuda(cudaGetLastError(), "setValue launch");
}

// Copies `bytes` bytes from gpuFloats into hostFloats and prints element 2.
// Returns false (printing nothing to stdout) if the copy fails.
static bool readBackAndPrint(float *hostFloats, const float *gpuFloats, size_t bytes) {
    if (!checkCuda(cudaMemcpy(hostFloats, gpuFloats, bytes, cudaMemcpyDeviceToHost),
                   "device-to-host copy")) {
        return false;
    }
    cout << "hostFloats[2] " << hostFloats[2] << endl;
    return true;
}

// Runs the three demonstration steps against an already-allocated device
// buffer. Returns false as soon as any CUDA call fails; the caller owns and
// frees gpuFloats.
static bool runSample(float *gpuFloats) {
    float hostFloats[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    const size_t bytes = sizeof(hostFloats);

    // Step 1: the kernel writes 123 into element 2; read it back.
    if (!launchSetValue(gpuFloats, 2, 123.0f)) {
        return false;
    }
    if (!readBackAndPrint(hostFloats, gpuFloats, bytes)) {
        return false;
    }

    // Step 2: overwrite the same element with 222; read it back.
    if (!launchSetValue(gpuFloats, 2, 222.0f)) {
        return false;
    }
    if (!readBackAndPrint(hostFloats, gpuFloats, bytes)) {
        return false;
    }

    // Step 3: host -> device -> host round trip without a kernel.
    hostFloats[2] = 444.0f;
    if (!checkCuda(cudaMemcpy(gpuFloats, hostFloats, bytes, cudaMemcpyHostToDevice),
                   "host-to-device copy")) {
        return false;
    }
    // Clobber the host copy so that printing 444 proves the value really
    // came back from the device rather than being left over on the host.
    hostFloats[2] = 555.0f;
    return readBackAndPrint(hostFloats, gpuFloats, bytes);
}

// Entry point: allocates N floats on the device, runs the sample, frees the
// buffer. Prints three "hostFloats[2] <value>" lines on success.
// Returns 0 on success and 1 if any CUDA runtime call reported an error
// (the error is described on stderr).
int main(int argc, char *argv[]) {
    (void)argc;  // command-line arguments are not used
    (void)argv;

    const int N = 1024;
    float *gpuFloats = nullptr;
    if (!checkCuda(cudaMalloc((void **)(&gpuFloats), N * sizeof(float)), "cudaMalloc")) {
        return 1;
    }

    const bool sampleOk = runSample(gpuFloats);

    // Free on every path, including after a failed step, so the buffer is
    // never leaked.
    const bool freeOk = checkCuda(cudaFree(gpuFloats), "cudaFree");

    return (sampleOk && freeOk) ? 0 : 1;
}