-
Notifications
You must be signed in to change notification settings - Fork 7
/
basic_memcpy.cu
43 lines (32 loc) · 1.17 KB
/
basic_memcpy.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#include <cstdio>
#include <cstdlib>
/*
 * addOne: increments each of the first `size` elements of `array` by one.
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per element. Threads whose
 * global index is >= size do nothing, so the launch may be over-provisioned,
 * but elements beyond gridDim.x * blockDim.x are not visited.
 *
 * array : device pointer to at least `size` ints (read and written in place)
 * size  : number of elements to update; values <= 0 make the kernel a no-op
 *
 * Uses no shared memory.
 */
__global__ void addOne(int *array, int size) {
    // Global index across the whole grid. Using threadIdx.x alone would make
    // every block of a multi-block launch hit the same leading elements.
    // Widened to 64 bits so blockIdx.x * blockDim.x cannot wrap.
    const unsigned long long idx =
        (unsigned long long)blockIdx.x * blockDim.x + threadIdx.x;

    // The explicit sign check matters: comparing an unsigned index against a
    // negative int would convert the int to a huge unsigned value and let
    // every thread through the guard.
    if (size > 0 && idx < (unsigned long long)size)
        array[idx] += 1;
}
// Returns true when `status` is cudaSuccess; otherwise reports the failed
// step (`what`) and the runtime's error string on stderr and returns false.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Prints `label` followed by `count` space-separated integers and a newline.
static void printArray(const char *label, const int *values, int count) {
    printf("%s", label);
    for (int i = 0; i < count; i++)
        printf("%d ", values[i]);
    printf("\n");
}

/*
 * Demonstrates a basic host -> device -> host round trip:
 *   1. fill a 32-int host array with 0..31 and print it,
 *   2. copy it to device memory,
 *   3. run addOne on it with a single block of 32 threads,
 *   4. copy the result back and print it.
 *
 * Returns EXIT_SUCCESS when every step succeeds, EXIT_FAILURE otherwise
 * (the failing step is reported on stderr). Command-line arguments are unused.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    const int numElements = 32;
    const size_t numBytes = sizeof(int) * numElements;

    // Allocate 32 integer array of pageable host memory
    int *h_array = (int *) malloc(numBytes);
    if (h_array == NULL) {
        fprintf(stderr, "Failed to allocate %zu bytes of host memory\n", numBytes);
        return EXIT_FAILURE;
    }
    // Alternative: page-locked memory
    // int *h_array;
    // cudaMallocHost(&h_array, numBytes);   // release with cudaFreeHost

    // Allocate 32 integer array of device memory
    int *d_array = NULL;
    if (!cudaSucceeded(cudaMalloc(&d_array, numBytes), "cudaMalloc")) {
        free(h_array);
        return EXIT_FAILURE;
    }

    // Initialize the array with elements 0, 1, ..., n-1
    for (int i = 0; i < numElements; i++)
        h_array[i] = i;
    printArray("Initial array contents: ", h_array, numElements);

    // Each step runs only if the previous one succeeded, so the buffers are
    // always released below regardless of where a failure happened.
    bool ok = cudaSucceeded(
        cudaMemcpy(d_array, h_array, numBytes, cudaMemcpyHostToDevice),
        "host-to-device copy");

    if (ok) {
        addOne<<<1, numElements>>>(d_array, numElements);
        // A kernel launch returns nothing; launch errors must be fetched explicitly.
        ok = cudaSucceeded(cudaGetLastError(), "addOne launch");
    }

    if (ok) {
        // This blocking copy is issued after the kernel on the same (default)
        // stream, so no separate synchronize call is made before reading back.
        ok = cudaSucceeded(
            cudaMemcpy(h_array, d_array, numBytes, cudaMemcpyDeviceToHost),
            "device-to-host copy");
    }

    if (ok)
        printArray("Final array contents: ", h_array, numElements);

    free(h_array);
    if (!cudaSucceeded(cudaFree(d_array), "cudaFree"))
        ok = false;

    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}