-
Notifications
You must be signed in to change notification settings - Fork 50
/
test_vector_add.py
executable file
·38 lines (30 loc) · 1.06 KB
/
test_vector_add.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
"""Minimal example for a CUDA Kernel unit test with the Kernel Tuner"""
import numpy
from kernel_tuner import run_kernel
import pytest
def test_vector_add():
    """Run the ``vector_add`` CUDA kernel once and check it against NumPy.

    The test is skipped (not failed) when ``pycuda.driver`` cannot be
    imported or ``drv.init()`` raises, so the suite still passes on
    machines without a usable CUDA setup.
    """
    # Check pycuda is installed and a CUDA capable device is present; if not,
    # skip the test. ``Exception`` already covers ``ImportError``, so a single
    # handler is enough for both the import and the driver initialisation.
    try:
        import pycuda.driver as drv
        drv.init()
    except Exception:
        pytest.skip("PyCuda not installed or no CUDA device detected")

    # ``block_size_x`` is not declared in the kernel source itself; presumably
    # Kernel Tuner defines it from ``params`` at compile time -- confirm
    # against the Kernel Tuner documentation.
    kernel_string = """
__global__ void vector_add(float *c, float *a, float *b, int n) {
int i = blockIdx.x * block_size_x + threadIdx.x;
if (i<n) {
c[i] = a[i] + b[i];
}
}
"""

    size = 10000000
    problem_size = (size, 1)

    a = numpy.random.randn(size).astype(numpy.float32)
    b = numpy.random.randn(size).astype(numpy.float32)
    c = numpy.zeros_like(b)
    n = numpy.int32(size)

    # Compute the reference on the host before launching the kernel, so the
    # comparison does not rely on ``a`` and ``b`` being left untouched.
    expected = a + b

    # Argument order mirrors the kernel signature: (c, a, b, n).
    args = [c, a, b, n]
    params = {"block_size_x": 512}

    answer = run_kernel("vector_add", kernel_string, problem_size, args, params)

    # ``answer[0]`` is compared as the output vector; presumably run_kernel
    # returns the arguments in the same order as ``args`` -- verify.
    assert numpy.allclose(answer[0], expected, atol=1e-8)