
Merge branch 'fpga_codegen_fix' of https://github.com/manuelburger/dace into fpga_codegen_fix

manuelburger committed Mar 23, 2020
2 parents 834c410 + 5e9ca83 commit 04fa088
Showing 2 changed files with 109 additions and 0 deletions.
40 changes: 40 additions & 0 deletions dace/runtime/include/dace/math.h
@@ -84,6 +84,46 @@ static DACE_CONSTEXPR DACE_HDFI int sgn(T val) {
    return (T(0) < val) - (val < T(0));
}


// Workarounds for float16 in CUDA
// NOTES: * Half precision types are not trivially convertible, so other types
// will be implicitly converted to it in min/max.
// * half comparisons are designated "device-only", so they must call
// device-only functions as well.
#ifdef __CUDACC__
template <typename... Ts>
DACE_CONSTEXPR __device__ __forceinline__ dace::float16 min(const dace::float16& a, const dace::float16& b, const Ts&... c)
{
    return (a < b) ? min(a, c...) : min(b, c...);
}
template <typename T, typename... Ts>
DACE_CONSTEXPR __device__ __forceinline__ dace::float16 min(const dace::float16& a, const T& b, const Ts&... c)
{
    return (a < dace::float16(b)) ? min(a, c...) : min(dace::float16(b), c...);
}
template <typename T, typename... Ts>
DACE_CONSTEXPR __device__ __forceinline__ dace::float16 min(const T& a, const dace::float16& b, const Ts&... c)
{
    return (dace::float16(a) < b) ? min(dace::float16(a), c...) : min(b, c...);
}
template <typename... Ts>
DACE_CONSTEXPR __device__ __forceinline__ dace::float16 max(const dace::float16& a, const dace::float16& b, const Ts&... c)
{
    return (a > b) ? max(a, c...) : max(b, c...);
}
template <typename T, typename... Ts>
DACE_CONSTEXPR __device__ __forceinline__ dace::float16 max(const dace::float16& a, const T& b, const Ts&... c)
{
    return (a > dace::float16(b)) ? max(a, c...) : max(dace::float16(b), c...);
}
template <typename T, typename... Ts>
DACE_CONSTEXPR __device__ __forceinline__ dace::float16 max(const T& a, const dace::float16& b, const Ts&... c)
{
    return (dace::float16(a) > b) ? max(dace::float16(a), c...) : max(b, c...);
}
#endif


#ifndef DACE_SYNTHESIS

// Computes integer floor, rounding the remainder towards negative infinity.
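For context on the change above: the new overloads are what let a mixed-type call such as max(a, 0) compile in device code, since the non-half operand is converted to dace::float16 and the comparison stays on the device-only half operators. Below is a minimal usage sketch under assumptions not part of this commit: a hypothetical relu_half kernel, the overloads being visible unqualified at the call site, and the single-argument min/max base case defined elsewhere in math.h.

__global__ void relu_half(const dace::float16* in, dace::float16* out, int n) {
    // One thread per element; guard against overrun on the last block.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        // The int literal 0 selects the max(const dace::float16&, const T&, ...)
        // overload, which converts it to dace::float16 before the device-only
        // half comparison, so no implicit conversion from half is required.
        out[i] = max(in[i], 0);
    }
}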
69 changes: 69 additions & 0 deletions tests/half_cudatest.py
@@ -0,0 +1,69 @@
""" Tests for half-precision syntax quirks. """

import dace
import numpy as np

N = dace.symbol('N')


def test_relu():
    @dace.program
    def halftest(A: dace.float16[N]):
        out = np.ndarray([N], dace.float16)
        for i in dace.map[0:N]:
            with dace.tasklet:
                a << A[i]
                o >> out[i]
                o = a if a > dace.float16(0) else dace.float16(0)
        return out

    A = np.random.rand(20).astype(np.float16)
    sdfg = halftest.to_sdfg()
    sdfg.apply_gpu_transformations()
    out = sdfg(A=A, N=20)
    assert np.allclose(out, np.maximum(A, 0))


def test_relu_2():
    @dace.program
    def halftest(A: dace.float16[N]):
        out = np.ndarray([N], dace.float16)
        for i in dace.map[0:N]:
            with dace.tasklet:
                a << A[i]
                o >> out[i]
                o = max(a, 0)
        return out

    A = np.random.rand(20).astype(np.float16)
    sdfg = halftest.to_sdfg()
    sdfg.apply_gpu_transformations()
    out = sdfg(A=A, N=20)
    assert np.allclose(out, np.maximum(A, 0))


def test_dropout():
    @dace.program
    def halftest(A: dace.float16[N], mask: dace.int32[N]):
        out = np.ndarray([N], dace.float16)
        for i in dace.map[0:N]:
            with dace.tasklet:
                a << A[i]
                d << mask[i]
                o >> out[i]
                #o = a * dace.float16(d)
                o = a if d else dace.float16(0)
        return out

    A = np.random.rand(20).astype(np.float16)
    mask = np.random.randint(0, 2, size=[20]).astype(np.int32)
    sdfg = halftest.to_sdfg()
    sdfg.apply_gpu_transformations()
    out = sdfg(A=A, mask=mask, N=20)
    assert np.allclose(out, A * mask)


if __name__ == '__main__':
    test_relu()
    test_relu_2()
    test_dropout()
