From 3ecd33e29aa6dc5da5581567a7738a714d297693 Mon Sep 17 00:00:00 2001
From: benchislett
Date: Mon, 19 Aug 2019 10:19:46 +0900
Subject: [PATCH 1/4] Add some more complex operations

---
Review notes (text in this section is ignored by `git am`):
  * The Complex{Float32} methods of `exp` and `exp_fast` now use `1.0f0im`
    instead of `1.0im`. `1.0im` is a Complex{Float64} literal, so the
    original expressions promoted the result to Complex{Float64}.
  * Because of that change, the blob ids on the `index` lines of this patch
    and the next one no longer describe the resulting file content.
  * The line structure of this series was restored from a copy whose
    newlines had been collapsed; blank context lines were re-inferred from
    the hunk line counts.

 src/device/cuda/math.jl | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/device/cuda/math.jl b/src/device/cuda/math.jl
index 0c03c5af..cf2a3e2d 100644
--- a/src/device/cuda/math.jl
+++ b/src/device/cuda/math.jl
@@ -103,6 +103,9 @@
 @inline ldexp(x::Float64, y::Int32) = @wrap __nv_ldexp(x::double, y::i32)::double
 @inline ldexp(x::Float32, y::Int32) = @wrap __nv_ldexpf(x::float, y::i32)::float
 
+@inline exp(x::Complex{Float64}) = exp(x.re) * (cos(x.im) + 1.0im * sin(x.im))
+@inline exp(x::Complex{Float32}) = exp(x.re) * (cos(x.im) + 1.0f0im * sin(x.im))
+@inline exp_fast(x::Complex{Float32}) = exp_fast(x.re) * (cos_fast(x.im) + 1.0f0im * sin_fast(x.im))
 
 ## error
 
@@ -170,6 +173,8 @@
 @inline abs(f::Float32) = @wrap __nv_fabsf(f::float)::float
 @inline abs(x::Int64) = @wrap __nv_llabs(x::i64)::i64
 
+@inline abs(x::Complex{Float64}) = hypot(x.re, x.im)
+@inline abs(x::Complex{Float32}) = hypot(x.re, x.im)
 
 ## roots and powers
 
@@ -192,6 +197,9 @@
 @inline pow(x::Float32, y::Int32) = @wrap __nv_powif(x::float, y::i32)::float
 @inline pow(x::Union{Float32, Float64}, y::Int64) = pow(x, Int32(y))
 
+@inline abs2(x::Complex{Float64}) = x.re * x.re + x.im * x.im
+@inline abs2(x::Complex{Float32}) = x.re * x.re + x.im * x.im
+
 
 ## rounding and selection
 

From 323aa8f7c4a0a55f514b10996583427f6af6bf62 Mon Sep 17 00:00:00 2001
From: Filippo Vicentini
Date: Sat, 28 Sep 2019 14:42:59 +0200
Subject: [PATCH 2/4] add complex angle and logarithm functions

---
Review notes (ignored by `git am`):
  * The complex `angle` methods added here read `y.im`, but no `y` is
    defined, and pass the real part as the first argument of `atan2`.
    PATCH 3/4 replaces both lines with `atan2(x.im, x.re)`.

 src/device/cuda/math.jl | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/device/cuda/math.jl b/src/device/cuda/math.jl
index cf2a3e2d..9a9635b2 100644
--- a/src/device/cuda/math.jl
+++ b/src/device/cuda/math.jl
@@ -32,9 +32,14 @@
 @inline atan(x::Float64) = @wrap __nv_atan(x::double)::double
 @inline atan(x::Float32) = @wrap __nv_atanf(x::float)::float
 
+# ! CUDAnative.atan2 is equivalent to Base.atan
 @inline atan2(x::Float64, y::Float64) = @wrap __nv_atan2(x::double, y::double)::double
 @inline atan2(x::Float32, y::Float32) = @wrap __nv_atan2f(x::float, y::float)::float
 
+@inline angle(x::ComplexF64) = atan2(x.re, y.im)
+@inline angle(x::ComplexF32) = atan2(x.re, y.im)
+@inline angle(x::Float64) = 0.0
+@inline angle(x::Float32) = 0.0
 
 ## hyperbolic
 
@@ -66,6 +71,10 @@
 @inline log(x::Float32) = @wrap __nv_logf(x::float)::float
 @inline log_fast(x::Float32) = @wrap __nv_fast_logf(x::float)::float
 
+@inline log(x::ComplexF64) = log(abs(x)) + im * angle(x)
+@inline log(x::ComplexF32) = log(abs(x)) + im * angle(x)
+@inline log_fast(x::ComplexF32) = log_fast(abs(x)) + im * angle(x)
+
 @inline log10(x::Float64) = @wrap __nv_log10(x::double)::double
 @inline log10(x::Float32) = @wrap __nv_log10f(x::float)::float
 @inline log10_fast(x::Float32) = @wrap __nv_fast_log10f(x::float)::float

From 345956cba6528702ebd06ca7c261b3296f1d0dad Mon Sep 17 00:00:00 2001
From: Filippo Vicentini
Date: Sat, 28 Sep 2019 15:25:47 +0200
Subject: [PATCH 3/4] Add tests for all those complex functions (and a bunch of real ones)

---
Review notes (ignored by `git am`):
  * The real `angle` methods introduced here multiply `signbit(x)` by a
    negative constant; PATCH 4/4 switches the constant to the positive value.

 src/device/cuda/math.jl |   8 +--
 test/device/cuda.jl     | 122 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 125 insertions(+), 5 deletions(-)

diff --git a/src/device/cuda/math.jl b/src/device/cuda/math.jl
index 9a9635b2..ae3ae02a 100644
--- a/src/device/cuda/math.jl
+++ b/src/device/cuda/math.jl
@@ -36,10 +36,10 @@
 @inline atan2(x::Float64, y::Float64) = @wrap __nv_atan2(x::double, y::double)::double
 @inline atan2(x::Float32, y::Float32) = @wrap __nv_atan2f(x::float, y::float)::float
 
-@inline angle(x::ComplexF64) = atan2(x.re, y.im)
-@inline angle(x::ComplexF32) = atan2(x.re, y.im)
-@inline angle(x::Float64) = 0.0
-@inline angle(x::Float32) = 0.0
+@inline angle(x::ComplexF64) = atan2(x.im, x.re)
+@inline angle(x::ComplexF32) = atan2(x.im, x.re)
+@inline angle(x::Float64) = signbit(x) * -3.141592653589793
+@inline angle(x::Float32) = signbit(x) * -3.1415927f0
 
 ## hyperbolic
 
diff --git a/test/device/cuda.jl b/test/device/cuda.jl
index 29a34566..c4011c9c 100644
--- a/test/device/cuda.jl
+++ b/test/device/cuda.jl
@@ -99,9 +99,129 @@ end
         val = Array(buf)
         @test val[] ≈ x^y
     end
-end
 
 
+    @testset "angle" begin
+        buf = CuTestArray(zeros(Float32))
+        cbuf = CuTestArray(zeros(Float32))
+
+        function cuda_kernel(a, x)
+            a[] = CUDAnative.angle(x)
+            return
+        end
+
+        #op(::Float32)
+        x = rand(Float32)
+        @cuda cuda_kernel(buf, x)
+        val = Array(buf)
+        @test val[] ≈ angle(x)
+        @cuda cuda_kernel(buf, -x)
+        val = Array(buf)
+        @test val[] ≈ angle(-x)
+
+        #op(::ComplexF32)
+        x = rand(ComplexF32)
+        @cuda cuda_kernel(cbuf, x)
+        val = Array(cbuf)
+        @test val[] ≈ angle(x)
+        @cuda cuda_kernel(cbuf, -x)
+        val = Array(cbuf)
+        @test val[] ≈ angle(-x)
+
+        #op(::Float64)
+        x = rand(Float64)
+        @cuda cuda_kernel(buf, x)
+        val = Array(buf)
+        @test val[] ≈ angle(x)
+        @cuda cuda_kernel(buf, -x)
+        val = Array(buf)
+        @test val[] ≈ angle(-x)
+
+        #op(::ComplexF64)
+        x = rand(ComplexF64)
+        @cuda cuda_kernel(cbuf, x)
+        val = Array(cbuf)
+        @test val[] ≈ angle(x)
+        @cuda cuda_kernel(cbuf, -x)
+        val = Array(cbuf)
+        @test val[] ≈ angle(-x)
+    end
+
+    # dictionary of key=>tuple, where the tuple should
+    # contain the cpu command and the cuda function to test.
+    ops = Dict("exp"=>(exp, CUDAnative.exp),
+               "angle"=>(angle, CUDAnative.angle),
+               "exp2"=>(exp2, CUDAnative.exp2),
+               "exp10"=>(exp10, CUDAnative.exp10),
+               "expm1"=>(expm1, CUDAnative.expm1))
+
+    @testset "$key" for key=keys(ops)
+        cpu_op, cuda_op = ops[key]
+
+        buf = CuTestArray(zeros(Float32))
+
+        function cuda_kernel(a, x)
+            a[] = cuda_op(x)
+            return
+        end
+
+        #op(::Float32)
+        x = rand(Float32)
+        @cuda cuda_kernel(buf, x)
+        val = Array(buf)
+        @test val[] ≈ cpu_op(x)
+        @cuda cuda_kernel(buf, -x)
+        val = Array(buf)
+        @test val[] ≈ cpu_op(-x)
+
+        #op(::Float64)
+        x = rand(Float64)
+        @cuda cuda_kernel(buf, x)
+        val = Array(buf)
+        @test val[] ≈ cpu_op(x)
+        @cuda cuda_kernel(buf, -x)
+        val = Array(buf)
+        @test val[] ≈ cpu_op(-x)
+    end
+
+    # dictionary of key=>tuple, where the tuple should
+    # contain the cpu command and the cuda function to test.
+    ops = Dict("exp"=>(exp, CUDAnative.exp),
+               "abs"=>(abs, CUDAnative.abs),
+               "abs2"=>(abs2, CUDAnative.abs2),
+               "angle"=>(angle, CUDAnative.angle),
+               "log"=>(log, CUDAnative.log))
+
+    @testset "Complex - $key" for key=keys(ops)
+        cpu_op, cuda_op = ops[key]
+
+        buf = CuTestArray(zeros(Complex{Float32}))
+
+        function cuda_kernel(a, x)
+            a[] = cuda_op(x)
+            return
+        end
+
+        #op(::ComplexF32, ::ComplexF32)
+        x = rand(ComplexF32)
+        @cuda cuda_kernel(buf, x)
+        val = Array(buf)
+        @test val[] ≈ cpu_op(x)
+        @cuda cuda_kernel(buf, -x)
+        val = Array(buf)
+        @test val[] ≈ cpu_op(-x)
+
+        #op(::ComplexF64, ::ComplexF64)
+        x = rand(ComplexF64)
+        @cuda cuda_kernel(buf, x)
+        val = Array(buf)
+        @test val[] ≈ cpu_op(x)
+        @cuda cuda_kernel(buf, -x)
+        val = Array(buf)
+        @test val[] ≈ cpu_op(-x)
+    end
+end
+
 
 ############################################################################################
 

From 0a6fe2634a49a96b15c237f8ce9f0d568a30ebb5 Mon Sep 17 00:00:00 2001
From: Filippo Vicentini
Date: Sat, 28 Sep 2019 15:38:31 +0200
Subject: [PATCH 4/4] Fix angle for real numbers

---
 src/device/cuda/math.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/device/cuda/math.jl b/src/device/cuda/math.jl
index 37e3571b..5b5c08e4 100644
--- a/src/device/cuda/math.jl
+++ b/src/device/cuda/math.jl
@@ -38,8 +38,8 @@
 
 @inline angle(x::ComplexF64) = atan2(x.im, x.re)
 @inline angle(x::ComplexF32) = atan2(x.im, x.re)
-@inline angle(x::Float64) = signbit(x) * -3.141592653589793
-@inline angle(x::Float32) = signbit(x) * -3.1415927f0
+@inline angle(x::Float64) = signbit(x) * 3.141592653589793
+@inline angle(x::Float32) = signbit(x) * 3.1415927f0
 
 ## hyperbolic
 