From 3ecd33e29aa6dc5da5581567a7738a714d297693 Mon Sep 17 00:00:00 2001
From: benchislett <chislett.ben@gmail.com>
Date: Mon, 19 Aug 2019 10:19:46 +0900
Subject: [PATCH 1/4] Add some more complex operations

---
 src/device/cuda/math.jl | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/device/cuda/math.jl b/src/device/cuda/math.jl
index 0c03c5af..cf2a3e2d 100644
--- a/src/device/cuda/math.jl
+++ b/src/device/cuda/math.jl
@@ -103,6 +103,9 @@
 @inline ldexp(x::Float64, y::Int32) = @wrap __nv_ldexp(x::double, y::i32)::double
 @inline ldexp(x::Float32, y::Int32) = @wrap __nv_ldexpf(x::float, y::i32)::float
 
+@inline exp(x::Complex{Float64}) = exp(x.re) * (cos(x.im) + 1.0im * sin(x.im)) # Euler: e^re * (cos(im) + i*sin(im))
+@inline exp(x::Complex{Float32}) = exp(x.re) * (cos(x.im) + 1.0f0im * sin(x.im)) # Float32 unit keeps the result ComplexF32
+@inline exp_fast(x::Complex{Float32}) = exp_fast(x.re) * (cos_fast(x.im) + 1.0f0im * sin_fast(x.im)) # same, via *_fast variants
 
 ## error
 
@@ -170,6 +173,8 @@
 @inline abs(f::Float32) = @wrap __nv_fabsf(f::float)::float
 @inline abs(x::Int64) =   @wrap __nv_llabs(x::i64)::i64
 
+@inline abs(z::ComplexF64) = hypot(z.re, z.im) # modulus: hypot of the real and imaginary parts
+@inline abs(z::ComplexF32) = hypot(z.re, z.im) # single-precision counterpart
 
 ## roots and powers
 
@@ -192,6 +197,9 @@
 @inline pow(x::Float32, y::Int32) =   @wrap __nv_powif(x::float, y::i32)::float
 @inline pow(x::Union{Float32, Float64}, y::Int64) = pow(x, Int32(y))
 
+@inline abs2(z::ComplexF64) = z.re * z.re + z.im * z.im # squared modulus: re^2 + im^2, no square root
+@inline abs2(z::ComplexF32) = z.re * z.re + z.im * z.im # single-precision counterpart
+
 ## rounding and selection
 
 # TODO: differentiate in return type, map correctly

From 323aa8f7c4a0a55f514b10996583427f6af6bf62 Mon Sep 17 00:00:00 2001
From: Filippo Vicentini <filippovicentini@gmail.com>
Date: Sat, 28 Sep 2019 14:42:59 +0200
Subject: [PATCH 2/4] add complex angle and logarithm functions

---
 src/device/cuda/math.jl | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/device/cuda/math.jl b/src/device/cuda/math.jl
index cf2a3e2d..9a9635b2 100644
--- a/src/device/cuda/math.jl
+++ b/src/device/cuda/math.jl
@@ -32,9 +32,14 @@
 @inline atan(x::Float64) = @wrap __nv_atan(x::double)::double
 @inline atan(x::Float32) = @wrap __nv_atanf(x::float)::float
 
+# NOTE: CUDAnative.atan2 is equivalent to the two-argument method of Base.atan
 @inline atan2(x::Float64, y::Float64) = @wrap __nv_atan2(x::double, y::double)::double
 @inline atan2(x::Float32, y::Float32) = @wrap __nv_atan2f(x::float, y::float)::float
 
+@inline angle(x::ComplexF64) = atan2(x.re, y.im)
+@inline angle(x::ComplexF32) = atan2(x.re, y.im)
+@inline angle(x::Float64) = 0.0
+@inline angle(x::Float32) = 0.0
 
 ## hyperbolic
 
@@ -66,6 +71,10 @@
 @inline log(x::Float32) = @wrap __nv_logf(x::float)::float
 @inline log_fast(x::Float32) = @wrap __nv_fast_logf(x::float)::float
 
+@inline log(z::ComplexF64) = Complex(log(abs(z)), angle(z)) # log(z) = log|z| + i*angle(z)
+@inline log(z::ComplexF32) = Complex(log(abs(z)), angle(z)) # single-precision counterpart
+@inline log_fast(z::ComplexF32) = Complex(log_fast(abs(z)), angle(z)) # only the real part uses log_fast
+
 @inline log10(x::Float64) = @wrap __nv_log10(x::double)::double
 @inline log10(x::Float32) = @wrap __nv_log10f(x::float)::float
 @inline log10_fast(x::Float32) = @wrap __nv_fast_log10f(x::float)::float

From 345956cba6528702ebd06ca7c261b3296f1d0dad Mon Sep 17 00:00:00 2001
From: Filippo Vicentini <filippovicentini@gmail.com>
Date: Sat, 28 Sep 2019 15:25:47 +0200
Subject: [PATCH 3/4] Add tests for all those complex functions (and a bunch of
 real ones)

---
 src/device/cuda/math.jl |   8 +--
 test/device/cuda.jl     | 122 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 125 insertions(+), 5 deletions(-)

diff --git a/src/device/cuda/math.jl b/src/device/cuda/math.jl
index 9a9635b2..ae3ae02a 100644
--- a/src/device/cuda/math.jl
+++ b/src/device/cuda/math.jl
@@ -36,10 +36,10 @@
 @inline atan2(x::Float64, y::Float64) = @wrap __nv_atan2(x::double, y::double)::double
 @inline atan2(x::Float32, y::Float32) = @wrap __nv_atan2f(x::float, y::float)::float
 
-@inline angle(x::ComplexF64) = atan2(x.re, y.im)
-@inline angle(x::ComplexF32) = atan2(x.re, y.im)
-@inline angle(x::Float64) = 0.0
-@inline angle(x::Float32) = 0.0
+@inline angle(x::ComplexF64) = atan2(x.im, x.re)
+@inline angle(x::ComplexF32) = atan2(x.im, x.re)
+@inline angle(x::Float64) = signbit(x) * -3.141592653589793
+@inline angle(x::Float32) = signbit(x) * -3.1415927f0
 
 ## hyperbolic
 
diff --git a/test/device/cuda.jl b/test/device/cuda.jl
index 29a34566..c4011c9c 100644
--- a/test/device/cuda.jl
+++ b/test/device/cuda.jl
@@ -99,9 +99,129 @@ end
         val = Array(buf)
         @test val[] ≈ x^y
     end
-end
 
 
+    @testset "angle" begin
+        buf32 = CuTestArray(zeros(Float32)) # receives results for Float32/ComplexF32 inputs
+        buf64 = CuTestArray(zeros(Float64)) # receives results for Float64/ComplexF64 inputs
+
+        function cuda_kernel(a, x)
+            a[] = CUDAnative.angle(x)
+            return
+        end
+
+        # angle(::Float32): compare against the host result for x and -x
+        x   = rand(Float32)
+        @cuda cuda_kernel(buf32, x)
+        val = Array(buf32)
+        @test val[] ≈ angle(x)
+        @cuda cuda_kernel(buf32, -x)
+        val = Array(buf32)
+        @test val[] ≈ angle(-x)
+
+        # angle(::ComplexF32): the result is real, so it goes into the Float32 buffer
+        x   = rand(ComplexF32)
+        @cuda cuda_kernel(buf32, x)
+        val = Array(buf32)
+        @test val[] ≈ angle(x)
+        @cuda cuda_kernel(buf32, -x)
+        val = Array(buf32)
+        @test val[] ≈ angle(-x)
+
+        # angle(::Float64): stored in a Float64 buffer so the check is not rounded to Float32
+        x   = rand(Float64)
+        @cuda cuda_kernel(buf64, x)
+        val = Array(buf64)
+        @test val[] ≈ angle(x)
+        @cuda cuda_kernel(buf64, -x)
+        val = Array(buf64)
+        @test val[] ≈ angle(-x)
+
+        # angle(::ComplexF64): real result, checked at double precision
+        x   = rand(ComplexF64)
+        @cuda cuda_kernel(buf64, x)
+        val = Array(buf64)
+        @test val[] ≈ angle(x)
+        @cuda cuda_kernel(buf64, -x)
+        val = Array(buf64)
+        @test val[] ≈ angle(-x)
+    end
+
+    # Maps a testset name to a (host function, CUDAnative function) pair to compare on real inputs.
+    ops = Dict("exp"=>(exp, CUDAnative.exp),
+               "angle"=>(angle, CUDAnative.angle),
+               "exp2"=>(exp2, CUDAnative.exp2),
+               "exp10"=>(exp10, CUDAnative.exp10),
+               "expm1"=>(expm1, CUDAnative.expm1))
+
+    @testset "$key" for key=keys(ops)
+        cpu_op, cuda_op = ops[key]
+
+        buf32 = CuTestArray(zeros(Float32)) # receives results for Float32 inputs
+        buf64 = CuTestArray(zeros(Float64)) # receives results for Float64 inputs, avoiding rounding to Float32
+
+        function cuda_kernel(a, x)
+            a[] = cuda_op(x)
+            return
+        end
+
+        # op(::Float32): compare against the host result for x and -x
+        x   = rand(Float32)
+        @cuda cuda_kernel(buf32, x)
+        val = Array(buf32)
+        @test val[] ≈ cpu_op(x)
+        @cuda cuda_kernel(buf32, -x)
+        val = Array(buf32)
+        @test val[] ≈ cpu_op(-x)
+
+        # op(::Float64): checked at double precision
+        x   = rand(Float64)
+        @cuda cuda_kernel(buf64, x)
+        val = Array(buf64)
+        @test val[] ≈ cpu_op(x)
+        @cuda cuda_kernel(buf64, -x)
+        val = Array(buf64)
+        @test val[] ≈ cpu_op(-x)
+    end
+
+    # Maps a testset name to a (host function, CUDAnative function) pair to compare on complex inputs.
+    ops = Dict("exp"=>(exp, CUDAnative.exp),
+               "abs"=>(abs, CUDAnative.abs),
+               "abs2"=>(abs2, CUDAnative.abs2),
+               "angle"=>(angle, CUDAnative.angle),
+               "log"=>(log, CUDAnative.log))
+
+    @testset "Complex - $key" for key=keys(ops)
+        cpu_op, cuda_op = ops[key]
+
+        buf32 = CuTestArray(zeros(ComplexF32)) # receives results for ComplexF32 inputs
+        buf64 = CuTestArray(zeros(ComplexF64)) # receives results for ComplexF64 inputs, avoiding rounding to Float32
+
+        function cuda_kernel(a, x)
+            a[] = cuda_op(x) # real-valued results (abs, abs2, angle) are converted to the complex eltype
+            return
+        end
+
+        # op(::ComplexF32): compare against the host result for x and -x
+        x   = rand(ComplexF32)
+        @cuda cuda_kernel(buf32, x)
+        val = Array(buf32)
+        @test val[] ≈ cpu_op(x)
+        @cuda cuda_kernel(buf32, -x)
+        val = Array(buf32)
+        @test val[] ≈ cpu_op(-x)
+
+        # op(::ComplexF64): checked at double precision
+        x   = rand(ComplexF64)
+        @cuda cuda_kernel(buf64, x)
+        val = Array(buf64)
+        @test val[] ≈ cpu_op(x)
+        @cuda cuda_kernel(buf64, -x)
+        val = Array(buf64)
+        @test val[] ≈ cpu_op(-x)
+    end
+end
+
 
 ############################################################################################
 

From 0a6fe2634a49a96b15c237f8ce9f0d568a30ebb5 Mon Sep 17 00:00:00 2001
From: Filippo Vicentini <filippovicentini@gmail.com>
Date: Sat, 28 Sep 2019 15:38:31 +0200
Subject: [PATCH 4/4] Fix angle for real numbers

---
 src/device/cuda/math.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/device/cuda/math.jl b/src/device/cuda/math.jl
index 37e3571b..5b5c08e4 100644
--- a/src/device/cuda/math.jl
+++ b/src/device/cuda/math.jl
@@ -38,8 +38,8 @@
 
 @inline angle(x::ComplexF64) = atan2(x.im, x.re)
 @inline angle(x::ComplexF32) = atan2(x.im, x.re)
-@inline angle(x::Float64) = signbit(x) * -3.141592653589793
-@inline angle(x::Float32) = signbit(x) * -3.1415927f0
+@inline angle(x::Float64) = ifelse(isnan(x), x, signbit(x) * 3.141592653589793) # π if the sign bit is set, else 0; NaN propagates
+@inline angle(x::Float32) = ifelse(isnan(x), x, signbit(x) * 3.1415927f0) # single-precision counterpart
 
 ## hyperbolic