
Merge #466
466: Add some more complex operations - Take 2 r=maleadt a=PhilipVinc

This is the commit from @benchislett in PR #445, with the addition of the `angle` and `log` operations and several tests for all functions included in the PR.

@benchislett If you would rather pull my commits into your PR, please feel free to do so. I'm only opening this one because I urgently need these changes merged.

On a side note:
I would love to add more functions (notably `log1p`, `expm1`, and `sort`), but I am not sure how best to implement them. Looking at [thrust](https://github.com/thrust/thrust/blob/7df7efe3542a0ab549530bc478467320467e0094/thrust/detail/complex/csqrt.h), they use a fair amount of if/else branching, much like base Julia. Is that even a good approach on the GPU?
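To illustrate the concern (an editor's sketch, not code from this PR; `branchy` and `branchless` are hypothetical names): data-dependent branches can make threads within a warp diverge, whereas Julia's `ifelse` evaluates both arms eagerly and lowers to a select, keeping the warp in lockstep.

```julia
# Hypothetical illustration: branch vs. branch-free select.
# `ifelse` evaluates both arguments, so each must be safe to
# compute for every input.
branchy(x::Float32)    = x < 0f0 ? -x : x        # conditional branch: may diverge
branchless(x::Float32) = ifelse(x < 0f0, -x, x)  # select: no divergent control flow
```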

cc @maleadt 

Co-authored-by: benchislett <[email protected]>
Co-authored-by: Filippo Vicentini <[email protected]>
3 people authored Oct 2, 2019
2 parents a4a56bd + 0a6fe26 commit d98e60e
Showing 2 changed files with 137 additions and 1 deletion.
17 changes: 17 additions & 0 deletions src/device/cuda/math.jl
@@ -32,9 +32,14 @@
@inline atan(x::Float64) = @wrap __nv_atan(x::double)::double
@inline atan(x::Float32) = @wrap __nv_atanf(x::float)::float

# ! CUDAnative.atan2 is equivalent to Base.atan
@inline atan2(x::Float64, y::Float64) = @wrap __nv_atan2(x::double, y::double)::double
@inline atan2(x::Float32, y::Float32) = @wrap __nv_atan2f(x::float, y::float)::float

@inline angle(x::ComplexF64) = atan2(x.im, x.re)
@inline angle(x::ComplexF32) = atan2(x.im, x.re)
@inline angle(x::Float64) = signbit(x) * 3.141592653589793
@inline angle(x::Float32) = signbit(x) * 3.1415927f0
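A quick CPU-side sanity check (an editor's illustration, not part of the diff): `signbit` returns a `Bool`, which promotes under multiplication, so negative reals map to π and non-negative reals to zero, matching `Base.angle`.

```julia
angle(-2.0) ≈ π            # true: the angle of a negative real is π
angle(2.0) == 0.0          # true: and zero for a non-negative real
true * 3.141592653589793   # Bool promotes: 3.141592653589793
false * 3.1415927f0        # 0.0f0
```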

## hyperbolic

@@ -66,6 +71,10 @@
@inline log(x::Float32) = @wrap __nv_logf(x::float)::float
@inline log_fast(x::Float32) = @wrap __nv_fast_logf(x::float)::float

@inline log(x::ComplexF64) = log(abs(x)) + im * angle(x)
@inline log(x::ComplexF32) = log(abs(x)) + im * angle(x)
@inline log_fast(x::ComplexF32) = log_fast(abs(x)) + im * angle(x)
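For reference (editor's note, not from the commit), these methods use the principal-branch identity log(z) = log|z| + i·angle(z), which agrees with `Base.log` on the CPU:

```julia
# Checking the identity against Base's complex log
z = 1.0f0 + 2.0f0im
log(abs(z)) + im * angle(z) ≈ log(z)  # true
```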

@inline log10(x::Float64) = @wrap __nv_log10(x::double)::double
@inline log10(x::Float32) = @wrap __nv_log10f(x::float)::float
@inline log10_fast(x::Float32) = @wrap __nv_fast_log10f(x::float)::float
@@ -103,6 +112,9 @@
@inline ldexp(x::Float64, y::Int32) = @wrap __nv_ldexp(x::double, y::i32)::double
@inline ldexp(x::Float32, y::Int32) = @wrap __nv_ldexpf(x::float, y::i32)::float

@inline exp(x::Complex{Float64}) = exp(x.re) * (cos(x.im) + 1.0im * sin(x.im))
@inline exp(x::Complex{Float32}) = exp(x.re) * (cos(x.im) + 1.0f0im * sin(x.im))
@inline exp_fast(x::Complex{Float32}) = exp_fast(x.re) * (cos_fast(x.im) + 1.0f0im * sin_fast(x.im))
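These implement Euler's formula, exp(x + iy) = e^x·(cos y + i·sin y); a CPU-side check (editor's illustration):

```julia
# Euler's formula agrees with Base's complex exp
z = 0.5 + 1.0im
exp(real(z)) * (cos(imag(z)) + im * sin(imag(z))) ≈ exp(z)  # true
```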

## error

@@ -170,6 +182,8 @@
@inline abs(f::Float32) = @wrap __nv_fabsf(f::float)::float
@inline abs(x::Int64) = @wrap __nv_llabs(x::i64)::i64

@inline abs(x::Complex{Float64}) = hypot(x.re, x.im)
@inline abs(x::Complex{Float32}) = hypot(x.re, x.im)
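`hypot` is preferred over a naive `sqrt(x.re^2 + x.im^2)` because the intermediate squares can overflow even when the result is representable (editor's note):

```julia
sqrt(1f30^2 + 0f0^2)  # Inf32: 1f30^2 overflows Float32
hypot(1f30, 0f0)      # 1.0f30: no intermediate overflow
```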

## roots and powers

@@ -192,6 +206,9 @@
@inline pow(x::Float32, y::Int32) = @wrap __nv_powif(x::float, y::i32)::float
@inline pow(x::Union{Float32, Float64}, y::Int64) = pow(x, Int32(y))

@inline abs2(x::Complex{Float64}) = x.re * x.re + x.im * x.im
@inline abs2(x::Complex{Float32}) = x.re * x.re + x.im * x.im
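`abs2` computes |z|² directly from the components, skipping the square root entirely (editor's note):

```julia
abs2(3.0 + 4.0im) == 25.0  # re² + im², no sqrt
abs(3.0 + 4.0im) == 5.0    # hypot(3.0, 4.0)
```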

## rounding and selection

# TODO: differentiate in return type, map correctly
121 changes: 120 additions & 1 deletion test/device/cuda.jl
@@ -145,8 +145,127 @@ end
Tuple{Float64}, optimize=false)[1])
@test !occursin("Float32", codeinfo_str)
end
end

@testset "angle" begin
buf = CuTestArray(zeros(Float32))
cbuf = CuTestArray(zeros(Float32))

function cuda_kernel(a, x)
a[] = CUDAnative.angle(x)
return
end

#op(::Float32)
x = rand(Float32)
@cuda cuda_kernel(buf, x)
val = Array(buf)
@test val[] ≈ angle(x)
@cuda cuda_kernel(buf, -x)
val = Array(buf)
@test val[] ≈ angle(-x)

#op(::ComplexF32)
x = rand(ComplexF32)
@cuda cuda_kernel(cbuf, x)
val = Array(cbuf)
@test val[] ≈ angle(x)
@cuda cuda_kernel(cbuf, -x)
val = Array(cbuf)
@test val[] ≈ angle(-x)

#op(::Float64)
x = rand(Float64)
@cuda cuda_kernel(buf, x)
val = Array(buf)
@test val[] ≈ angle(x)
@cuda cuda_kernel(buf, -x)
val = Array(buf)
@test val[] ≈ angle(-x)

#op(::ComplexF64)
x = rand(ComplexF64)
@cuda cuda_kernel(cbuf, x)
val = Array(cbuf)
@test val[] ≈ angle(x)
@cuda cuda_kernel(cbuf, -x)
val = Array(cbuf)
@test val[] ≈ angle(-x)
end

# dictionary of key=>tuple, where the tuple contains
# the CPU function and the CUDAnative function to test.
ops = Dict("exp"=>(exp, CUDAnative.exp),
"angle"=>(angle, CUDAnative.angle),
"exp2"=>(exp2, CUDAnative.exp2),
"exp10"=>(exp10, CUDAnative.exp10),
"expm1"=>(expm1, CUDAnative.expm1))

@testset "$key" for key=keys(ops)
cpu_op, cuda_op = ops[key]

buf = CuTestArray(zeros(Float32))

function cuda_kernel(a, x)
a[] = cuda_op(x)
return
end

#op(::Float32)
x = rand(Float32)
@cuda cuda_kernel(buf, x)
val = Array(buf)
@test val[] ≈ cpu_op(x)
@cuda cuda_kernel(buf, -x)
val = Array(buf)
@test val[] ≈ cpu_op(-x)

#op(::Float64)
x = rand(Float64)
@cuda cuda_kernel(buf, x)
val = Array(buf)
@test val[] ≈ cpu_op(x)
@cuda cuda_kernel(buf, -x)
val = Array(buf)
@test val[] ≈ cpu_op(-x)
end

# dictionary of key=>tuple, where the tuple contains
# the CPU function and the CUDAnative function to test.
ops = Dict("exp"=>(exp, CUDAnative.exp),
"abs"=>(abs, CUDAnative.abs),
"abs2"=>(abs2, CUDAnative.abs2),
"angle"=>(angle, CUDAnative.angle),
"log"=>(log, CUDAnative.log))

@testset "Complex - $key" for key=keys(ops)
cpu_op, cuda_op = ops[key]

buf = CuTestArray(zeros(Complex{Float32}))

function cuda_kernel(a, x)
a[] = cuda_op(x)
return
end

#op(::ComplexF32)
x = rand(ComplexF32)
@cuda cuda_kernel(buf, x)
val = Array(buf)
@test val[] ≈ cpu_op(x)
@cuda cuda_kernel(buf, -x)
val = Array(buf)
@test val[] ≈ cpu_op(-x)

#op(::ComplexF64)
x = rand(ComplexF64)
@cuda cuda_kernel(buf, x)
val = Array(buf)
@test val[] ≈ cpu_op(x)
@cuda cuda_kernel(buf, -x)
val = Array(buf)
@test val[] ≈ cpu_op(-x)
end
end


############################################################################################