From cf88dfed72f28cde6b24836027c84ae384801dcf Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Sat, 6 Jul 2024 16:22:34 +0300 Subject: [PATCH 1/8] Add GPUNumber --- Project.toml | 1 + src/GPUArrays.jl | 3 +++ src/host/gpunumber.jl | 30 ++++++++++++++++++++++++++++++ src/host/indexing.jl | 7 ++++--- src/host/mapreduce.jl | 20 +++++++++++--------- test/testsuite.jl | 3 +++ 6 files changed, 52 insertions(+), 12 deletions(-) create mode 100644 src/host/gpunumber.jl diff --git a/Project.toml b/Project.toml index 93c18549..b0b27643 100644 --- a/Project.toml +++ b/Project.toml @@ -3,6 +3,7 @@ uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" version = "10.2.3" [deps] +AbstractNumbers = "85c772de-338a-5e7f-b815-41e76c26ac1f" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" diff --git a/src/GPUArrays.jl b/src/GPUArrays.jl index 2d4f1bd9..b1d00d2d 100644 --- a/src/GPUArrays.jl +++ b/src/GPUArrays.jl @@ -1,5 +1,7 @@ module GPUArrays +import AbstractNumbers as AN + using Serialization using Random using LinearAlgebra @@ -25,6 +27,7 @@ include("device/synchronization.jl") # host abstractions include("host/abstractarray.jl") include("host/construction.jl") +include("host/gpunumber.jl") ## integrations and specialized methods include("host/base.jl") include("host/indexing.jl") diff --git a/src/host/gpunumber.jl b/src/host/gpunumber.jl new file mode 100644 index 00000000..5dde4e7b --- /dev/null +++ b/src/host/gpunumber.jl @@ -0,0 +1,30 @@ +# Custom GPU-compatible `Number` interface. +struct GPUNumber{T <: AbstractGPUArray} <: AN.AbstractNumber{T} + val::T + + function GPUNumber(val::T) where T <: AbstractGPUArray + length(val) != 1 && error( + "`GPUNumber` accepts only 1-element GPU arrays, " * + "instead `$(length(val))`-element array was given.") + new{T}(val) + end +end + +AN.number(g::GPUNumber) = @allowscalar g.val[] + +maybe_number(g::GPUNumber) = AN.number(g) +maybe_number(g) = g + +number_type(::GPUNumber{T}) where T = eltype(T) + +# When operations involve other `::Number` types, +# do not convert back to `GPUNumber`. +AN.like(::Type{<: GPUNumber}, x) = x + +# When broadcasting, just pass the array itself. +Base.broadcastable(g::GPUNumber) = g.val + +# Overload to avoid copies. +Base.one(g::GPUNumber) = one(number_type(g)) +Base.zero(g::GPUNumber) = zero(number_type(g)) +Base.identity(g::GPUNumber) = g diff --git a/src/host/indexing.jl b/src/host/indexing.jl index 659fb029..fa4d5f14 100644 --- a/src/host/indexing.jl +++ b/src/host/indexing.jl @@ -204,7 +204,7 @@ function Base.findfirst(f::Function, A::AnyGPUArray) end res = mapreduce((x, y)->(f(x), y), reduction, A, indices; - init = (false, dummy_index)) + init = (false, dummy_index)) |> AN.number if res[1] # out of consistency with Base.findarray, return a CartesianIndex # when the input is a multidimensional array @@ -230,14 +230,15 @@ function findminmax(binop, A::AnyGPUArray; init, dims) end if dims == Colon() - res = mapreduce(tuple, reduction, A, indices; init = (init, dummy_index)) + res = mapreduce(tuple, reduction, A, indices; + init = (init, dummy_index)) |> AN.number # out of consistency with Base.findarray, return a CartesianIndex # when the input is a multidimensional array return (res[1], ndims(A) == 1 ? res[2] : CartesianIndices(A)[res[2]]) else res = mapreduce(tuple, reduction, A, indices; - init = (init, dummy_index), dims=dims) + init = (init, dummy_index), dims=dims) |> maybe_number vals = map(x->x[1], res) inds = map(x->ndims(A) == 1 ? x[2] : CartesianIndices(A)[x[2]], res) return (vals, inds) diff --git a/src/host/mapreduce.jl b/src/host/mapreduce.jl index 32520ebc..5933439a 100644 --- a/src/host/mapreduce.jl +++ b/src/host/mapreduce.jl @@ -68,20 +68,20 @@ function _mapreduce(f::F, op::OP, As::Vararg{Any,N}; dims::D, init) where {F,OP, end if dims === Colon() - @allowscalar R[] + GPUNumber(R) else R end end -Base.any(A::AnyGPUArray{Bool}) = mapreduce(identity, |, A) -Base.all(A::AnyGPUArray{Bool}) = mapreduce(identity, &, A) +Base.any(A::AnyGPUArray{Bool}) = mapreduce(identity, |, A) |> AN.number +Base.all(A::AnyGPUArray{Bool}) = mapreduce(identity, &, A) |> AN.number -Base.any(f::Function, A::AnyGPUArray) = mapreduce(f, |, A) -Base.all(f::Function, A::AnyGPUArray) = mapreduce(f, &, A) +Base.any(f::Function, A::AnyGPUArray) = mapreduce(f, |, A) |> AN.number +Base.all(f::Function, A::AnyGPUArray) = mapreduce(f, &, A) |> AN.number Base.count(pred::Function, A::AnyGPUArray; dims=:, init=0) = - mapreduce(pred, Base.add_sum, A; init=init, dims=dims) + mapreduce(pred, Base.add_sum, A; init=init, dims=dims) |> maybe_number # avoid calling into `initarray!` for (fname, op) in [(:sum, :(Base.add_sum)), (:prod, :(Base.mul_prod)), @@ -94,7 +94,8 @@ for (fname, op) in [(:sum, :(Base.add_sum)), (:prod, :(Base.mul_prod)), end end -LinearAlgebra.ishermitian(A::AbstractGPUMatrix) = mapreduce(==, &, A, adjoint(A)) +LinearAlgebra.ishermitian(A::AbstractGPUMatrix) = + mapreduce(==, &, A, adjoint(A)) |> AN.number # comparisons @@ -105,7 +106,7 @@ function Base.isequal(A::AnyGPUArray, B::AnyGPUArray) if axes(A) != axes(B) return false end - mapreduce(isequal, &, A, B; init=true) + mapreduce(isequal, &, A, B; init=true) |> AN.number end # returns `missing` when missing values are involved @@ -129,6 +130,7 @@ function Base.:(==)(A::AnyGPUArray, B::AnyGPUArray) (; is_missing=false, is_equal=a.is_equal & b.is_equal) end end - res = mapreduce(mapper, reducer, A, B; init=(; is_missing=false, is_equal=true)) + res = mapreduce(mapper, reducer, A, B; + init=(; is_missing=false, is_equal=true)) |> AN.number res.is_missing ? missing : res.is_equal end diff --git a/test/testsuite.jl b/test/testsuite.jl index e7c14646..2724a2ed 100644 --- a/test/testsuite.jl +++ b/test/testsuite.jl @@ -34,6 +34,9 @@ function test_result(as::NTuple{N,Any}, bs::NTuple{N,Any}; kwargs...) where {N} test_result(a, b; kwargs...) end end +# Special case for `extrema` accross all dims. +test_result(as::NTuple{N,Any}, bs::GPUArrays.GPUNumber; kwargs...) where {N} = + test_result(as, GPUArrays.maybe_number(bs)) function compare(f, AT::Type{<:AbstractGPUArray}, xs...; kwargs...) # copy on the CPU, adapt on the GPU, but keep Ref's From 16bb79fdb48fecbc39348dcfce268e44af4fe34c Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Sun, 7 Jul 2024 00:52:14 +0300 Subject: [PATCH 2/8] Add compat --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index b0b27643..88429e92 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,7 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] +AbstractNumbers = "0.2" Adapt = "4.0" GPUArraysCore = "= 0.1.6" LLVM = "3.9, 4, 5, 6, 7, 8" From aa095e85121b373cc74c7b37c30a91dbf8575c27 Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Sun, 7 Jul 2024 12:12:49 +0300 Subject: [PATCH 3/8] Add more methods --- src/host/gpunumber.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/host/gpunumber.jl b/src/host/gpunumber.jl index 5dde4e7b..fa79e6a3 100644 --- a/src/host/gpunumber.jl +++ b/src/host/gpunumber.jl @@ -26,5 +26,7 @@ Base.broadcastable(g::GPUNumber) = g.val # Overload to avoid copies. Base.one(g::GPUNumber) = one(number_type(g)) +Base.one(::Type{GPUNumber{T}}) where T = one(eltype(T)) Base.zero(g::GPUNumber) = zero(number_type(g)) +Base.zero(::Type{GPUNumber{T}}) where T = zero(eltype(T)) Base.identity(g::GPUNumber) = g From 982e7cc40f7a255b50f25c765a0c21e5d95cc6d9 Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Sun, 7 Jul 2024 22:06:07 +0300 Subject: [PATCH 4/8] Add more methods --- src/host/gpunumber.jl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/host/gpunumber.jl b/src/host/gpunumber.jl index fa79e6a3..81e6931c 100644 --- a/src/host/gpunumber.jl +++ b/src/host/gpunumber.jl @@ -11,7 +11,6 @@ struct GPUNumber{T <: AbstractGPUArray} <: AN.AbstractNumber{T} end AN.number(g::GPUNumber) = @allowscalar g.val[] - maybe_number(g::GPUNumber) = AN.number(g) maybe_number(g) = g @@ -30,3 +29,12 @@ Base.one(::Type{GPUNumber{T}}) where T = one(eltype(T)) Base.zero(g::GPUNumber) = zero(number_type(g)) Base.zero(::Type{GPUNumber{T}}) where T = zero(eltype(T)) Base.identity(g::GPUNumber) = g + +Base.getindex(g::GPUNumber) = AN.number(g) + +Base.isequal(g::GPUNumber, v::Number) = isequal(g[], v) +Base.isequal(v::Number, g::GPUNumber) = isequal(v, g[]) + +Base.nextpow(a, x::GPUNumber) = nextpow(a, x[]) +Base.nextpow(a::GPUNumber, x) = nextpow(a[], x) +Base.nextpow(a::GPUNumber, x::GPUNumber) = nextpow(a[], x[]) From 59f0a88733fd4dcfb652d9191d2b661d2965893c Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Sun, 7 Jul 2024 23:01:37 +0300 Subject: [PATCH 5/8] More methods... --- src/host/gpunumber.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/host/gpunumber.jl b/src/host/gpunumber.jl index 81e6931c..e0be0078 100644 --- a/src/host/gpunumber.jl +++ b/src/host/gpunumber.jl @@ -38,3 +38,5 @@ Base.isequal(v::Number, g::GPUNumber) = isequal(v, g[]) Base.nextpow(a, x::GPUNumber) = nextpow(a, x[]) Base.nextpow(a::GPUNumber, x) = nextpow(a[], x) Base.nextpow(a::GPUNumber, x::GPUNumber) = nextpow(a[], x[]) + +Base.convert(::Type{Number}, g::GPUNumber) = g[] From 8e4835405b1970e146d8d23ecc7b30a5f164ab23 Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Tue, 9 Jul 2024 15:11:48 +0300 Subject: [PATCH 6/8] Return GPUNumber only when eltype is Number --- src/host/indexing.jl | 7 +++---- src/host/mapreduce.jl | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/host/indexing.jl b/src/host/indexing.jl index fa4d5f14..31c943cb 100644 --- a/src/host/indexing.jl +++ b/src/host/indexing.jl @@ -203,8 +203,7 @@ function Base.findfirst(f::Function, A::AnyGPUArray) return (false, dummy_index) end - res = mapreduce((x, y)->(f(x), y), reduction, A, indices; - init = (false, dummy_index)) |> AN.number + res = mapreduce((x, y)->(f(x), y), reduction, A, indices; init = (false, dummy_index)) if res[1] # out of consistency with Base.findarray, return a CartesianIndex # when the input is a multidimensional array @@ -231,14 +230,14 @@ function findminmax(binop, A::AnyGPUArray; init, dims) if dims == Colon() res = mapreduce(tuple, reduction, A, indices; - init = (init, dummy_index)) |> AN.number + init = (init, dummy_index)) # out of consistency with Base.findarray, return a CartesianIndex # when the input is a multidimensional array return (res[1], ndims(A) == 1 ? res[2] : CartesianIndices(A)[res[2]]) else res = mapreduce(tuple, reduction, A, indices; - init = (init, dummy_index), dims=dims) |> maybe_number + init = (init, dummy_index), dims=dims) vals = map(x->x[1], res) inds = map(x->ndims(A) == 1 ? x[2] : CartesianIndices(A)[x[2]], res) return (vals, inds) diff --git a/src/host/mapreduce.jl b/src/host/mapreduce.jl index 5933439a..754607eb 100644 --- a/src/host/mapreduce.jl +++ b/src/host/mapreduce.jl @@ -68,17 +68,20 @@ function _mapreduce(f::F, op::OP, As::Vararg{Any,N}; dims::D, init) where {F,OP, end if dims === Colon() - GPUNumber(R) + # Return `GPUNumber` for `Number` eltypes, otherwise - transfer to host. + eltype(R) <: Number ? + GPUNumber(reshape(R, :)) : + @allowscalar(R[]) else R end end -Base.any(A::AnyGPUArray{Bool}) = mapreduce(identity, |, A) |> AN.number -Base.all(A::AnyGPUArray{Bool}) = mapreduce(identity, &, A) |> AN.number +Base.any(A::AnyGPUArray{Bool}) = mapreduce(identity, |, A)[] +Base.all(A::AnyGPUArray{Bool}) = mapreduce(identity, &, A)[] -Base.any(f::Function, A::AnyGPUArray) = mapreduce(f, |, A) |> AN.number -Base.all(f::Function, A::AnyGPUArray) = mapreduce(f, &, A) |> AN.number +Base.any(f::Function, A::AnyGPUArray) = mapreduce(f, |, A)[] +Base.all(f::Function, A::AnyGPUArray) = mapreduce(f, &, A)[] Base.count(pred::Function, A::AnyGPUArray; dims=:, init=0) = mapreduce(pred, Base.add_sum, A; init=init, dims=dims) |> maybe_number @@ -94,8 +97,7 @@ for (fname, op) in [(:sum, :(Base.add_sum)), (:prod, :(Base.mul_prod)), end end -LinearAlgebra.ishermitian(A::AbstractGPUMatrix) = - mapreduce(==, &, A, adjoint(A)) |> AN.number +LinearAlgebra.ishermitian(A::AbstractGPUMatrix) = mapreduce(==, &, A, adjoint(A))[] # comparisons @@ -106,7 +108,7 @@ function Base.isequal(A::AnyGPUArray, B::AnyGPUArray) if axes(A) != axes(B) return false end - mapreduce(isequal, &, A, B; init=true) |> AN.number + mapreduce(isequal, &, A, B; init=true)[] end # returns `missing` when missing values are involved @@ -131,6 +133,6 @@ function Base.:(==)(A::AnyGPUArray, B::AnyGPUArray) end end res = mapreduce(mapper, reducer, A, B; - init=(; is_missing=false, is_equal=true)) |> AN.number + init=(; is_missing=false, is_equal=true)) res.is_missing ? missing : res.is_equal end From d951132c65387c1f964d47ceac7892434d4fd48a Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Tue, 9 Jul 2024 15:15:22 +0300 Subject: [PATCH 7/8] Cleanup --- test/testsuite.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/testsuite.jl b/test/testsuite.jl index 2724a2ed..e7c14646 100644 --- a/test/testsuite.jl +++ b/test/testsuite.jl @@ -34,9 +34,6 @@ function test_result(as::NTuple{N,Any}, bs::NTuple{N,Any}; kwargs...) where {N} test_result(a, b; kwargs...) end end -# Special case for `extrema` accross all dims. -test_result(as::NTuple{N,Any}, bs::GPUArrays.GPUNumber; kwargs...) where {N} = - test_result(as, GPUArrays.maybe_number(bs)) function compare(f, AT::Type{<:AbstractGPUArray}, xs...; kwargs...) # copy on the CPU, adapt on the GPU, but keep Ref's From f01a7d13553294c2eaa86d330de2131215d986b4 Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Thu, 11 Jul 2024 00:27:59 +0300 Subject: [PATCH 8/8] Refactoring --- src/GPUArrays.jl | 2 +- src/host/gpunumber.jl | 42 +++++++++++++++++++++--------------------- src/host/mapreduce.jl | 4 ++-- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/GPUArrays.jl b/src/GPUArrays.jl index b1d00d2d..0bc6fd8c 100644 --- a/src/GPUArrays.jl +++ b/src/GPUArrays.jl @@ -1,6 +1,6 @@ module GPUArrays -import AbstractNumbers as AN +import AbstractNumbers using Serialization using Random diff --git a/src/host/gpunumber.jl b/src/host/gpunumber.jl index e0be0078..e2660740 100644 --- a/src/host/gpunumber.jl +++ b/src/host/gpunumber.jl @@ -1,42 +1,42 @@ # Custom GPU-compatible `Number` interface. -struct GPUNumber{T <: AbstractGPUArray} <: AN.AbstractNumber{T} +struct AsyncNumber{T <: AbstractGPUArray} <: AbstractNumbers.AbstractNumber{T} val::T - function GPUNumber(val::T) where T <: AbstractGPUArray + function AsyncNumber(val::T) where T <: AbstractGPUArray length(val) != 1 && error( - "`GPUNumber` accepts only 1-element GPU arrays, " * + "`AsyncNumber` accepts only 1-element GPU arrays, " * "instead `$(length(val))`-element array was given.") new{T}(val) end end -AN.number(g::GPUNumber) = @allowscalar g.val[] -maybe_number(g::GPUNumber) = AN.number(g) +AbstractNumbers.number(g::AsyncNumber) = @allowscalar g.val[] +maybe_number(g::AsyncNumber) = AbstractNumbers.number(g) maybe_number(g) = g -number_type(::GPUNumber{T}) where T = eltype(T) +number_type(::AsyncNumber{T}) where T = eltype(T) # When operations involve other `::Number` types, -# do not convert back to `GPUNumber`. -AN.like(::Type{<: GPUNumber}, x) = x +# do not convert back to `AsyncNumber`. +AbstractNumbers.like(::Type{<: AsyncNumber}, x) = x # When broadcasting, just pass the array itself. -Base.broadcastable(g::GPUNumber) = g.val +Base.broadcastable(g::AsyncNumber) = g.val # Overload to avoid copies. -Base.one(g::GPUNumber) = one(number_type(g)) -Base.one(::Type{GPUNumber{T}}) where T = one(eltype(T)) -Base.zero(g::GPUNumber) = zero(number_type(g)) -Base.zero(::Type{GPUNumber{T}}) where T = zero(eltype(T)) -Base.identity(g::GPUNumber) = g +Base.one(g::AsyncNumber) = one(number_type(g)) +Base.one(::Type{AsyncNumber{T}}) where T = one(eltype(T)) +Base.zero(g::AsyncNumber) = zero(number_type(g)) +Base.zero(::Type{AsyncNumber{T}}) where T = zero(eltype(T)) +Base.identity(g::AsyncNumber) = g -Base.getindex(g::GPUNumber) = AN.number(g) +Base.getindex(g::AsyncNumber) = AbstractNumbers.number(g) -Base.isequal(g::GPUNumber, v::Number) = isequal(g[], v) -Base.isequal(v::Number, g::GPUNumber) = isequal(v, g[]) +Base.isequal(g::AsyncNumber, v::Number) = isequal(g[], v) +Base.isequal(v::Number, g::AsyncNumber) = isequal(v, g[]) -Base.nextpow(a, x::GPUNumber) = nextpow(a, x[]) -Base.nextpow(a::GPUNumber, x) = nextpow(a[], x) -Base.nextpow(a::GPUNumber, x::GPUNumber) = nextpow(a[], x[]) +Base.nextpow(a, x::AsyncNumber) = nextpow(a, x[]) +Base.nextpow(a::AsyncNumber, x) = nextpow(a[], x) +Base.nextpow(a::AsyncNumber, x::AsyncNumber) = nextpow(a[], x[]) -Base.convert(::Type{Number}, g::GPUNumber) = g[] +Base.convert(::Type{Number}, g::AsyncNumber) = g[] diff --git a/src/host/mapreduce.jl b/src/host/mapreduce.jl index 754607eb..594bbf5b 100644 --- a/src/host/mapreduce.jl +++ b/src/host/mapreduce.jl @@ -68,9 +68,9 @@ function _mapreduce(f::F, op::OP, As::Vararg{Any,N}; dims::D, init) where {F,OP, end if dims === Colon() - # Return `GPUNumber` for `Number` eltypes, otherwise - transfer to host. + # Return `AsyncNumber` for `Number` eltypes, otherwise - transfer to host. eltype(R) <: Number ? - GPUNumber(reshape(R, :)) : + AsyncNumber(reshape(R, :)) : @allowscalar(R[]) else R