diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index dee2f66..fc9e227 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -18,9 +18,9 @@ jobs: fail-fast: false matrix: version: - - '1.7' # Minimum required Julia version. + - '1.9' # Minimum required Julia version (supporting extensions) - '1' # Latest stable 1.x release of Julia - - 'nightly' + #- 'nightly' # CUDA fails to pre-compile on nightly os: - ubuntu-latest - macOS-latest diff --git a/Project.toml b/Project.toml index 9c69f2d..8c27a7b 100644 --- a/Project.toml +++ b/Project.toml @@ -4,20 +4,24 @@ authors = ["Samuel Omlin"] version = "0.1.4" [deps] -AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" +# [weakdeps] +# AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +# CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + [compat] -Adapt = "3" -AMDGPU = "0.3.7, 0.4, 0.5, 0.6, 0.7, 0.8" -CUDA = "3.12, 4, 5" -julia = "1.7" +Adapt = "3" #TODO: , 4" +# AMDGPU = "0.3.7, 0.4, 0.5, 0.6, 0.7, 0.8" +# CUDA = "3.12, 4, 5" +julia = "1.9" # Minimum required Julia version (supporting extensions) StaticArrays = "1" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" [targets] -test = ["Test"] +test = ["Test", "AMDGPU", "CUDA"] \ No newline at end of file diff --git a/examples/memcopyCellArray3D.jl b/examples/memcopyCellArray3D.jl index 89826b3..3e49803 100644 --- a/examples/memcopyCellArray3D.jl +++ b/examples/memcopyCellArray3D.jl @@ -1,5 +1,7 @@ using CellArrays, StaticArrays, CUDA +@define_CuCellArray + function copy3D!(T2::CellArray, T::CellArray, Ci::CellArray) ix = (blockIdx().x-1) * blockDim().x + threadIdx().x iy = (blockIdx().y-1) * blockDim().y + threadIdx().y diff --git a/examples/memcopyCellArray3D_ParallelStencil.jl b/examples/memcopyCellArray3D_ParallelStencil.jl index 1e61446..c53f218 100644 --- a/examples/memcopyCellArray3D_ParallelStencil.jl +++ b/examples/memcopyCellArray3D_ParallelStencil.jl @@ -1,5 +1,6 @@ const USE_GPU = true using CellArrays, StaticArrays +import CUDA using ParallelStencil using ParallelStencil.FiniteDifferences3D @static if USE_GPU diff --git a/src/CellArray.jl b/src/CellArray.jl index c0951e5..ded2874 100644 --- a/src/CellArray.jl +++ b/src/CellArray.jl @@ -1,4 +1,4 @@ -using StaticArrays, Adapt, CUDA, AMDGPU +using StaticArrays, Adapt ## Constants @@ -104,10 +104,20 @@ Construct an uninitialized `N`-dimensional `CellArray` containing `Cells` of typ See also: [`CellArray`](@ref), [`CuCellArray`](@ref), [`ROCCellArray`](@ref) """ -CPUCellArray{T,N,B,T_elem} = CellArray{T,N,B,Array{T_elem,_N}} +const CPUCellArray{T,N,B,T_elem} = CellArray{T,N,B,Array{T_elem,_N}} + +CPUCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B} = (check_T(T); CPUCellArray{T,N,B,eltype(T)}(undef, dims)) +CPUCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:Cell,B} = CPUCellArray{T,B}(undef, dims) +CPUCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N} = CPUCellArray{T,0}(undef, dims) +CPUCellArray{T}(::UndefInitializer, dims::Int...) where {T<:Cell} = CPUCellArray{T}(undef, dims) """ + @define_CuCellArray + +Define the following type alias and constructors in the caller module: + +******************************************************************************** CuCellArray{T<:Cell,N,B,T_elem} <: AbstractArray{T,N} where Cell <: Union{Number, SArray, FieldArray} `N`-dimensional CellArray with cells of type `T`, blocklength `B`, and `T_array` being a `CuArray` of element type `T_elem`: alias for `CellArray{T,N,B,CuArray{T_elem,CellArrays._N}}`. @@ -120,11 +130,32 @@ CPUCellArray{T,N,B,T_elem} = CellArray{T,N,B,Array{T_elem,_N}} Construct an uninitialized `N`-dimensional `CellArray` containing `Cells` of type `T` which are stored in an array of kind `CuArray`. See also: [`CellArray`](@ref), [`CPUCellArray`](@ref), [`ROCCellArray`](@ref) +******************************************************************************** + +!!! note "Avoiding unneeded dependencies" + The type aliases and constructors for GPU `CellArray`s are provided via macros to avoid unneeded dependencies on the GPU packages in CellArrays. + +See also: [`@define_ROCCellArray`](@ref) """ -CuCellArray{T,N,B,T_elem} = CellArray{T,N,B,CuArray{T_elem,_N}} +macro define_CuCellArray() esc(define_CuCellArray()) end + +function define_CuCellArray() + quote + const CuCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,CUDA.CuArray{T_elem,CellArrays._N}} + CuCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = (CellArrays.check_T(T); CuCellArray{T,N,B,eltype(T)}(undef, dims)) + CuCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell,B} = CuCellArray{T,B}(undef, dims) + CuCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = CuCellArray{T,0}(undef, dims) + CuCellArray{T}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell} = CuCellArray{T}(undef, dims) + end +end """ + @define_ROCCellArray + +Define the following type alias and constructors in the caller module: + +******************************************************************************** ROCCellArray{T<:Cell,N,B,T_elem} <: AbstractArray{T,N} where Cell <: Union{Number, SArray, FieldArray} `N`-dimensional CellArray with cells of type `T`, blocklength `B`, and `T_array` being a `ROCArray` of element type `T_elem`: alias for `CellArray{T,N,B,ROCArray{T_elem,CellArrays._N}}`. @@ -137,25 +168,25 @@ CuCellArray{T,N,B,T_elem} = CellArray{T,N,B,CuArray{T_elem,_N}} Construct an uninitialized `N`-dimensional `CellArray` containing `Cells` of type `T` which are stored in an array of kind `ROCArray`. See also: [`CellArray`](@ref), [`CPUCellArray`](@ref), [`CuCellArray`](@ref) -""" -ROCCellArray{T,N,B,T_elem} = CellArray{T,N,B,ROCArray{T_elem,_N}} +******************************************************************************** +!!! note "Avoiding unneeded dependencies" + The type aliases and constructors for GPU `CellArray`s are provided via macros to avoid unneeded dependencies on the GPU packages in CellArrays. -CPUCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B} = (check_T(T); CPUCellArray{T,N,B,eltype(T)}(undef, dims)) - CuCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B} = (check_T(T); CuCellArray{T,N,B,eltype(T)}(undef, dims)) -ROCCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N,B} = (check_T(T); ROCCellArray{T,N,B,eltype(T)}(undef, dims)) +See also: [`@define_CuCellArray`](@ref) +""" +macro define_ROCCellArray() esc(define_ROCCellArray()) end -CPUCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:Cell,B} = CPUCellArray{T,B}(undef, dims) - CuCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:Cell,B} = CuCellArray{T,B}(undef, dims) -ROCCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:Cell,B} = ROCCellArray{T,B}(undef, dims) +function define_ROCCellArray() + quote + const ROCCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,AMDGPU.ROCArray{T_elem,CellArrays._N}} -CPUCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N} = CPUCellArray{T,0}(undef, dims) - CuCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N} = CuCellArray{T,0}(undef, dims) -ROCCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:Cell,N} = ROCCellArray{T,0}(undef, dims) - -CPUCellArray{T}(::UndefInitializer, dims::Int...) where {T<:Cell} = CPUCellArray{T}(undef, dims) - CuCellArray{T}(::UndefInitializer, dims::Int...) where {T<:Cell} = CuCellArray{T}(undef, dims) -ROCCellArray{T}(::UndefInitializer, dims::Int...) where {T<:Cell} = ROCCellArray{T}(undef, dims) + ROCCellArray{T,B}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N,B} = (CellArrays.check_T(T); ROCCellArray{T,N,B,eltype(T)}(undef, dims)) + ROCCellArray{T,B}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell,B} = ROCCellArray{T,B}(undef, dims) + ROCCellArray{T}(::UndefInitializer, dims::NTuple{N,Int}) where {T<:CellArrays.Cell,N} = ROCCellArray{T,0}(undef, dims) + ROCCellArray{T}(::UndefInitializer, dims::Int...) where {T<:CellArrays.Cell} = ROCCellArray{T}(undef, dims) + end +end ## AbstractArray methods @@ -166,18 +197,6 @@ ROCCellArray{T}(::UndefInitializer, dims::Int...) where {T<:Cell} = ROCCellArray @inline Base.length(T::Type{<:Number}, args...) = 1 -@inline function Base.similar(A::CPUCellArray{T0,N0,B,T_elem0}, ::Type{T}, dims::NTuple{N,Int}) where {T0,N0,B,T_elem0,T<:Cell,N} - CPUCellArray{T,N,B,eltype(T)}(undef, dims) -end - -@inline function Base.similar(A::CuCellArray{T0,N0,B,T_elem0}, ::Type{T}, dims::NTuple{N,Int}) where {T0,N0,B,T_elem0,T<:Cell,N} - CuCellArray{T,N,B,eltype(T)}(undef, dims) -end - -@inline function Base.similar(A::ROCCellArray{T0,N0,B,T_elem0}, ::Type{T}, dims::NTuple{N,Int}) where {T0,N0,B,T_elem0,T<:Cell,N} - ROCCellArray{T,N,B,eltype(T)}(undef, dims) -end - @inline function Base.similar(A::CellArray{T0,N0,B,T_array0}, ::Type{T}, dims::NTuple{N,Int}) where {T0,N0,B,T_array0,T<:Cell,N} check_T(T) T_arraykind = Base.typename(T_array0).wrapper # Note: an alternative would be: T_array = typeof(similar(A.data, eltype(T), dims.*0)); CellArray{T,N,B}(T_array, dims) diff --git a/src/CellArrays.jl b/src/CellArrays.jl index 16b1fcc..86bab21 100644 --- a/src/CellArrays.jl +++ b/src/CellArrays.jl @@ -6,11 +6,11 @@ Provides support for an AbstractArray subtype `CellArray`, which represents arra # General overview and examples https://github.com/omlins/CellArray.jl -# Constructors +# Constructors and type aliases - [`CellArray`](@ref) - [`CPUCellArray`](@ref) -- [`CuCellArray`](@ref) -- [`ROCCellArray`](@ref) +- `CuCellArray` (available via [`@define_CuCellArray`](@ref)) +- `ROCCellArray` (available via [`@define_ROCCellArray`](@ref)) # Functions (additional to standard AbstractArray functionality) - [`cellsize`](@ref) @@ -31,5 +31,5 @@ using .Exceptions include("CellArray.jl") ## Exports (need to be after include of submodules if re-exports from them) -export CellArray, CPUCellArray, CuCellArray, ROCCellArray, cellsize, blocklength, field +export CellArray, CPUCellArray, @define_CuCellArray, @define_ROCCellArray, cellsize, blocklength, field end diff --git a/test/test_CellArray.jl b/test/test_CellArray.jl index 6ae29b6..e7d0068 100644 --- a/test/test_CellArray.jl +++ b/test/test_CellArray.jl @@ -1,9 +1,11 @@ using Test using CUDA, AMDGPU, StaticArrays import CellArrays -import CellArrays: CPUCellArray, CuCellArray, ROCCellArray, cellsize, blocklength, _N +import CellArrays: CPUCellArray, @define_CuCellArray, @define_ROCCellArray, cellsize, blocklength, _N import CellArrays: IncoherentArgumentError, ArgumentError +@define_CuCellArray +@define_ROCCellArray test_cuda = CUDA.functional() test_amdgpu = AMDGPU.functional()