From 4cb13918101400fe3a04bfab79346aef28241cff Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Fri, 18 Oct 2024 16:22:17 +0200 Subject: [PATCH 01/29] add padding in field allocators --- src/ParallelKernel/FieldAllocators.jl | 63 +++++++++++++++++++-------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/src/ParallelKernel/FieldAllocators.jl b/src/ParallelKernel/FieldAllocators.jl index 849f1c63..5f0ffac8 100644 --- a/src/ParallelKernel/FieldAllocators.jl +++ b/src/ParallelKernel/FieldAllocators.jl @@ -447,28 +447,55 @@ function _allocate(caller::Module; gridsize=nothing, fields=nothing, allocator=n end function _field(caller::Module, gridsize, allocator=:@zeros; eltype=nothing, sizetemplate=nothing) + padding = get_padding(caller) eltype = determine_eltype(caller, eltype) - if (sizetemplate == :X) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-2,-2) : (length($gridsize)==2) ? (-1,-2) : -1)) - elseif (sizetemplate == :Y) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-1,-2) : (length($gridsize)==2) ? (-2,-1) : -2)) - elseif (sizetemplate == :Z) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-2,-1) : (length($gridsize)==2) ? (-2,-2) : -2)) - elseif (sizetemplate == :BX) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (+1, 0, 0) : (length($gridsize)==2) ? (+1, 0) : +1)) - elseif (sizetemplate == :BY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,+1, 0) : (length($gridsize)==2) ? ( 0,+1) : 0)) - elseif (sizetemplate == :BZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0, 0,+1) : (length($gridsize)==2) ? ( 0, 0) : 0)) - elseif (sizetemplate == :XX) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,-2,-2) : (length($gridsize)==2) ? ( 0,-2) : 0)) - elseif (sizetemplate == :YY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2, 0,-2) : (length($gridsize)==2) ? (-2, 0) : -2)) - elseif (sizetemplate == :ZZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-2, 0) : (length($gridsize)==2) ? (-2,-2) : -2)) - elseif (sizetemplate == :XY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-1,-2) : (length($gridsize)==2) ? (-1,-1) : -1)) - elseif (sizetemplate == :XZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-2,-1) : (length($gridsize)==2) ? (-1,-2) : -1)) - elseif (sizetemplate == :YZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-1,-1) : (length($gridsize)==2) ? (-2,-1) : -2)) - else arraysize = gridsize + if padding + if (sizetemplate in (:X, :BX)) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (+1, 0, 0) : (length($gridsize)==2) ? (+1, 0) : +1)) + elseif (sizetemplate in (:Y, :BY)) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,+1, 0) : (length($gridsize)==2) ? ( 0,+1) : 0)) + elseif (sizetemplate in (:Z, :BZ)) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0, 0,+1) : (length($gridsize)==2) ? ( 0, 0) : 0)) + elseif (sizetemplate == :XY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (+1,+1, 0) : (length($gridsize)==2) ? (+1,+1) : +1)) + elseif (sizetemplate == :XZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (+1, 0,+1) : (length($gridsize)==2) ? (+1, 0) : +1)) + elseif (sizetemplate == :YZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,+1,+1) : (length($gridsize)==2) ? ( 0,+1) : 0)) + elseif (isnothing(sizetemplate) || sizetemplate in (:XX, :YY, :ZZ)) arraysize = gridsize + else @ModuleInternalError("unexpected sizetemplate.") + end + else + if (sizetemplate == :X) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-2,-2) : (length($gridsize)==2) ? (-1,-2) : -1)) + elseif (sizetemplate == :Y) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-1,-2) : (length($gridsize)==2) ? (-2,-1) : -2)) + elseif (sizetemplate == :Z) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-2,-1) : (length($gridsize)==2) ? (-2,-2) : -2)) + elseif (sizetemplate == :BX) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (+1, 0, 0) : (length($gridsize)==2) ? (+1, 0) : +1)) + elseif (sizetemplate == :BY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,+1, 0) : (length($gridsize)==2) ? ( 0,+1) : 0)) + elseif (sizetemplate == :BZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0, 0,+1) : (length($gridsize)==2) ? ( 0, 0) : 0)) + elseif (sizetemplate == :XX) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,-2,-2) : (length($gridsize)==2) ? ( 0,-2) : 0)) + elseif (sizetemplate == :YY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2, 0,-2) : (length($gridsize)==2) ? (-2, 0) : -2)) + elseif (sizetemplate == :ZZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-2, 0) : (length($gridsize)==2) ? (-2,-2) : -2)) + elseif (sizetemplate == :XY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-1,-2) : (length($gridsize)==2) ? (-1,-1) : -1)) + elseif (sizetemplate == :XZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-2,-1) : (length($gridsize)==2) ? (-1,-2) : -1)) + elseif (sizetemplate == :YZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-1,-1) : (length($gridsize)==2) ? (-2,-1) : -2)) + elseif isnothing(sizetemplate) arraysize = gridsize + else @ModuleInternalError("unexpected sizetemplate.") + end end - if is_same(allocator, :@zeros) return :(ParallelStencil.ParallelKernel.@zeros($arraysize..., eltype=$eltype)) - elseif is_same(allocator, :@ones) return :(ParallelStencil.ParallelKernel.@ones($arraysize..., eltype=$eltype)) - elseif is_same(allocator, :@rand) return :(ParallelStencil.ParallelKernel.@rand($arraysize..., eltype=$eltype)) - elseif is_same(allocator, :@falses) return :(ParallelStencil.ParallelKernel.@falses($arraysize..., eltype=$eltype)) - elseif is_same(allocator, :@trues) return :(ParallelStencil.ParallelKernel.@trues($arraysize..., eltype=$eltype)) + + if is_same(allocator, :@zeros) arrayalloc = :(ParallelStencil.ParallelKernel.@zeros($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@ones) arrayalloc = :(ParallelStencil.ParallelKernel.@ones($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@rand) arrayalloc = :(ParallelStencil.ParallelKernel.@rand($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@falses) arrayalloc = :(ParallelStencil.ParallelKernel.@falses($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@trues) arrayalloc = :(ParallelStencil.ParallelKernel.@trues($arraysize..., eltype=$eltype)) else @ModuleInternalError("unexpected allocator macro.") end + + if padding + if (sizetemplate in (:X, :Y, :Z, :XY, :XZ, :YZ)) return :(view($arrayalloc, (:).(2, $arraysize.-1)...)) + elseif (sizetemplate == :XX) return :(view($arrayalloc, (:).(map(+, $gridsize.*0, (1,2,2)), map(+, $arraysize, ( 0,-1,-1)))...)) + elseif (sizetemplate == :YY) return :(view($arrayalloc, (:).(map(+, $gridsize.*0, (2,1,2)), map(+, $arraysize, (-1, 0,-1)))...)) + elseif (sizetemplate == :ZZ) return :(view($arrayalloc, (:).(map(+, $gridsize.*0, (2,2,1)), map(+, $arraysize, (-1,-1, 0)))...)) + elseif (isnothing(sizetemplate) || sizetemplate in (:BX, :BY, :BZ)) return arrayalloc + else @ModuleInternalError("unexpected sizetemplate.") + end + else + return arrayalloc + end end function _vectorfield(caller::Module, gridsize, allocator=:@zeros; eltype=nothing, sizetemplate=nothing) From e20c2cafb3719a92b53aa529eda08c6905e32bd5 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Fri, 18 Oct 2024 16:23:06 +0200 Subject: [PATCH 02/29] add padding keyword argument --- src/ParallelKernel/init_parallel_kernel.jl | 26 +++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/ParallelKernel/init_parallel_kernel.jl b/src/ParallelKernel/init_parallel_kernel.jl index e91ed867..6dcb322f 100644 --- a/src/ParallelKernel/init_parallel_kernel.jl +++ b/src/ParallelKernel/init_parallel_kernel.jl @@ -1,5 +1,6 @@ """ @init_parallel_kernel(package, numbertype) + @init_parallel_kernel(package, numbertype, inbounds=..., padding=...) Initialize the package ParallelKernel, giving access to its main functionality. Creates a module `Data` in the module where `@init_parallel_kernel` is called from. The module `Data` contains the types as `Data.Number`, `Data.Array` and `Data.CellArray` (type `?Data` *after* calling `@init_parallel_kernel` to see the full description of the module). @@ -7,25 +8,26 @@ Initialize the package ParallelKernel, giving access to its main functionality. - `package::Module`: the package used for parallelization (CUDA or AMDGPU for GPU, or Threads or Polyester for CPU). - `numbertype::DataType`: the type of numbers used by @zeros, @ones, @rand and @fill and in all array types of module `Data` (e.g. Float32 or Float64). It is contained in `Data.Number` after @init_parallel_kernel. - `inbounds::Bool=false`: whether to apply `@inbounds` to the kernels by default (overwritable in each kernel definition). +- `padding::Bool=false`: whether to apply padding to the fields allocated with macros from [`ParallelKernel.FieldAllocators`](@ref). See also: [`Data`](@ref) """ macro init_parallel_kernel(args...) check_already_initialized(__module__) posargs, kwargs_expr = split_args(args) - if (length(args) > 3) @ArgumentError("too many arguments.") + if (length(args) > 4) @ArgumentError("too many arguments.") elseif (0 < length(posargs) < 2) @ArgumentError("there must be either two or zero positional arguments.") end kwargs = split_kwargs(kwargs_expr) if (length(posargs) == 2) package, numbertype_val = extract_posargs_init(__module__, posargs...) else package, numbertype_val = extract_kwargs_init(__module__, kwargs) end - inbounds_val = extract_kwargs_nopos(__module__, kwargs) + inbounds_val, padding_val = extract_kwargs_nopos(__module__, kwargs) if (package == PKG_NONE) @ArgumentError("the package argument cannot be ommited.") end #TODO: this error message will disappear, once the package can be defined at runtime. - esc(init_parallel_kernel(__module__, package, numbertype_val, inbounds_val)) + esc(init_parallel_kernel(__module__, package, numbertype_val, inbounds_val, padding_val)) end -function init_parallel_kernel(caller::Module, package::Symbol, numbertype::DataType, inbounds::Bool; datadoc_call=:(), parent_module::String="ParallelKernel") +function init_parallel_kernel(caller::Module, package::Symbol, numbertype::DataType, inbounds::Bool, padding::Bool; datadoc_call=:(), parent_module::String="ParallelKernel") if package == PKG_CUDA if (isinteractive() && !is_installed("CUDA")) @NotInstalledError("CUDA was selected as package for parallelization, but CUDA.jl is not installed. CUDA functionality is provided as an extension of $parent_module and CUDA.jl needs therefore to be installed independently (type `add CUDA` in the julia package manager).") end indextype = INT_CUDA @@ -74,6 +76,7 @@ function init_parallel_kernel(caller::Module, package::Symbol, numbertype::DataT set_package(caller, package) set_numbertype(caller, numbertype) set_inbounds(caller, inbounds) + set_padding(caller, padding) set_initialized(caller, true) return nothing end @@ -83,12 +86,14 @@ macro is_initialized() is_initialized(__module__) end macro get_package() esc(get_package(__module__)) end # NOTE: escaping is required here, to avoid that the symbol is evaluated in this module, instead of just being returned as a symbol. macro get_numbertype() get_numbertype(__module__) end macro get_inbounds() get_inbounds(__module__) end +macro get_padding() get_padding(__module__) end let - global is_initialized, set_initialized, set_package, get_package, set_numbertype, get_numbertype, set_inbounds, get_inbounds, check_initialized, check_already_initialized + global is_initialized, set_initialized, set_package, get_package, set_numbertype, get_numbertype, set_inbounds, get_inbounds, set_padding, get_padding, check_initialized, check_already_initialized _is_initialized::Dict{Module, Bool} = Dict{Module, Bool}() package::Dict{Module, Symbol} = Dict{Module, Symbol}() numbertype::Dict{Module, DataType} = Dict{Module, DataType}() inbounds::Dict{Module, Bool} = Dict{Module, Bool}() + padding::Dict{Module, Bool} = Dict{Module, Bool}() set_initialized(caller::Module, flag::Bool) = (_is_initialized[caller] = flag) is_initialized(caller::Module) = haskey(_is_initialized, caller) && _is_initialized[caller] set_package(caller::Module, pkg::Symbol) = (package[caller] = pkg) @@ -97,6 +102,8 @@ let get_numbertype(caller::Module) = numbertype[caller] set_inbounds(caller::Module, flag::Bool) = (inbounds[caller] = flag) get_inbounds(caller::Module) = inbounds[caller] + set_padding(caller::Module, flag::Bool) = (padding[caller] = flag) + get_padding(caller::Module) = padding[caller] check_initialized(caller::Module) = if !is_initialized(caller) @NotInitializedError("no ParallelKernel macro or function can be called before @init_parallel_kernel in each module (missing call in $caller).") end check_already_initialized(caller::Module) = if is_initialized(caller) @IncoherentCallError("ParallelKernel has already been initialized for the module $caller.") end end @@ -109,8 +116,8 @@ function extract_posargs_init(caller::Module, package, numbertype) # NOTE: this end function extract_kwargs_init(caller::Module, kwargs::Dict) - if (:package in keys(kwargs)) package = kwargs[:package]; check_package(package) - else package = PKG_NONE + if (:package in keys(kwargs)) package = kwargs[:package]; check_package(package) + else package = PKG_NONE end if (:numbertype in keys(kwargs)) numbertype_val = eval_arg(caller, kwargs[:numbertype]); check_numbertype(numbertype_val) else numbertype_val = NUMBERTYPE_NONE @@ -122,7 +129,10 @@ function extract_kwargs_nopos(caller::Module, kwargs::Dict) if (:inbounds in keys(kwargs)) inbounds_val = eval_arg(caller, kwargs[:inbounds]); check_inbounds(inbounds_val) else inbounds_val = false end - return inbounds_val + if (:padding in keys(kwargs)) padding_val = eval_arg(caller, kwargs[:padding]); check_padding(padding_val) + else padding_val = false + end + return inbounds_val, padding end function define_import(caller::Module, package::Symbol, parent_module::String) From e6283ad57b8d30b7f01d82e739abf8709cf0a02e Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Fri, 18 Oct 2024 16:23:28 +0200 Subject: [PATCH 03/29] add padding keyword argument --- src/init_parallel_stencil.jl | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/init_parallel_stencil.jl b/src/init_parallel_stencil.jl index 0cd790ad..3cb5dcde 100644 --- a/src/init_parallel_stencil.jl +++ b/src/init_parallel_stencil.jl @@ -37,28 +37,29 @@ See also: [`Data`](@ref) """ macro init_parallel_stencil(args...) posargs, kwargs_expr = split_args(args) - if (length(args) > 5) @ArgumentError("too many arguments.") + if (length(args) > 6) @ArgumentError("too many arguments.") elseif (0 < length(posargs) < 3) @ArgumentError("there must be either three or zero positional arguments.") end kwargs = split_kwargs(kwargs_expr) if (length(posargs) == 3) package, numbertype_val, ndims_val = extract_posargs_init(__module__, posargs...) else package, numbertype_val, ndims_val = extract_kwargs_init(__module__, kwargs) end - inbounds_val, memopt_val = extract_kwargs_nopos(__module__, kwargs) + inbounds_val, padding_val, memopt_val = extract_kwargs_nopos(__module__, kwargs) if (package == PKG_NONE) @ArgumentError("the package argument cannot be ommited.") end #TODO: this error message will disappear, once the package can be defined at runtime. - check_already_initialized(__module__, package, numbertype_val, ndims_val, inbounds_val, memopt_val) - esc(init_parallel_stencil(__module__, package, numbertype_val, ndims_val, inbounds_val, memopt_val)) + check_already_initialized(__module__, package, numbertype_val, ndims_val, inbounds_val, padding_val, memopt_val) + esc(init_parallel_stencil(__module__, package, numbertype_val, ndims_val, inbounds_val, padding_val, memopt_val)) end -function init_parallel_stencil(caller::Module, package::Symbol, numbertype::DataType, ndims::Integer, inbounds::Bool, memopt::Bool) +function init_parallel_stencil(caller::Module, package::Symbol, numbertype::DataType, ndims::Integer, inbounds::Bool, padding::Bool, memopt::Bool) if (numbertype == NUMBERTYPE_NONE) datadoc_call = :(@doc replace(ParallelStencil.ParallelKernel.DATA_DOC_NUMBERTYPE_NONE, "ParallelKernel" => "ParallelStencil", "@init_parallel_kernel" => "@init_parallel_stencil") Data) else datadoc_call = :(@doc replace(ParallelStencil.ParallelKernel.DATA_DOC, "ParallelKernel" => "ParallelStencil", "@init_parallel_kernel" => "@init_parallel_stencil") Data) end - return_expr = ParallelKernel.init_parallel_kernel(caller, package, numbertype, inbounds; datadoc_call=datadoc_call, parent_module="ParallelStencil") + return_expr = ParallelKernel.init_parallel_kernel(caller, package, numbertype, inbounds, padding; datadoc_call=datadoc_call, parent_module="ParallelStencil") set_package(caller, package) set_numbertype(caller, numbertype) set_ndims(caller, ndims) set_inbounds(caller, inbounds) + set_padding(caller, padding) set_memopt(caller, memopt) set_initialized(caller, true) return return_expr @@ -70,14 +71,16 @@ macro get_package() esc(get_package(__module__)) end # NOTE: escaping is require macro get_numbertype() get_numbertype(__module__) end macro get_ndims() get_ndims(__module__) end macro get_inbounds() get_inbounds(__module__) end +macro get_padding() get_padding(__module__) end macro get_memopt() get_memopt(__module__) end let - global is_initialized, set_initialized, set_package, get_package, set_numbertype, get_numbertype, set_ndims, get_ndims, set_inbounds, get_inbounds, set_memopt, get_memopt, check_initialized, check_already_initialized + global is_initialized, set_initialized, set_package, get_package, set_numbertype, get_numbertype, set_ndims, get_ndims, set_inbounds, get_inbounds, set_padding, get_padding, set_memopt, get_memopt, check_initialized, check_already_initialized _is_initialized::Dict{Module, Bool} = Dict{Module, Bool}() package::Dict{Module, Symbol} = Dict{Module, Symbol}() numbertype::Dict{Module, DataType} = Dict{Module, DataType}() ndims::Dict{Module, Integer} = Dict{Module, Integer}() inbounds::Dict{Module, Bool} = Dict{Module, Bool}() + padding::Dict{Module, Bool} = Dict{Module, Bool}() memopt::Dict{Module, Bool} = Dict{Module, Bool}() set_initialized(caller::Module, flag::Bool) = (_is_initialized[caller] = flag) is_initialized(caller::Module) = haskey(_is_initialized, caller) && _is_initialized[caller] @@ -89,13 +92,15 @@ let get_ndims(caller::Module) = ndims[caller] set_inbounds(caller::Module, flag::Bool) = (inbounds[caller] = flag) get_inbounds(caller::Module) = inbounds[caller] + set_padding(caller::Module, flag::Bool) = (padding[caller] = flag) + get_padding(caller::Module) = padding[caller] set_memopt(caller::Module, flag::Bool) = (memopt[caller] = flag) get_memopt(caller::Module) = memopt[caller] check_initialized(caller::Module) = if !is_initialized(caller) @NotInitializedError("no ParallelStencil macro or function can be called before @init_parallel_stencil in each module (missing call in $caller).") end - function check_already_initialized(caller::Module, package::Symbol, numbertype::DataType, ndims::Integer, inbounds::Bool, memopt::Bool) + function check_already_initialized(caller::Module, package::Symbol, numbertype::DataType, ndims::Integer, inbounds::Bool, padding::Bool, memopt::Bool) if is_initialized(caller) - if package==get_package(caller) && numbertype==get_numbertype(caller) && ndims==get_ndims(caller) && inbounds==get_inbounds(caller) && memopt==get_memopt(caller) + if package==get_package(caller) && numbertype==get_numbertype(caller) && ndims==get_ndims(caller) && inbounds==get_inbounds(caller) && padding==get_padding(caller) && memopt==get_memopt(caller) if !isinteractive() @warn "ParallelStencil has already been initialized for the module $caller, with the same arguments. You are likely using ParallelStencil in an inconsistent way: @init_parallel_stencil should only be called once at the beginning of each module, right after 'using ParallelStencil'. Note: this warning is only shown in non-interactive mode." end else @IncoherentCallError("ParallelStencil has already been initialized for the module $caller, with different arguments. If you are using ParallelStencil interactively in the REPL and want to avoid restarting Julia, then you can call ParallelStencil.@reset_parallel_stencil() and rerun all parts of your code (in module $caller) that use ParallelStencil features (including kernel definitions and array allocations). If you are using ParallelStencil non-interactively, then you are using ParallelStencil in an invalid way: @init_parallel_stencil should only be called once at the beginning of each module, right after 'using ParallelStencil'.") @@ -120,9 +125,9 @@ function extract_kwargs_init(caller::Module, kwargs::Dict) end function extract_kwargs_nopos(caller::Module, kwargs::Dict) - inbounds_val = ParallelKernel.extract_kwargs_nopos(caller, kwargs) + inbounds_val, padding_val = ParallelKernel.extract_kwargs_nopos(caller, kwargs) if (:memopt in keys(kwargs)) memopt_val = eval_arg(caller, kwargs[:memopt]); check_memopt(memopt_val) else memopt_val = false end - return inbounds_val, memopt_val + return inbounds_val, padding_val, memopt_val end \ No newline at end of file From bb6731522fd33edb119f693e573ff68cc5971fda Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Fri, 18 Oct 2024 16:39:35 +0200 Subject: [PATCH 04/29] add padding in field allocators --- src/ParallelKernel/FieldAllocators.jl | 50 +++++++++++++-------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/ParallelKernel/FieldAllocators.jl b/src/ParallelKernel/FieldAllocators.jl index 5f0ffac8..be5495c3 100644 --- a/src/ParallelKernel/FieldAllocators.jl +++ b/src/ParallelKernel/FieldAllocators.jl @@ -450,38 +450,38 @@ function _field(caller::Module, gridsize, allocator=:@zeros; eltype=nothing, siz padding = get_padding(caller) eltype = determine_eltype(caller, eltype) if padding - if (sizetemplate in (:X, :BX)) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (+1, 0, 0) : (length($gridsize)==2) ? (+1, 0) : +1)) - elseif (sizetemplate in (:Y, :BY)) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,+1, 0) : (length($gridsize)==2) ? ( 0,+1) : 0)) - elseif (sizetemplate in (:Z, :BZ)) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0, 0,+1) : (length($gridsize)==2) ? ( 0, 0) : 0)) - elseif (sizetemplate == :XY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (+1,+1, 0) : (length($gridsize)==2) ? (+1,+1) : +1)) - elseif (sizetemplate == :XZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (+1, 0,+1) : (length($gridsize)==2) ? (+1, 0) : +1)) - elseif (sizetemplate == :YZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,+1,+1) : (length($gridsize)==2) ? ( 0,+1) : 0)) + if (sizetemplate in (:X, :BX)) arraysize = :(map(+, $gridsize, (+1, 0, 0))) + elseif (sizetemplate in (:Y, :BY)) arraysize = :(map(+, $gridsize, ( 0,+1, 0))) + elseif (sizetemplate in (:Z, :BZ)) arraysize = :(map(+, $gridsize, ( 0, 0,+1))) + elseif (sizetemplate == :XY) arraysize = :(map(+, $gridsize, (+1,+1, 0))) + elseif (sizetemplate == :XZ) arraysize = :(map(+, $gridsize, (+1, 0,+1))) + elseif (sizetemplate == :YZ) arraysize = :(map(+, $gridsize, ( 0,+1,+1))) elseif (isnothing(sizetemplate) || sizetemplate in (:XX, :YY, :ZZ)) arraysize = gridsize else @ModuleInternalError("unexpected sizetemplate.") end else - if (sizetemplate == :X) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-2,-2) : (length($gridsize)==2) ? (-1,-2) : -1)) - elseif (sizetemplate == :Y) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-1,-2) : (length($gridsize)==2) ? (-2,-1) : -2)) - elseif (sizetemplate == :Z) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-2,-1) : (length($gridsize)==2) ? (-2,-2) : -2)) - elseif (sizetemplate == :BX) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (+1, 0, 0) : (length($gridsize)==2) ? (+1, 0) : +1)) - elseif (sizetemplate == :BY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,+1, 0) : (length($gridsize)==2) ? ( 0,+1) : 0)) - elseif (sizetemplate == :BZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0, 0,+1) : (length($gridsize)==2) ? ( 0, 0) : 0)) - elseif (sizetemplate == :XX) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,-2,-2) : (length($gridsize)==2) ? ( 0,-2) : 0)) - elseif (sizetemplate == :YY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2, 0,-2) : (length($gridsize)==2) ? (-2, 0) : -2)) - elseif (sizetemplate == :ZZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-2, 0) : (length($gridsize)==2) ? (-2,-2) : -2)) - elseif (sizetemplate == :XY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-1,-2) : (length($gridsize)==2) ? (-1,-1) : -1)) - elseif (sizetemplate == :XZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-2,-1) : (length($gridsize)==2) ? (-1,-2) : -1)) - elseif (sizetemplate == :YZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-1,-1) : (length($gridsize)==2) ? (-2,-1) : -2)) - elseif isnothing(sizetemplate) arraysize = gridsize + if (sizetemplate == :X) arraysize = :(map(+, $gridsize, (-1,-2,-2))) + elseif (sizetemplate == :Y) arraysize = :(map(+, $gridsize, (-2,-1,-2))) + elseif (sizetemplate == :Z) arraysize = :(map(+, $gridsize, (-2,-2,-1))) + elseif (sizetemplate == :BX) arraysize = :(map(+, $gridsize, (+1, 0, 0))) + elseif (sizetemplate == :BY) arraysize = :(map(+, $gridsize, ( 0,+1, 0))) + elseif (sizetemplate == :BZ) arraysize = :(map(+, $gridsize, ( 0, 0,+1))) + elseif (sizetemplate == :XX) arraysize = :(map(+, $gridsize, ( 0,-2,-2))) + elseif (sizetemplate == :YY) arraysize = :(map(+, $gridsize, (-2, 0,-2))) + elseif (sizetemplate == :ZZ) arraysize = :(map(+, $gridsize, (-2,-2, 0))) + elseif (sizetemplate == :XY) arraysize = :(map(+, $gridsize, (-1,-1,-2))) + elseif (sizetemplate == :XZ) arraysize = :(map(+, $gridsize, (-1,-2,-1))) + elseif (sizetemplate == :YZ) arraysize = :(map(+, $gridsize, (-2,-1,-1))) + elseif isnothing(sizetemplate) arraysize = gridsize else @ModuleInternalError("unexpected sizetemplate.") end end - - if is_same(allocator, :@zeros) arrayalloc = :(ParallelStencil.ParallelKernel.@zeros($arraysize..., eltype=$eltype)) - elseif is_same(allocator, :@ones) arrayalloc = :(ParallelStencil.ParallelKernel.@ones($arraysize..., eltype=$eltype)) - elseif is_same(allocator, :@rand) arrayalloc = :(ParallelStencil.ParallelKernel.@rand($arraysize..., eltype=$eltype)) - elseif is_same(allocator, :@falses) arrayalloc = :(ParallelStencil.ParallelKernel.@falses($arraysize..., eltype=$eltype)) - elseif is_same(allocator, :@trues) arrayalloc = :(ParallelStencil.ParallelKernel.@trues($arraysize..., eltype=$eltype)) + + if is_same(allocator, :@zeros) arrayalloc = :(ParallelStencil.ParallelKernel.@zeros($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@ones) arrayalloc = :(ParallelStencil.ParallelKernel.@ones($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@rand) arrayalloc = :(ParallelStencil.ParallelKernel.@rand($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@falses) arrayalloc = :(ParallelStencil.ParallelKernel.@falses($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@trues) arrayalloc = :(ParallelStencil.ParallelKernel.@trues($arraysize..., eltype=$eltype)) else @ModuleInternalError("unexpected allocator macro.") end From fdc4a1b2c330498624c333f44cecf3d2c99bc51d Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Fri, 18 Oct 2024 17:50:36 +0200 Subject: [PATCH 05/29] add padding in field allocators --- src/ParallelKernel/FieldAllocators.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ParallelKernel/FieldAllocators.jl b/src/ParallelKernel/FieldAllocators.jl index be5495c3..0a37e9d1 100644 --- a/src/ParallelKernel/FieldAllocators.jl +++ b/src/ParallelKernel/FieldAllocators.jl @@ -448,7 +448,7 @@ end function _field(caller::Module, gridsize, allocator=:@zeros; eltype=nothing, sizetemplate=nothing) padding = get_padding(caller) - eltype = determine_eltype(caller, eltype) + eltype = determine_eltype(caller, eltype) if padding if (sizetemplate in (:X, :BX)) arraysize = :(map(+, $gridsize, (+1, 0, 0))) elseif (sizetemplate in (:Y, :BY)) arraysize = :(map(+, $gridsize, ( 0,+1, 0))) @@ -490,7 +490,7 @@ function _field(caller::Module, gridsize, allocator=:@zeros; eltype=nothing, siz elseif (sizetemplate == :XX) return :(view($arrayalloc, (:).(map(+, $gridsize.*0, (1,2,2)), map(+, $arraysize, ( 0,-1,-1)))...)) elseif (sizetemplate == :YY) return :(view($arrayalloc, (:).(map(+, $gridsize.*0, (2,1,2)), map(+, $arraysize, (-1, 0,-1)))...)) elseif (sizetemplate == :ZZ) return :(view($arrayalloc, (:).(map(+, $gridsize.*0, (2,2,1)), map(+, $arraysize, (-1,-1, 0)))...)) - elseif (isnothing(sizetemplate) || sizetemplate in (:BX, :BY, :BZ)) return arrayalloc + elseif (isnothing(sizetemplate) || sizetemplate in (:BX, :BY, :BZ)) return :(view($arrayalloc, (:).(1, $arraysize)...)) else @ModuleInternalError("unexpected sizetemplate.") end else From 1919d634f4668ba3a95c7bcc01744a79e8e06a1d Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Fri, 18 Oct 2024 17:51:13 +0200 Subject: [PATCH 06/29] add unit tests for padding --- test/ParallelKernel/test_allocators.jl | 102 +++++++++++++++++++++++-- 1 file changed, 97 insertions(+), 5 deletions(-) diff --git a/test/ParallelKernel/test_allocators.jl b/test/ParallelKernel/test_allocators.jl index 07a701de..f84bafa6 100644 --- a/test/ParallelKernel/test_allocators.jl +++ b/test/ParallelKernel/test_allocators.jl @@ -458,9 +458,9 @@ const DATA_INDEX = ParallelStencil.INT_THREADS # TODO: using Data.Index does not end @reset_parallel_kernel() end; - @testset "6. Fields" begin + @testset "6. Fields (padding=$padding)" for padding in (false, true) @require !@is_initialized() - @init_parallel_kernel($package, Float16) + @init_parallel_kernel($package, Float16, padding=$padding) @require @is_initialized() (nx, ny, nz) = (3, 4, 5) @testset "mapping to array allocators" begin @@ -489,7 +489,7 @@ const DATA_INDEX = ParallelStencil.INT_THREADS # TODO: using Data.Index does not @test occursin("@trues", @prettystring(1, @YZField((nx, ny, nz), @trues, eltype=Float32))) end; end; - @testset "gridsize (3D)" begin + @testset "field size (3D)" begin @test size( @Field((nx, ny, nz))) == (nx, ny, nz ) @test size( @XField((nx, ny, nz))) == (nx-1, ny-2, nz-2) @test size( @YField((nx, ny, nz))) == (nx-2, ny-1, nz-2) @@ -508,7 +508,7 @@ const DATA_INDEX = ParallelStencil.INT_THREADS # TODO: using Data.Index does not @test size.(Tuple( @TensorField((nx, ny, nz)))) == (size(@XXField((nx, ny, nz))), size(@YYField((nx, ny, nz))), size(@ZZField((nx, ny, nz))), size(@XYField((nx, ny, nz))), size(@XZField((nx, ny, nz))), size(@YZField((nx, ny, nz)))) end; - @testset "gridsize (2D)" begin + @testset "field size (2D)" begin @test size( @Field((nx, ny))) == (nx, ny, ) @test size( @XField((nx, ny))) == (nx-1, ny-2) @test size( @YField((nx, ny))) == (nx-2, ny-1) @@ -527,7 +527,7 @@ const DATA_INDEX = ParallelStencil.INT_THREADS # TODO: using Data.Index does not @test size.(Tuple( @TensorField((nx, ny)))) == (size(@XXField((nx, ny))), size(@YYField((nx, ny))), size(@XYField((nx, ny)))) end; - @testset "gridsize (1D)" begin + @testset "field size (1D)" begin @test size( @Field((nx,))) == (nx, ) @test size( @XField((nx,))) == (nx-1,) @test size( @YField((nx,))) == (nx-2,) @@ -545,6 +545,98 @@ const DATA_INDEX = ParallelStencil.INT_THREADS # TODO: using Data.Index does not @test size.(Tuple(@BVectorField((nx,)))) == (size(@BXField((nx,))),) @test size.(Tuple( @TensorField((nx,)))) == (size(@XXField((nx,))),) end; + @static if $padding + @testset "array size (3D)" begin + @test size( @Field((nx, ny, nz)).parent) == (nx, ny, nz ) + @test size( @XField((nx, ny, nz)).parent) == (nx+1, ny, nz ) + @test size( @YField((nx, ny, nz)).parent) == (nx, ny+1, nz ) + @test size( @ZField((nx, ny, nz)).parent) == (nx, ny, nz+1) + @test size(@BXField((nx, ny, nz)).parent) == (nx+1, ny, nz ) + @test size(@BYField((nx, ny, nz)).parent) == (nx, ny+1, nz ) + @test size(@BZField((nx, ny, nz)).parent) == (nx, ny, nz+1) + @test size(@XXField((nx, ny, nz)).parent) == (nx, ny, nz ) + @test size(@YYField((nx, ny, nz)).parent) == (nx, ny, nz ) + @test size(@ZZField((nx, ny, nz)).parent) == (nx, ny, nz ) + @test size(@XYField((nx, ny, nz)).parent) == (nx+1, ny+1, nz ) + @test size(@XZField((nx, ny, nz)).parent) == (nx+1, ny, nz+1) + @test size(@YZField((nx, ny, nz)).parent) == (nx, ny+1, nz+1) + end; + @testset "array size (2D)" begin + @test size( @Field((nx, ny)).parent) == (nx, ny ) + @test size( @XField((nx, ny)).parent) == (nx+1, ny ) + @test size( @YField((nx, ny)).parent) == (nx, ny+1) + @test size( @ZField((nx, ny)).parent) == (nx, ny ) + @test size(@BXField((nx, ny)).parent) == (nx+1, ny ) + @test size(@BYField((nx, ny)).parent) == (nx, ny+1) + @test size(@BZField((nx, ny)).parent) == (nx, ny ) + @test size(@XXField((nx, ny)).parent) == (nx, ny ) + @test size(@YYField((nx, ny)).parent) == (nx, ny ) + @test size(@ZZField((nx, ny)).parent) == (nx, ny ) + @test size(@XYField((nx, ny)).parent) == (nx+1, ny+1) + @test size(@XZField((nx, ny)).parent) == (nx+1, ny ) + @test size(@YZField((nx, ny)).parent) == (nx, ny+1) + end; + @testset "array size (1D)" begin + @test size( @Field((nx,)).parent) == (nx, ) + @test size( @XField((nx,)).parent) == (nx+1,) + @test size( @YField((nx,)).parent) == (nx, ) + @test size( @ZField((nx,)).parent) == (nx, ) + @test size(@BXField((nx,)).parent) == (nx+1,) + @test size(@BYField((nx,)).parent) == (nx, ) + @test size(@BZField((nx,)).parent) == (nx, ) + @test size(@XXField((nx,)).parent) == (nx, ) + @test size(@YYField((nx,)).parent) == (nx, ) + @test size(@ZZField((nx,)).parent) == (nx, ) + @test size(@XYField((nx,)).parent) == (nx+1,) + @test size(@XZField((nx,)).parent) == (nx+1,) + @test size(@YZField((nx,)).parent) == (nx, ) + end; + @testset "view ranges (3D)" begin + @test @Field((nx, ny, nz)).indices == (1:nx, 1:ny, 1:nz ) + @test @XField((nx, ny, nz)).indices == (2:nx, 2:ny-1, 2:nz-1) + @test @YField((nx, ny, nz)).indices == (2:nx-1, 2:ny, 2:nz-1) + @test @ZField((nx, ny, nz)).indices == (2:nx-1, 2:ny-1, 2:nz ) + @test @BXField((nx, ny, nz)).indices == (1:nx+1, 1:ny, 1:nz ) + @test @BYField((nx, ny, nz)).indices == (1:nx, 1:ny+1, 1:nz ) + @test @BZField((nx, ny, nz)).indices == (1:nx, 1:ny, 1:nz+1) + @test @XXField((nx, ny, nz)).indices == (1:nx, 2:ny-1, 2:nz-1) + @test @YYField((nx, ny, nz)).indices == (2:nx-1, 1:ny, 2:nz-1) + @test @ZZField((nx, ny, nz)).indices == (2:nx-1, 2:ny-1, 1:nz ) + @test @XYField((nx, ny, nz)).indices == (2:nx, 2:ny, 2:nz-1) + @test @XZField((nx, ny, nz)).indices == (2:nx, 2:ny-1, 2:nz ) + @test @YZField((nx, ny, nz)).indices == (2:nx-1, 2:ny, 2:nz ) + end; + @testset "view ranges (2D)" begin + @test @Field((nx, ny)).indices == (1:nx, 1:ny ) + @test @XField((nx, ny)).indices == (2:nx, 2:ny-1) + @test @YField((nx, ny)).indices == (2:nx-1, 2:ny ) + @test @ZField((nx, ny)).indices == (2:nx-1, 2:ny-1) + @test @BXField((nx, ny)).indices == (1:nx+1, 1:ny ) + @test @BYField((nx, ny)).indices == (1:nx, 1:ny+1) + @test @BZField((nx, ny)).indices == (1:nx, 1:ny ) + @test @XXField((nx, ny)).indices == (1:nx, 2:ny-1) + @test @YYField((nx, ny)).indices == (2:nx-1, 1:ny ) + @test @ZZField((nx, ny)).indices == (2:nx-1, 2:ny-1) + @test @XYField((nx, ny)).indices == (2:nx, 2:ny ) + @test @XZField((nx, ny)).indices == (2:nx, 2:ny-1) + @test @YZField((nx, ny)).indices == (2:nx-1, 2:ny ) + end; + @testset "view ranges (1D)" begin + @test @Field((nx,)).indices == (1:nx, ) + @test @XField((nx,)).indices == (2:nx, ) + @test @YField((nx,)).indices == (2:nx-1,) + @test @ZField((nx,)).indices == (2:nx-1,) + @test @BXField((nx,)).indices == (1:nx+1,) + @test @BYField((nx,)).indices == (1:nx, ) + @test @BZField((nx,)).indices == (1:nx, ) + @test @XXField((nx,)).indices == (1:nx, ) + @test @YYField((nx,)).indices == (2:nx-1,) + @test @ZZField((nx,)).indices == (2:nx-1,) + @test @XYField((nx,)).indices == (2:nx, ) + @test @XZField((nx,)).indices == (2:nx, ) + @test @YZField((nx,)).indices == (2:nx-1,) + end; + end; @testset "eltype" begin @test eltype(@Field((nx, ny, nz))) == Float16 @test eltype(@Field((nx, ny, nz), eltype=Float32)) == Float32 From 4a838ac5955caeb7175ecadd8bcebbb735ba5bd1 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Fri, 18 Oct 2024 17:51:45 +0200 Subject: [PATCH 07/29] add unit tests for padding initialization --- test/ParallelKernel/test_init_parallel_kernel.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/ParallelKernel/test_init_parallel_kernel.jl b/test/ParallelKernel/test_init_parallel_kernel.jl index d2599597..c539da73 100644 --- a/test/ParallelKernel/test_init_parallel_kernel.jl +++ b/test/ParallelKernel/test_init_parallel_kernel.jl @@ -1,7 +1,7 @@ using Test import ParallelStencil using ParallelStencil.ParallelKernel -import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, @get_package, @get_numbertype, @get_inbounds, NUMBERTYPE_NONE, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, SCALARTYPES, ARRAYTYPES, FIELDTYPES +import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, @get_package, @get_numbertype, @get_inbounds, @get_padding, NUMBERTYPE_NONE, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, SCALARTYPES, ARRAYTYPES, FIELDTYPES import ParallelStencil.ParallelKernel: @require, @symbols import ParallelStencil.ParallelKernel: extract_posargs_init, extract_kwargs_init, check_already_initialized, set_initialized, is_initialized, check_initialized using ParallelStencil.ParallelKernel.Exceptions @@ -26,6 +26,7 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t @test @get_package() == $package @test @get_numbertype() == ComplexF16 @test @get_inbounds() == false + @test @get_padding() == false end; @testset "Data" begin @test @isdefined(Data) @@ -81,14 +82,15 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t end; @reset_parallel_kernel() end; - @testset "2. initialization of ParallelKernel without numbertype, with inbounds" begin + @testset "2. initialization of ParallelKernel without numbertype, with inbounds and padding" begin @require !@is_initialized() - @init_parallel_kernel(package = $package, inbounds = true) + @init_parallel_kernel(package = $package, inbounds = true, padding = true) @testset "initialized" begin @test @is_initialized() @test @get_package() == $package @test @get_numbertype() == NUMBERTYPE_NONE @test @get_inbounds() == true + @test @get_padding() == true end; @testset "Data" begin # NOTE: no scalar types @test @isdefined(Data) From e44bef2d03f79f38460891535c5f7eebc56ccfbd Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Fri, 18 Oct 2024 17:51:58 +0200 Subject: [PATCH 08/29] add unit tests for padding initialization --- test/test_init_parallel_stencil.jl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/test_init_parallel_stencil.jl b/test/test_init_parallel_stencil.jl index c4d51706..5fdfc91c 100644 --- a/test/test_init_parallel_stencil.jl +++ b/test/test_init_parallel_stencil.jl @@ -1,8 +1,8 @@ using Test using ParallelStencil -import ParallelStencil: @reset_parallel_stencil, @is_initialized, @get_package, @get_numbertype, @get_ndims, @get_inbounds, @get_memopt, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_NONE, NUMBERTYPE_NONE, NDIMS_NONE +import ParallelStencil: @reset_parallel_stencil, @is_initialized, @get_package, @get_numbertype, @get_ndims, @get_inbounds, @get_padding, @get_memopt, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_NONE, NUMBERTYPE_NONE, NDIMS_NONE import ParallelStencil: @require, @symbols -import ParallelStencil: extract_posargs_init, extract_kwargs_init, check_already_initialized, set_initialized, is_initialized, check_initialized, set_package, set_numbertype, set_ndims, set_inbounds, set_memopt +import ParallelStencil: extract_posargs_init, extract_kwargs_init, check_already_initialized, set_initialized, is_initialized, check_initialized, set_package, set_numbertype, set_ndims, set_inbounds, set_padding, set_memopt using ParallelStencil.Exceptions TEST_PACKAGES = SUPPORTED_PACKAGES @static if PKG_CUDA in TEST_PACKAGES @@ -27,6 +27,7 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t @test @get_ndims() == 3 @test @get_memopt() == false @test @get_inbounds() == false + @test @get_padding() == false end; @testset "Data" begin @test @isdefined(Data) @@ -56,9 +57,9 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t end; @reset_parallel_stencil() end; - @testset "2. initialization of ParallelStencil without numbertype and ndims, with memopt, with inbounds" begin + @testset "2. initialization of ParallelStencil without numbertype and ndims, with memopt, inbounds and padding" begin @require !@is_initialized() - @init_parallel_stencil(package = $package, inbounds = true, memopt = true) + @init_parallel_stencil(package = $package, inbounds = true, padding = true, memopt = true) @testset "initialized" begin @test @is_initialized() @test @get_package() == $package @@ -66,6 +67,7 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t @test @get_ndims() == NDIMS_NONE @test @get_memopt() == true @test @get_inbounds() == true + @test @get_padding() == true end; @testset "Data" begin @test @isdefined(Data) @@ -90,6 +92,7 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t set_ndims(@__MODULE__, 3) set_memopt(@__MODULE__, false) set_inbounds(@__MODULE__, false) + set_padding(@__MODULE__, false) @require is_initialized(@__MODULE__) @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :Threads, Float64, 3, false, false) @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float32, 3, false, false) From 94b20ebeeaf6e7271107732892bd63a14d307ef8 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Mon, 21 Oct 2024 19:24:43 +0200 Subject: [PATCH 09/29] add padding in field allocators --- src/ParallelKernel/FieldAllocators.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ParallelKernel/FieldAllocators.jl b/src/ParallelKernel/FieldAllocators.jl index 0a37e9d1..aea0c1eb 100644 --- a/src/ParallelKernel/FieldAllocators.jl +++ b/src/ParallelKernel/FieldAllocators.jl @@ -29,7 +29,7 @@ To see a description of a macro type `?` (including the `@`). module FieldAllocators using ..Exceptions -import ..ParallelKernel: check_initialized, get_numbertype, extract_kwargvalues, split_args, clean_args, is_same, extract_tuple, extract_kwargs +import ..ParallelKernel: check_initialized, get_numbertype, get_padding, extract_kwargvalues, split_args, clean_args, is_same, extract_tuple, extract_kwargs import ..ParallelKernel: NUMBERTYPE_NONE, FIELDTYPES From bcf45274edef4ce83e26725afd65ff26e264d34d Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Mon, 21 Oct 2024 19:25:03 +0200 Subject: [PATCH 10/29] add padding in field allocators --- src/ParallelKernel/init_parallel_kernel.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ParallelKernel/init_parallel_kernel.jl b/src/ParallelKernel/init_parallel_kernel.jl index 6dcb322f..665efb5d 100644 --- a/src/ParallelKernel/init_parallel_kernel.jl +++ b/src/ParallelKernel/init_parallel_kernel.jl @@ -132,7 +132,7 @@ function extract_kwargs_nopos(caller::Module, kwargs::Dict) if (:padding in keys(kwargs)) padding_val = eval_arg(caller, kwargs[:padding]); check_padding(padding_val) else padding_val = false end - return inbounds_val, padding + return inbounds_val, padding_val end function define_import(caller::Module, package::Symbol, parent_module::String) From 04b47e907429fe1b94812fe053822c125f2258b7 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Mon, 21 Oct 2024 19:26:33 +0200 Subject: [PATCH 11/29] add padding in field allocators --- src/ParallelKernel/shared.jl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/ParallelKernel/shared.jl b/src/ParallelKernel/shared.jl index 1d00a8f1..8298c631 100644 --- a/src/ParallelKernel/shared.jl +++ b/src/ParallelKernel/shared.jl @@ -61,6 +61,7 @@ const ERRMSG_UNSUPPORTED_PACKAGE = "unsupported package for parallelization" const ERRMSG_CHECK_PACKAGE = "package has to be functional and one of the following: $(join(SUPPORTED_PACKAGES,", "))" const ERRMSG_CHECK_NUMBERTYPE = "numbertype has to be one of the following (and evaluatable at parse time): $(join(SUPPORTED_NUMBERTYPES,", "))" const ERRMSG_CHECK_INBOUNDS = "inbounds must be a evaluatable at parse time (e.g. literal or constant) and has to be of type Bool." +const ERRMSG_CHECK_PADDING = "padding must be a evaluatable at parse time (e.g. literal or constant) and has to be of type Bool." const ERRMSG_CHECK_LITERALTYPES = "the type given to 'literaltype' must be one of the following: $(join(SUPPORTED_LITERALTYPES,", "))" const CELLARRAY_BLOCKLENGTH = Dict(PKG_NONE => 0, @@ -402,6 +403,7 @@ inexpr_walk(expr, s::Symbol; match_only_head=false) = false inexpr_walk(expr, e::Expr) = false Base.unquoted(s::Symbol) = s +Base.unquoted(b::Bool) = b function extract_tuple(t::Union{Expr,Symbol}; nested=false) # NOTE: this could return a tuple, but would require to change all small arrays to tuples... if isa(t, Expr) && t.head == :tuple @@ -422,6 +424,7 @@ check_literaltype(T::DataType) = ( if !(T in SUPPORTED_LITERALTYPES) @ArgumentE check_numbertype(datatypes...) = check_numbertype.(datatypes) check_literaltype(datatypes...) = check_literaltype.(datatypes) check_inbounds(inbounds) = ( if !isa(inbounds, Bool) @ArgumentError("$ERRMSG_CHECK_INBOUNDS (obtained: $inbounds)." ) end ) +check_padding(padding) = ( if !isa(padding, Bool) @ArgumentError("$ERRMSG_CHECK_INBOUNDS (obtained: $padding)." ) end ) ## FUNCTIONS AND MACROS FOR UNIT TESTS @@ -441,6 +444,7 @@ macro prettyexpand(expr) return QuoteNode(remove_linenumbernodes!( macro gorgeousexpand(expr) return QuoteNode(simplify_varnames!(remove_linenumbernodes!(macroexpand(__module__, expr; recursive=true)))) end macro prettystring(args...) return esc(:(string(ParallelStencil.ParallelKernel.@prettyexpand($(args...))))) end macro gorgeousstring(args...) return esc(:(string(ParallelStencil.ParallelKernel.@gorgeousexpand($(args...))))) end +macro interpolate(args...) esc(interpolate(args...)) end function macroexpandn(m::Module, expr, n::Integer) for i = 1:n @@ -483,6 +487,15 @@ function simplify_varnames!(expr::Expr) end +function interpolate(sym::Symbol, vals::NTuple, block::Expr) + return quote + $((substitute(block, :(_$($sym)), val) for val in vals)...) + end +end + +interpolate(sym::Symbol, vals_expr::Expr, block::Expr) = interpolate(sym, (extract_tuple(vals_expr)...,), block) + + ## FUNCTIONS/MACROS FOR DIVERSE SYNTAX SUGAR iscpu(package) = return (package in (PKG_THREADS, PKG_POLYESTER)) From 443d17487cd30d01c3fcf18022d23d372cf7f512 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Mon, 21 Oct 2024 19:26:55 +0200 Subject: [PATCH 12/29] add padding in field allocators --- src/shared.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shared.jl b/src/shared.jl index 9f47b7c0..a1faa66e 100644 --- a/src/shared.jl +++ b/src/shared.jl @@ -1,7 +1,7 @@ import MacroTools: @capture, postwalk, splitdef, splitarg # NOTE: inexpr_walk used instead of MacroTools.inexpr import .ParallelKernel: eval_arg, split_args, split_kwargs, extract_posargs_init, extract_kernel_args, insert_device_types, is_kernel, is_call, gensym_world, isgpu, iscpu, @isgpu, @iscpu, substitute, substitute_in_kernel, in_signature, inexpr_walk, adjust_signatures, handle_indices_and_literals, add_inbounds, cast, @ranges, @rangelengths, @return_value, @return_nothing import .ParallelKernel: PKG_CUDA, PKG_AMDGPU, PKG_THREADS, PKG_POLYESTER, PKG_NONE, NUMBERTYPE_NONE, SUPPORTED_NUMBERTYPES, SUPPORTED_PACKAGES, ERRMSG_UNSUPPORTED_PACKAGE, INT_CUDA, INT_AMDGPU, INT_POLYESTER, INT_THREADS, INDICES, PKNumber, RANGES_VARNAME, RANGES_TYPE, RANGELENGTH_XYZ_TYPE, RANGELENGTHS_VARNAMES, THREADIDS_VARNAMES, GENSYM_SEPARATOR, AD_SUPPORTED_ANNOTATIONS -import .ParallelKernel: @require, @symbols, symbols, longnameof, @prettyexpand, @prettystring, prettystring, @gorgeousexpand, @gorgeousstring, gorgeousstring +import .ParallelKernel: @require, @symbols, symbols, longnameof, @prettyexpand, @prettystring, prettystring, @gorgeousexpand, @gorgeousstring, gorgeousstring, @interpolate ## CONSTANTS From 153c1a1f888aff5a0e3d99be971ce9d11fb4a508 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Mon, 21 Oct 2024 19:27:27 +0200 Subject: [PATCH 13/29] add unit tests for padding initialization --- test/ParallelKernel/test_allocators.jl | 498 +++++++++++++------------ 1 file changed, 251 insertions(+), 247 deletions(-) diff --git a/test/ParallelKernel/test_allocators.jl b/test/ParallelKernel/test_allocators.jl index f84bafa6..57030ced 100644 --- a/test/ParallelKernel/test_allocators.jl +++ b/test/ParallelKernel/test_allocators.jl @@ -3,7 +3,7 @@ using CellArrays, StaticArrays import ParallelStencil using ParallelStencil.ParallelKernel import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, @get_numbertype, NUMBERTYPE_NONE, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU -import ParallelStencil.ParallelKernel: @require, @prettystring, @gorgeousstring +import ParallelStencil.ParallelKernel: @require, @prettystring, @gorgeousstring, interpolate import ParallelStencil.ParallelKernel: checkargs_CellType, _CellType using ParallelStencil.ParallelKernel.FieldAllocators import ParallelStencil.ParallelKernel.FieldAllocators: checksargs_field_macros, checkargs_allocate @@ -458,260 +458,264 @@ const DATA_INDEX = ParallelStencil.INT_THREADS # TODO: using Data.Index does not end @reset_parallel_kernel() end; - @testset "6. Fields (padding=$padding)" for padding in (false, true) - @require !@is_initialized() - @init_parallel_kernel($package, Float16, padding=$padding) - @require @is_initialized() - (nx, ny, nz) = (3, 4, 5) - @testset "mapping to array allocators" begin - @testset "Field" begin - @test occursin("@zeros", @prettystring(1, @Field((nx, ny, nz)))) - @test occursin("@zeros", @prettystring(1, @Field((nx, ny, nz), @zeros))) - @test occursin("@ones", @prettystring(1, @Field((nx, ny, nz), @ones))) - @test occursin("@rand", @prettystring(1, @Field((nx, ny, nz), @rand))) - @test occursin("@falses",@prettystring(1, @Field((nx, ny, nz), @falses))) - @test occursin("@trues", @prettystring(1, @Field((nx, ny, nz), @trues))) - end; - @testset "[B]{X|Y|Z}Field" begin - @test occursin("@zeros", @prettystring(1, @XField((nx, ny, nz)))) - @test occursin("@zeros", @prettystring(1, @YField((nx, ny, nz), @zeros))) - @test occursin("@ones", @prettystring(1, @ZField((nx, ny, nz), @ones))) - @test occursin("@rand", @prettystring(1, @BXField((nx, ny, nz), @rand))) - @test occursin("@falses",@prettystring(1, @BYField((nx, ny, nz), @falses))) - @test occursin("@trues", @prettystring(1, @BZField((nx, ny, nz), @trues))) - end; - @testset "{XX|YY|ZZ|XY|XZ|YZ}Field" begin - @test occursin("@zeros", @prettystring(1, @XXField((nx, ny, nz), eltype=Float32))) - @test occursin("@zeros", @prettystring(1, @YYField((nx, ny, nz), @zeros, eltype=Float32))) - @test occursin("@ones", @prettystring(1, @ZZField((nx, ny, nz), @ones, eltype=Float32))) - @test occursin("@rand", @prettystring(1, @XYField((nx, ny, nz), @rand, eltype=Float32))) - @test occursin("@falses",@prettystring(1, @XZField((nx, ny, nz), @falses, eltype=Float32))) - @test occursin("@trues", @prettystring(1, @YZField((nx, ny, nz), @trues, eltype=Float32))) - end; - end; - @testset "field size (3D)" begin - @test size( @Field((nx, ny, nz))) == (nx, ny, nz ) - @test size( @XField((nx, ny, nz))) == (nx-1, ny-2, nz-2) - @test size( @YField((nx, ny, nz))) == (nx-2, ny-1, nz-2) - @test size( @ZField((nx, ny, nz))) == (nx-2, ny-2, nz-1) - @test size(@BXField((nx, ny, nz))) == (nx+1, ny, nz ) - @test size(@BYField((nx, ny, nz))) == (nx, ny+1, nz ) - @test size(@BZField((nx, ny, nz))) == (nx, ny, nz+1) - @test size(@XXField((nx, ny, nz))) == (nx, ny-2, nz-2) - @test size(@YYField((nx, ny, nz))) == (nx-2, ny, nz-2) - @test size(@ZZField((nx, ny, nz))) == (nx-2, ny-2, nz ) - @test size(@XYField((nx, ny, nz))) == (nx-1, ny-1, nz-2) - @test size(@XZField((nx, ny, nz))) == (nx-1, ny-2, nz-1) - @test size(@YZField((nx, ny, nz))) == (nx-2, ny-1, nz-1) - @test size.(Tuple( @VectorField((nx, ny, nz)))) == (size( @XField((nx, ny, nz))), size( @YField((nx, ny, nz))), size( @ZField((nx, ny, nz)))) - @test size.(Tuple(@BVectorField((nx, ny, nz)))) == (size(@BXField((nx, ny, nz))), size(@BYField((nx, ny, nz))), size(@BZField((nx, ny, nz)))) - @test size.(Tuple( @TensorField((nx, ny, nz)))) == (size(@XXField((nx, ny, nz))), size(@YYField((nx, ny, nz))), size(@ZZField((nx, ny, nz))), - size(@XYField((nx, ny, nz))), size(@XZField((nx, ny, nz))), size(@YZField((nx, ny, nz)))) - end; - @testset "field size (2D)" begin - @test size( @Field((nx, ny))) == (nx, ny, ) - @test size( @XField((nx, ny))) == (nx-1, ny-2) - @test size( @YField((nx, ny))) == (nx-2, ny-1) - @test size( @ZField((nx, ny))) == (nx-2, ny-2) - @test size(@BXField((nx, ny))) == (nx+1, ny, ) - @test size(@BYField((nx, ny))) == (nx, ny+1) - @test size(@BZField((nx, ny))) == (nx, ny, ) - @test size(@XXField((nx, ny))) == (nx, ny-2) - @test size(@YYField((nx, ny))) == (nx-2, ny, ) - @test size(@ZZField((nx, ny))) == (nx-2, ny-2) - @test size(@XYField((nx, ny))) == (nx-1, ny-1) - @test size(@XZField((nx, ny))) == (nx-1, ny-2) - @test size(@YZField((nx, ny))) == (nx-2, ny-1) - @test size.(Tuple( @VectorField((nx, ny)))) == (size( @XField((nx, ny))), size( @YField((nx, ny)))) - @test size.(Tuple(@BVectorField((nx, ny)))) == (size(@BXField((nx, ny))), size(@BYField((nx, ny)))) - @test size.(Tuple( @TensorField((nx, ny)))) == (size(@XXField((nx, ny))), size(@YYField((nx, ny))), - size(@XYField((nx, ny)))) - end; - @testset "field size (1D)" begin - @test size( @Field((nx,))) == (nx, ) - @test size( @XField((nx,))) == (nx-1,) - @test size( @YField((nx,))) == (nx-2,) - @test size( @ZField((nx,))) == (nx-2,) - @test size(@BXField((nx,))) == (nx+1,) - @test size(@BYField((nx,))) == (nx, ) - @test size(@BZField((nx,))) == (nx, ) - @test size(@XXField((nx,))) == (nx, ) - @test size(@YYField((nx,))) == (nx-2,) - @test size(@ZZField((nx,))) == (nx-2,) - @test size(@XYField((nx,))) == (nx-1,) - @test size(@XZField((nx,))) == (nx-1,) - @test size(@YZField((nx,))) == (nx-2,) - @test size.(Tuple( @VectorField((nx,)))) == (size( @XField((nx,))),) - @test size.(Tuple(@BVectorField((nx,)))) == (size(@BXField((nx,))),) - @test size.(Tuple( @TensorField((nx,)))) == (size(@XXField((nx,))),) - end; - @static if $padding - @testset "array size (3D)" begin - @test size( @Field((nx, ny, nz)).parent) == (nx, ny, nz ) - @test size( @XField((nx, ny, nz)).parent) == (nx+1, ny, nz ) - @test size( @YField((nx, ny, nz)).parent) == (nx, ny+1, nz ) - @test size( @ZField((nx, ny, nz)).parent) == (nx, ny, nz+1) - @test size(@BXField((nx, ny, nz)).parent) == (nx+1, ny, nz ) - @test size(@BYField((nx, ny, nz)).parent) == (nx, ny+1, nz ) - @test size(@BZField((nx, ny, nz)).parent) == (nx, ny, nz+1) - @test size(@XXField((nx, ny, nz)).parent) == (nx, ny, nz ) - @test size(@YYField((nx, ny, nz)).parent) == (nx, ny, nz ) - @test size(@ZZField((nx, ny, nz)).parent) == (nx, ny, nz ) - @test size(@XYField((nx, ny, nz)).parent) == (nx+1, ny+1, nz ) - @test size(@XZField((nx, ny, nz)).parent) == (nx+1, ny, nz+1) - @test size(@YZField((nx, ny, nz)).parent) == (nx, ny+1, nz+1) - end; - @testset "array size (2D)" begin - @test size( @Field((nx, ny)).parent) == (nx, ny ) - @test size( @XField((nx, ny)).parent) == (nx+1, ny ) - @test size( @YField((nx, ny)).parent) == (nx, ny+1) - @test size( @ZField((nx, ny)).parent) == (nx, ny ) - @test size(@BXField((nx, ny)).parent) == (nx+1, ny ) - @test size(@BYField((nx, ny)).parent) == (nx, ny+1) - @test size(@BZField((nx, ny)).parent) == (nx, ny ) - @test size(@XXField((nx, ny)).parent) == (nx, ny ) - @test size(@YYField((nx, ny)).parent) == (nx, ny ) - @test size(@ZZField((nx, ny)).parent) == (nx, ny ) - @test size(@XYField((nx, ny)).parent) == (nx+1, ny+1) - @test size(@XZField((nx, ny)).parent) == (nx+1, ny ) - @test size(@YZField((nx, ny)).parent) == (nx, ny+1) + $(interpolate(:padding, (false, true), :( + @testset "6. Fields (padding=$(_$padding))" begin + @require !@is_initialized() + @init_parallel_kernel($package, Float16, padding=_$padding) + @require @is_initialized() + (nx, ny, nz) = (3, 4, 5) + @testset "mapping to array allocators" begin + @testset "Field" begin + @test occursin("@zeros", @prettystring(1, @Field((nx, ny, nz)))) + @test occursin("@zeros", @prettystring(1, @Field((nx, ny, nz), @zeros))) + @test occursin("@ones", @prettystring(1, @Field((nx, ny, nz), @ones))) + @test occursin("@rand", @prettystring(1, @Field((nx, ny, nz), @rand))) + @test occursin("@falses",@prettystring(1, @Field((nx, ny, nz), @falses))) + @test occursin("@trues", @prettystring(1, @Field((nx, ny, nz), @trues))) + end; + @testset "[B]{X|Y|Z}Field" begin + @test occursin("@zeros", @prettystring(1, @XField((nx, ny, nz)))) + @test occursin("@zeros", @prettystring(1, @YField((nx, ny, nz), @zeros))) + @test occursin("@ones", @prettystring(1, @ZField((nx, ny, nz), @ones))) + @test occursin("@rand", @prettystring(1, @BXField((nx, ny, nz), @rand))) + @test occursin("@falses",@prettystring(1, @BYField((nx, ny, nz), @falses))) + @test occursin("@trues", @prettystring(1, @BZField((nx, ny, nz), @trues))) + end; + @testset "{XX|YY|ZZ|XY|XZ|YZ}Field" begin + @test occursin("@zeros", @prettystring(1, @XXField((nx, ny, nz), eltype=Float32))) + @test occursin("@zeros", @prettystring(1, @YYField((nx, ny, nz), @zeros, eltype=Float32))) + @test occursin("@ones", @prettystring(1, @ZZField((nx, ny, nz), @ones, eltype=Float32))) + @test occursin("@rand", @prettystring(1, @XYField((nx, ny, nz), @rand, eltype=Float32))) + @test occursin("@falses",@prettystring(1, @XZField((nx, ny, nz), @falses, eltype=Float32))) + @test occursin("@trues", @prettystring(1, @YZField((nx, ny, nz), @trues, eltype=Float32))) + end; end; - @testset "array size (1D)" begin - @test size( @Field((nx,)).parent) == (nx, ) - @test size( @XField((nx,)).parent) == (nx+1,) - @test size( @YField((nx,)).parent) == (nx, ) - @test size( @ZField((nx,)).parent) == (nx, ) - @test size(@BXField((nx,)).parent) == (nx+1,) - @test size(@BYField((nx,)).parent) == (nx, ) - @test size(@BZField((nx,)).parent) == (nx, ) - @test size(@XXField((nx,)).parent) == (nx, ) - @test size(@YYField((nx,)).parent) == (nx, ) - @test size(@ZZField((nx,)).parent) == (nx, ) - @test size(@XYField((nx,)).parent) == (nx+1,) - @test size(@XZField((nx,)).parent) == (nx+1,) - @test size(@YZField((nx,)).parent) == (nx, ) + @testset "field size (3D)" begin + @test size( @Field((nx, ny, nz))) == (nx, ny, nz ) + @test size( @XField((nx, ny, nz))) == (nx-1, ny-2, nz-2) + @test size( @YField((nx, ny, nz))) == (nx-2, ny-1, nz-2) + @test size( @ZField((nx, ny, nz))) == (nx-2, ny-2, nz-1) + @test size(@BXField((nx, ny, nz))) == (nx+1, ny, nz ) + @test size(@BYField((nx, ny, nz))) == (nx, ny+1, nz ) + @test size(@BZField((nx, ny, nz))) == (nx, ny, nz+1) + @test size(@XXField((nx, ny, nz))) == (nx, ny-2, nz-2) + @test size(@YYField((nx, ny, nz))) == (nx-2, ny, nz-2) + @test size(@ZZField((nx, ny, nz))) == (nx-2, ny-2, nz ) + @test size(@XYField((nx, ny, nz))) == (nx-1, ny-1, nz-2) + @test size(@XZField((nx, ny, nz))) == (nx-1, ny-2, nz-1) + @test size(@YZField((nx, ny, nz))) == (nx-2, ny-1, nz-1) + @test size.(Tuple( @VectorField((nx, ny, nz)))) == (size( @XField((nx, ny, nz))), size( @YField((nx, ny, nz))), size( @ZField((nx, ny, nz)))) + @test size.(Tuple(@BVectorField((nx, ny, nz)))) == (size(@BXField((nx, ny, nz))), size(@BYField((nx, ny, nz))), size(@BZField((nx, ny, nz)))) + @test size.(Tuple( @TensorField((nx, ny, nz)))) == (size(@XXField((nx, ny, nz))), size(@YYField((nx, ny, nz))), size(@ZZField((nx, ny, nz))), + size(@XYField((nx, ny, nz))), size(@XZField((nx, ny, nz))), size(@YZField((nx, ny, nz)))) end; - @testset "view ranges (3D)" begin - @test @Field((nx, ny, nz)).indices == (1:nx, 1:ny, 1:nz ) - @test @XField((nx, ny, nz)).indices == (2:nx, 2:ny-1, 2:nz-1) - @test @YField((nx, ny, nz)).indices == (2:nx-1, 2:ny, 2:nz-1) - @test @ZField((nx, ny, nz)).indices == (2:nx-1, 2:ny-1, 2:nz ) - @test @BXField((nx, ny, nz)).indices == (1:nx+1, 1:ny, 1:nz ) - @test @BYField((nx, ny, nz)).indices == (1:nx, 1:ny+1, 1:nz ) - @test @BZField((nx, ny, nz)).indices == (1:nx, 1:ny, 1:nz+1) - @test @XXField((nx, ny, nz)).indices == (1:nx, 2:ny-1, 2:nz-1) - @test @YYField((nx, ny, nz)).indices == (2:nx-1, 1:ny, 2:nz-1) - @test @ZZField((nx, ny, nz)).indices == (2:nx-1, 2:ny-1, 1:nz ) - @test @XYField((nx, ny, nz)).indices == (2:nx, 2:ny, 2:nz-1) - @test @XZField((nx, ny, nz)).indices == (2:nx, 2:ny-1, 2:nz ) - @test @YZField((nx, ny, nz)).indices == (2:nx-1, 2:ny, 2:nz ) + @testset "field size (2D)" begin + @test size( @Field((nx, ny))) == (nx, ny, ) + @test size( @XField((nx, ny))) == (nx-1, ny-2) + @test size( @YField((nx, ny))) == (nx-2, ny-1) + @test size( @ZField((nx, ny))) == (nx-2, ny-2) + @test size(@BXField((nx, ny))) == (nx+1, ny, ) + @test size(@BYField((nx, ny))) == (nx, ny+1) + @test size(@BZField((nx, ny))) == (nx, ny, ) + @test size(@XXField((nx, ny))) == (nx, ny-2) + @test size(@YYField((nx, ny))) == (nx-2, ny, ) + @test size(@ZZField((nx, ny))) == (nx-2, ny-2) + @test size(@XYField((nx, ny))) == (nx-1, ny-1) + @test size(@XZField((nx, ny))) == (nx-1, ny-2) + @test size(@YZField((nx, ny))) == (nx-2, ny-1) + @test size.(Tuple( @VectorField((nx, ny)))) == (size( @XField((nx, ny))), size( @YField((nx, ny)))) + @test size.(Tuple(@BVectorField((nx, ny)))) == (size(@BXField((nx, ny))), size(@BYField((nx, ny)))) + @test size.(Tuple( @TensorField((nx, ny)))) == (size(@XXField((nx, ny))), size(@YYField((nx, ny))), + size(@XYField((nx, ny)))) end; - @testset "view ranges (2D)" begin - @test @Field((nx, ny)).indices == (1:nx, 1:ny ) - @test @XField((nx, ny)).indices == (2:nx, 2:ny-1) - @test @YField((nx, ny)).indices == (2:nx-1, 2:ny ) - @test @ZField((nx, ny)).indices == (2:nx-1, 2:ny-1) - @test @BXField((nx, ny)).indices == (1:nx+1, 1:ny ) - @test @BYField((nx, ny)).indices == (1:nx, 1:ny+1) - @test @BZField((nx, ny)).indices == (1:nx, 1:ny ) - @test @XXField((nx, ny)).indices == (1:nx, 2:ny-1) - @test @YYField((nx, ny)).indices == (2:nx-1, 1:ny ) - @test @ZZField((nx, ny)).indices == (2:nx-1, 2:ny-1) - @test @XYField((nx, ny)).indices == (2:nx, 2:ny ) - @test @XZField((nx, ny)).indices == (2:nx, 2:ny-1) - @test @YZField((nx, ny)).indices == (2:nx-1, 2:ny ) + @testset "field size (1D)" begin + @test size( @Field((nx,))) == (nx, ) + @test size( @XField((nx,))) == (nx-1,) + @test size( @YField((nx,))) == (nx-2,) + @test size( @ZField((nx,))) == (nx-2,) + @test size(@BXField((nx,))) == (nx+1,) + @test size(@BYField((nx,))) == (nx, ) + @test size(@BZField((nx,))) == (nx, ) + @test size(@XXField((nx,))) == (nx, ) + @test size(@YYField((nx,))) == (nx-2,) + @test size(@ZZField((nx,))) == (nx-2,) + @test size(@XYField((nx,))) == (nx-1,) + @test size(@XZField((nx,))) == (nx-1,) + @test size(@YZField((nx,))) == (nx-2,) + @test size.(Tuple( @VectorField((nx,)))) == (size( @XField((nx,))),) + @test size.(Tuple(@BVectorField((nx,)))) == (size(@BXField((nx,))),) + @test size.(Tuple( @TensorField((nx,)))) == (size(@XXField((nx,))),) end; - @testset "view ranges (1D)" begin - @test @Field((nx,)).indices == (1:nx, ) - @test @XField((nx,)).indices == (2:nx, ) - @test @YField((nx,)).indices == (2:nx-1,) - @test @ZField((nx,)).indices == (2:nx-1,) - @test @BXField((nx,)).indices == (1:nx+1,) - @test @BYField((nx,)).indices == (1:nx, ) - @test @BZField((nx,)).indices == (1:nx, ) - @test @XXField((nx,)).indices == (1:nx, ) - @test @YYField((nx,)).indices == (2:nx-1,) - @test @ZZField((nx,)).indices == (2:nx-1,) - @test @XYField((nx,)).indices == (2:nx, ) - @test @XZField((nx,)).indices == (2:nx, ) - @test @YZField((nx,)).indices == (2:nx-1,) - end; - end; - @testset "eltype" begin - @test eltype(@Field((nx, ny, nz))) == Float16 - @test eltype(@Field((nx, ny, nz), eltype=Float32)) == Float32 - @test eltype.(Tuple(@VectorField((nx, ny, nz)))) == (Float16, Float16, Float16) - @test eltype.(Tuple(@VectorField((nx, ny, nz), eltype=Float32))) == (Float32, Float32, Float32) - @test eltype.(Tuple(@BVectorField((nx, ny, nz)))) == (Float16, Float16, Float16) - @test eltype.(Tuple(@BVectorField((nx, ny, nz), eltype=Float32))) == (Float32, Float32, Float32) - @test eltype.(Tuple(@TensorField((nx, ny, nz)))) == (Float16, Float16, Float16, Float16, Float16, Float16) - @test eltype.(Tuple(@TensorField((nx, ny, nz), eltype=Float32))) == (Float32, Float32, Float32, Float32, Float32, Float32) - end; - @testset "@allocate" begin - @testset "single field" begin - @test occursin("F = @Field((nx, ny, nz), @zeros(), eltype = Float16)", @prettystring(1, @allocate(gridsize = (nx,ny,nz), fields = (Field=>F)))) - @test occursin("F = @Field(nxyz, @zeros(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F))) - @test occursin("F = @Field(nxyz, @ones(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@ones))) - @test occursin("F = @Field(nxyz, @rand(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@rand))) - @test occursin("F = @Field(nxyz, @falses(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@falses))) - @test occursin("F = @Field(nxyz, @trues(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@trues))) - @test occursin("F = @Field(nxyz, @zeros(), eltype = Float32)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, eltype=Float32))) - @test occursin("F = @Field(nxyz, @rand(), eltype = Float32)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, eltype=Float32, allocator=@rand))) + @static if _$padding + @testset "array size (3D)" begin + @test size( @Field((nx, ny, nz)).parent) == (nx, ny, nz ) + @test size( @XField((nx, ny, nz)).parent) == (nx+1, ny, nz ) + @test size( @YField((nx, ny, nz)).parent) == (nx, ny+1, nz ) + @test size( @ZField((nx, ny, nz)).parent) == (nx, ny, nz+1) + @test size(@BXField((nx, ny, nz)).parent) == (nx+1, ny, nz ) + @test size(@BYField((nx, ny, nz)).parent) == (nx, ny+1, nz ) + @test size(@BZField((nx, ny, nz)).parent) == (nx, ny, nz+1) + @test size(@XXField((nx, ny, nz)).parent) == (nx, ny, nz ) + @test size(@YYField((nx, ny, nz)).parent) == (nx, ny, nz ) + @test size(@ZZField((nx, ny, nz)).parent) == (nx, ny, nz ) + @test size(@XYField((nx, ny, nz)).parent) == (nx+1, ny+1, nz ) + @test size(@XZField((nx, ny, nz)).parent) == (nx+1, ny, nz+1) + @test size(@YZField((nx, ny, nz)).parent) == (nx, ny+1, nz+1) + end; + @testset "array size (2D)" begin + @test size( @Field((nx, ny)).parent) == (nx, ny ) + @test size( @XField((nx, ny)).parent) == (nx+1, ny ) + @test size( @YField((nx, ny)).parent) == (nx, ny+1) + @test size( @ZField((nx, ny)).parent) == (nx, ny ) + @test size(@BXField((nx, ny)).parent) == (nx+1, ny ) + @test size(@BYField((nx, ny)).parent) == (nx, ny+1) + @test size(@BZField((nx, ny)).parent) == (nx, ny ) + @test size(@XXField((nx, ny)).parent) == (nx, ny ) + @test size(@YYField((nx, ny)).parent) == (nx, ny ) + @test size(@ZZField((nx, ny)).parent) == (nx, ny ) + @test size(@XYField((nx, ny)).parent) == (nx+1, ny+1) + @test size(@XZField((nx, ny)).parent) == (nx+1, ny ) + @test size(@YZField((nx, ny)).parent) == (nx, ny+1) + end; + # TODO: these tests fail for CUDA (most certainly a bug in CUDA) + # @testset "array size (1D)" begin + # @test size( @Field((nx,)).parent) == (nx, ) + # @test size( @XField((nx,)).parent) == (nx+1,) + # @test size( @YField((nx,)).parent) == (nx, ) + # @test size( @ZField((nx,)).parent) == (nx, ) + # @test size(@BXField((nx,)).parent) == (nx+1,) + # @test size(@BYField((nx,)).parent) == (nx, ) + # @test size(@BZField((nx,)).parent) == (nx, ) + # @test size(@XXField((nx,)).parent) == (nx, ) + # @test size(@YYField((nx,)).parent) == (nx, ) + # @test size(@ZZField((nx,)).parent) == (nx, ) + # @test size(@XYField((nx,)).parent) == (nx+1,) + # @test size(@XZField((nx,)).parent) == (nx+1,) + # @test size(@YZField((nx,)).parent) == (nx, ) + # end; + @testset "view ranges (3D)" begin + @test @Field((nx, ny, nz)).indices == (1:nx, 1:ny, 1:nz ) + @test @XField((nx, ny, nz)).indices == (2:nx, 2:ny-1, 2:nz-1) + @test @YField((nx, ny, nz)).indices == (2:nx-1, 2:ny, 2:nz-1) + @test @ZField((nx, ny, nz)).indices == (2:nx-1, 2:ny-1, 2:nz ) + @test @BXField((nx, ny, nz)).indices == (1:nx+1, 1:ny, 1:nz ) + @test @BYField((nx, ny, nz)).indices == (1:nx, 1:ny+1, 1:nz ) + @test @BZField((nx, ny, nz)).indices == (1:nx, 1:ny, 1:nz+1) + @test @XXField((nx, ny, nz)).indices == (1:nx, 2:ny-1, 2:nz-1) + @test @YYField((nx, ny, nz)).indices == (2:nx-1, 1:ny, 2:nz-1) + @test @ZZField((nx, ny, nz)).indices == (2:nx-1, 2:ny-1, 1:nz ) + @test @XYField((nx, ny, nz)).indices == (2:nx, 2:ny, 2:nz-1) + @test @XZField((nx, ny, nz)).indices == (2:nx, 2:ny-1, 2:nz ) + @test @YZField((nx, ny, nz)).indices == (2:nx-1, 2:ny, 2:nz ) + end; + @testset "view ranges (2D)" begin + @test @Field((nx, ny)).indices == (1:nx, 1:ny ) + @test @XField((nx, ny)).indices == (2:nx, 2:ny-1) + @test @YField((nx, ny)).indices == (2:nx-1, 2:ny ) + @test @ZField((nx, ny)).indices == (2:nx-1, 2:ny-1) + @test @BXField((nx, ny)).indices == (1:nx+1, 1:ny ) + @test @BYField((nx, ny)).indices == (1:nx, 1:ny+1) + @test @BZField((nx, ny)).indices == (1:nx, 1:ny ) + @test @XXField((nx, ny)).indices == (1:nx, 2:ny-1) + @test @YYField((nx, ny)).indices == (2:nx-1, 1:ny ) + @test @ZZField((nx, ny)).indices == (2:nx-1, 2:ny-1) + @test @XYField((nx, ny)).indices == (2:nx, 2:ny ) + @test @XZField((nx, ny)).indices == (2:nx, 2:ny-1) + @test @YZField((nx, ny)).indices == (2:nx-1, 2:ny ) + end; + # TODO: these tests fail for CUDA (most certainly a bug in CUDA) + # @testset "view ranges (1D)" begin + # @test @Field((nx,)).indices == (1:nx, ) + # @test @XField((nx,)).indices == (2:nx, ) + # @test @YField((nx,)).indices == (2:nx-1,) + # @test @ZField((nx,)).indices == (2:nx-1,) + # @test @BXField((nx,)).indices == (1:nx+1,) + # @test @BYField((nx,)).indices == (1:nx, ) + # @test @BZField((nx,)).indices == (1:nx, ) + # @test @XXField((nx,)).indices == (1:nx, ) + # @test @YYField((nx,)).indices == (2:nx-1,) + # @test @ZZField((nx,)).indices == (2:nx-1,) + # @test @XYField((nx,)).indices == (2:nx, ) + # @test @XZField((nx,)).indices == (2:nx, ) + # @test @YZField((nx,)).indices == (2:nx-1,) + # end; end; - @testset "multiple fields - one per type (default allocator and eltype)" begin - call = @prettystring(1, @allocate(gridsize = nxyz, - fields = (Field => F, - XField => X, - YField => Y, - ZField => Z, - BXField => BX, - BYField => BY, - BZField => BZ, - XXField => XX, - YYField => YY, - ZZField => ZZ, - XYField => XY, - XZField => XZ, - YZField => YZ, - VectorField => V, - BVectorField => BV, - TensorField => T) )) - @test occursin("F = @Field(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("X = @XField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("Y = @YField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("Z = @ZField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("BX = @BXField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("BY = @BYField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("BZ = @BZField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("XX = @XXField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("YY = @YYField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("ZZ = @ZZField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("XY = @XYField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("XZ = @XZField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("YZ = @YZField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("V = @VectorField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("BV = @BVectorField(nxyz, @zeros(), eltype = Float16)", call) - @test occursin("T = @TensorField(nxyz, @zeros(), eltype = Float16)", call) + @testset "eltype" begin + @test eltype(@Field((nx, ny, nz))) == Float16 + @test eltype(@Field((nx, ny, nz), eltype=Float32)) == Float32 + @test eltype.(Tuple(@VectorField((nx, ny, nz)))) == (Float16, Float16, Float16) + @test eltype.(Tuple(@VectorField((nx, ny, nz), eltype=Float32))) == (Float32, Float32, Float32) + @test eltype.(Tuple(@BVectorField((nx, ny, nz)))) == (Float16, Float16, Float16) + @test eltype.(Tuple(@BVectorField((nx, ny, nz), eltype=Float32))) == (Float32, Float32, Float32) + @test eltype.(Tuple(@TensorField((nx, ny, nz)))) == (Float16, Float16, Float16, Float16, Float16, Float16) + @test eltype.(Tuple(@TensorField((nx, ny, nz), eltype=Float32))) == (Float32, Float32, Float32, Float32, Float32, Float32) end; - @testset "multiple fields - multiple per type (custom allocator and eltype)" begin - call = @prettystring(1, @allocate(gridsize = nxyz, - fields = (Field => (F1, F2), - XField => X, - VectorField => (V1, V2, V3), - TensorField => T), - allocator = @rand, - eltype = Float32) ) - @test occursin("F1 = @Field(nxyz, @rand(), eltype = Float32)", call) - @test occursin("F2 = @Field(nxyz, @rand(), eltype = Float32)", call) - @test occursin("X = @XField(nxyz, @rand(), eltype = Float32)", call) - @test occursin("V1 = @VectorField(nxyz, @rand(), eltype = Float32)", call) - @test occursin("V2 = @VectorField(nxyz, @rand(), eltype = Float32)", call) - @test occursin("V3 = @VectorField(nxyz, @rand(), eltype = Float32)", call) - @test occursin("T = @TensorField(nxyz, @rand(), eltype = Float32)", call) + @testset "@allocate" begin + @testset "single field" begin + @test occursin("F = @Field((nx, ny, nz), @zeros(), eltype = Float16)", @prettystring(1, @allocate(gridsize = (nx,ny,nz), fields = (Field=>F)))) + @test occursin("F = @Field(nxyz, @zeros(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F))) + @test occursin("F = @Field(nxyz, @ones(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@ones))) + @test occursin("F = @Field(nxyz, @rand(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@rand))) + @test occursin("F = @Field(nxyz, @falses(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@falses))) + @test occursin("F = @Field(nxyz, @trues(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@trues))) + @test occursin("F = @Field(nxyz, @zeros(), eltype = Float32)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, eltype=Float32))) + @test occursin("F = @Field(nxyz, @rand(), eltype = Float32)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, eltype=Float32, allocator=@rand))) + end; + @testset "multiple fields - one per type (default allocator and eltype)" begin + call = @prettystring(1, @allocate(gridsize = nxyz, + fields = (Field => F, + XField => X, + YField => Y, + ZField => Z, + BXField => BX, + BYField => BY, + BZField => BZ, + XXField => XX, + YYField => YY, + ZZField => ZZ, + XYField => XY, + XZField => XZ, + YZField => YZ, + VectorField => V, + BVectorField => BV, + TensorField => T) )) + @test occursin("F = @Field(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("X = @XField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("Y = @YField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("Z = @ZField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("BX = @BXField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("BY = @BYField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("BZ = @BZField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("XX = @XXField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("YY = @YYField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("ZZ = @ZZField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("XY = @XYField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("XZ = @XZField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("YZ = @YZField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("V = @VectorField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("BV = @BVectorField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("T = @TensorField(nxyz, @zeros(), eltype = Float16)", call) + end; + @testset "multiple fields - multiple per type (custom allocator and eltype)" begin + call = @prettystring(1, @allocate(gridsize = nxyz, + fields = (Field => (F1, F2), + XField => X, + VectorField => (V1, V2, V3), + TensorField => T), + allocator = @rand, + eltype = Float32) ) + @test occursin("F1 = @Field(nxyz, @rand(), eltype = Float32)", call) + @test occursin("F2 = @Field(nxyz, @rand(), eltype = Float32)", call) + @test occursin("X = @XField(nxyz, @rand(), eltype = Float32)", call) + @test occursin("V1 = @VectorField(nxyz, @rand(), eltype = Float32)", call) + @test occursin("V2 = @VectorField(nxyz, @rand(), eltype = Float32)", call) + @test occursin("V3 = @VectorField(nxyz, @rand(), eltype = Float32)", call) + @test occursin("T = @TensorField(nxyz, @rand(), eltype = Float32)", call) + end; end; + @reset_parallel_kernel() end; - @reset_parallel_kernel() - end; + ))) @testset "7. Exceptions" begin @require !@is_initialized() @init_parallel_kernel(package = $package) From de32dfa7c5b22c7a3a6abfff155a5082a06f08e8 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 23 Oct 2024 19:38:47 +0200 Subject: [PATCH 14/29] make find a different modules compatible with padding --- src/FiniteDifferences.jl | 185 +++++++++++++++++++-------------------- 1 file changed, 92 insertions(+), 93 deletions(-) diff --git a/src/FiniteDifferences.jl b/src/FiniteDifferences.jl index a5266c98..b1a3a0b2 100644 --- a/src/FiniteDifferences.jl +++ b/src/FiniteDifferences.jl @@ -46,24 +46,24 @@ export @within @doc "`@minloc(A)`: Compute the minimum between 2nd order adjacent elements of `A`, using a moving window of size 3." :(@minloc) import ..ParallelStencil -import ..ParallelStencil: INDICES, WITHIN_DOC, @expandargs -const ix = INDICES[1] -const ixi = :($ix+1) +import ..ParallelStencil: INDICES, INDICES_INN, WITHIN_DOC, @expandargs +const ix = INDICES[1] +const ixi = INDICES_INN[1] -macro d(A) @expandargs(A); esc(:( $A[$ix+1] - $A[$ix] )) end +macro d(A) @expandargs(A); esc(:( $A[$ixi] - $A[$ixi-1] )) end macro d2(A) @expandargs(A); esc(:( ($A[$ixi+1] - $A[$ixi]) - ($A[$ixi] - $A[$ixi-1]) )) end macro all(A) @expandargs(A); esc(:( $A[$ix ] )) end macro inn(A) @expandargs(A); esc(:( $A[$ixi ] )) end -macro av(A) @expandargs(A); esc(:(($A[$ix] + $A[$ix+1] )*0.5 )) end -macro harm(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ix] + 1.0/$A[$ix+1])*2.0 )) end +macro av(A) @expandargs(A); esc(:(($A[$ixi-1] + $A[$ixi] )*0.5 )) end +macro harm(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi-1] + 1.0/$A[$ixi])*2.0 )) end macro maxloc(A) @expandargs(A); esc(:( max( max($A[$ixi-1], $A[$ixi+1]), $A[$ixi] ) )) end macro minloc(A) @expandargs(A); esc(:( min( min($A[$ixi-1], $A[$ixi+1]), $A[$ixi] ) )) end @doc WITHIN_DOC macro within(macroname::String, A) @expandargs(A) - if macroname == "@all" esc( :($ix<=size($A,1) ) ) - elseif macroname == "@inn" esc( :($ix<=size($A,1)-2) ) + if macroname == "@all" esc( :( $ix <= size($A,1)) ) + elseif macroname == "@inn" esc( :(1 < $ixi < size($A,1)) ) else error("unkown macroname: $macroname. If you want to add your own assignement macros, overwrite the macro 'within(macroname::String, A)'; to still use the exising macro within as well call ParallelStencil.FiniteDifferences{1|2|3}D.@within(macroname, A) at the end.") end end @@ -151,14 +151,14 @@ export @within @doc "`@minloc(A)`: Compute the minimum between 2nd order adjacent elements of `A`, using a moving window of size 3." :(@minloc) import ..ParallelStencil -import ..ParallelStencil: INDICES, WITHIN_DOC, @expandargs -ix, iy = INDICES[1], INDICES[2] -ixi, iyi = :($ix+1), :($iy+1) - -macro d_xa(A) @expandargs(A); esc(:( $A[$ix+1,$iy ] - $A[$ix ,$iy ] )) end -macro d_ya(A) @expandargs(A); esc(:( $A[$ix ,$iy+1] - $A[$ix ,$iy ] )) end -macro d_xi(A) @expandargs(A); esc(:( $A[$ix+1,$iyi ] - $A[$ix ,$iyi] )) end -macro d_yi(A) @expandargs(A); esc(:( $A[$ixi ,$iy+1] - $A[$ixi ,$iy ] )) end +import ..ParallelStencil: INDICES, INDICES_INN, WITHIN_DOC, @expandargs +ix, iy = INDICES[1], INDICES[2] +ixi, iyi = INDICES_INN[1], INDICES_INN[2] + +macro d_xa(A) @expandargs(A); esc(:( $A[$ixi,$iy ] - $A[$ixi-1,$iy ] )) end +macro d_ya(A) @expandargs(A); esc(:( $A[$ix ,$iyi] - $A[$ix ,$iyi-1] )) end +macro d_xi(A) @expandargs(A); esc(:( $A[$ixi,$iyi] - $A[$ixi-1,$iyi ] )) end +macro d_yi(A) @expandargs(A); esc(:( $A[$ixi,$iyi] - $A[$ixi ,$iyi-1] )) end macro d2_xa(A) @expandargs(A); esc(:( ($A[$ixi+1,$iy ] - $A[$ixi ,$iy ]) - ($A[$ixi ,$iy ] - $A[$ixi-1,$iy ]) )) end macro d2_ya(A) @expandargs(A); esc(:( ($A[$ix ,$iyi+1] - $A[$ix ,$iyi]) - ($A[$ix ,$iyi] - $A[$ix ,$iyi-1]) )) end macro d2_xi(A) @expandargs(A); esc(:( ($A[$ixi+1,$iyi ] - $A[$ixi ,$iyi]) - ($A[$ixi ,$iyi] - $A[$ixi-1,$iyi ]) )) end @@ -167,16 +167,16 @@ macro all(A) @expandargs(A); esc(:( $A[$ix ,$iy ] )) end macro inn(A) @expandargs(A); esc(:( $A[$ixi ,$iyi ] )) end macro inn_x(A) @expandargs(A); esc(:( $A[$ixi ,$iy ] )) end macro inn_y(A) @expandargs(A); esc(:( $A[$ix ,$iyi ] )) end -macro av(A) @expandargs(A); esc(:(($A[$ix ,$iy ] + $A[$ix+1,$iy ] + $A[$ix,$iy+1] + $A[$ix+1,$iy+1])*0.25 )) end -macro av_xa(A) @expandargs(A); esc(:(($A[$ix ,$iy ] + $A[$ix+1,$iy ] )*0.5 )) end -macro av_ya(A) @expandargs(A); esc(:(($A[$ix ,$iy ] + $A[$ix ,$iy+1] )*0.5 )) end -macro av_xi(A) @expandargs(A); esc(:(($A[$ix ,$iyi ] + $A[$ix+1,$iyi ] )*0.5 )) end -macro av_yi(A) @expandargs(A); esc(:(($A[$ixi ,$iy ] + $A[$ixi ,$iy+1] )*0.5 )) end -macro harm(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ix ,$iy ] + 1.0/$A[$ix+1,$iy ] + 1.0/$A[$ix,$iy+1] + 1.0/$A[$ix+1,$iy+1])*4.0 )) end -macro harm_xa(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ix ,$iy ] + 1.0/$A[$ix+1,$iy ] )*2.0 )) end -macro harm_ya(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ix ,$iy ] + 1.0/$A[$ix ,$iy+1] )*2.0 )) end -macro harm_xi(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ix ,$iyi ] + 1.0/$A[$ix+1,$iyi ] )*2.0 )) end -macro harm_yi(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi ,$iy ] + 1.0/$A[$ixi ,$iy+1] )*2.0 )) end +macro av(A) @expandargs(A); esc(:(($A[$ixi-1,$iyi-1] + $A[$ixi,$iyi-1] + $A[$ixi-1,$iyi] + $A[$ixi,$iyi])*0.25 )) end +macro av_xa(A) @expandargs(A); esc(:(($A[$ixi-1,$iy ] + $A[$ixi,$iy ] )*0.5 )) end +macro av_ya(A) @expandargs(A); esc(:(($A[$ix ,$iyi-1] + $A[$ix ,$iyi] )*0.5 )) end +macro av_xi(A) @expandargs(A); esc(:(($A[$ixi-1,$iyi ] + $A[$ixi,$iyi] )*0.5 )) end +macro av_yi(A) @expandargs(A); esc(:(($A[$ixi ,$iyi-1] + $A[$ixi,$iyi] )*0.5 )) end +macro harm(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi-1,$iyi-1] + 1.0/$A[$ixi,$iyi-1] + 1.0/$A[$ixi-1,$iyi] + 1.0/$A[$ixi,$iyi])*4.0 )) end +macro harm_xa(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi-1,$iy ] + 1.0/$A[$ixi,$iy ] )*2.0 )) end +macro harm_ya(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ix ,$iyi-1] + 1.0/$A[$ix ,$iyi] )*2.0 )) end +macro harm_xi(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi-1,$iyi ] + 1.0/$A[$ixi,$iyi] )*2.0 )) end +macro harm_yi(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi ,$iyi-1] + 1.0/$A[$ixi,$iyi] )*2.0 )) end macro maxloc(A) @expandargs(A); esc(:( max( max( max($A[$ixi-1,$iyi ], $A[$ixi+1,$iyi ]) , $A[$ixi ,$iyi ] ), max($A[$ixi ,$iyi-1], $A[$ixi ,$iyi+1]) ) )) end macro minloc(A) @expandargs(A); esc(:( min( min( min($A[$ixi-1,$iyi ], $A[$ixi+1,$iyi ]) , $A[$ixi ,$iyi ] ), @@ -185,10 +185,10 @@ macro minloc(A) @expandargs(A); esc(:( min( min( min($A[$ixi-1,$iyi ], $A[$ @doc WITHIN_DOC macro within(macroname::String, A) @expandargs(A) - if macroname == "@all" esc( :($ix<=size($A,1) && $iy<=size($A,2) ) ) - elseif macroname == "@inn" esc( :($ix<=size($A,1)-2 && $iy<=size($A,2)-2) ) - elseif macroname == "@inn_x" esc( :($ix<=size($A,1)-2 && $iy<=size($A,2) ) ) - elseif macroname == "@inn_y" esc( :($ix<=size($A,1) && $iy<=size($A,2)-2) ) + if macroname == "@all" esc( :( $ix<=size($A,1) && $iy<=size($A,2)) ) + elseif macroname == "@inn" esc( :(1<$ixi Date: Wed, 23 Oct 2024 19:40:47 +0200 Subject: [PATCH 15/29] add basic basic handling of padding in kernels --- src/ParallelKernel/shared.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ParallelKernel/shared.jl b/src/ParallelKernel/shared.jl index 8298c631..14f094b5 100644 --- a/src/ParallelKernel/shared.jl +++ b/src/ParallelKernel/shared.jl @@ -23,6 +23,7 @@ const NTHREADS_X_MAX = 32 const NTHREADS_X_MAX_AMDGPU = 64 const NTHREADS_MAX = 256 const INDICES = (gensym_world("ix", @__MODULE__), gensym_world("iy", @__MODULE__), gensym_world("iz", @__MODULE__)) +const INDICES_INN = (gensym_world("ixi", @__MODULE__), gensym_world("iyi", @__MODULE__), gensym_world("izi", @__MODULE__)) # ( :($(INDICES[1])+1), :($(INDICES[2])+1), :($(INDICES[3])+1) ) const RANGES_VARNAME = gensym_world("ranges", @__MODULE__) const RANGELENGTHS_VARNAMES = (gensym_world("rangelength_x", @__MODULE__), gensym_world("rangelength_y", @__MODULE__), gensym_world("rangelength_z", @__MODULE__)) const THREADIDS_VARNAMES = (gensym_world("tx", @__MODULE__), gensym_world("ty", @__MODULE__), gensym_world("tz", @__MODULE__)) From 070c523825a48940b67125af9befc56fa2bf31dd Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 23 Oct 2024 19:41:21 +0200 Subject: [PATCH 16/29] add basic basic handling of padding in kernels --- src/ParallelKernel/parallel.jl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/ParallelKernel/parallel.jl b/src/ParallelKernel/parallel.jl index 44dfd967..c5955f2a 100644 --- a/src/ParallelKernel/parallel.jl +++ b/src/ParallelKernel/parallel.jl @@ -172,17 +172,18 @@ function parallel_kernel(caller::Module, package::Symbol, numbertype::DataType, indices = extract_tuple(indices) body = get_body(kernel) body = remove_return(body) + body = macroexpand(caller, body) use_aliases = !all(indices .== INDICES[1:length(indices)]) if use_aliases # NOTE: we treat explicit parallel indices as aliases to the statically retrievable indices INDICES. indices_aliases = indices indices = [INDICES[1:length(indices)]...] - body = macroexpand(caller, body) for i=1:length(indices_aliases) body = substitute(body, indices_aliases[i], indices[i]) end end if isgpu(package) kernel = insert_device_types(caller, kernel) end kernel = adjust_signatures(kernel, package) + body = handle_padding(body, get_padding(caller)) # TODO: padding can later be made configurable per kernel (to enable working with arrays as before). body = handle_indices_and_literals(body, indices, package, numbertype) if (inbounds) body = add_inbounds(body) end body = add_return(body) @@ -363,6 +364,14 @@ function adjust_signatures(kernel::Expr, package::Symbol) return kernel end +function handle_padding(body::Expr, padding::Bool) + for i=1:length(INDICES_INN) + index_inn = (padding) ? INDICES[i] : :($(INDICES[i]) + 1) # NOTE: expression of ixi with ix, etc.: if padding is not used, they must be shifted by 1. + body = substitute(body, INDICES_INN[i], index_inn) + end + return body +end + function handle_indices_and_literals(body::Expr, indices::Array, package::Symbol, numbertype::DataType) int_type = kernel_int_type(package) ranges = [:($RANGES_VARNAME[1]), :($RANGES_VARNAME[2]), :($RANGES_VARNAME[3])] From 789c0877182e8e032a46673bd25c3e2e36e4d082 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 23 Oct 2024 19:41:56 +0200 Subject: [PATCH 17/29] add basic basic handling of padding in kernels --- src/parallel.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parallel.jl b/src/parallel.jl index d29baa11..7e2bcaa5 100644 --- a/src/parallel.jl +++ b/src/parallel.jl @@ -261,6 +261,7 @@ function parallel_kernel(metadata_module::Module, metadata_function::Expr, calle is_parallel_kernel = true if (ndims < 1 || ndims > 3) @KeywordArgumentError("@parallel: keyword argument 'ndims' is invalid or missing (valid values are 1, 2 or 3; 'ndims' an be set globally in @init_parallel_stencil and overwritten per kernel if needed).") end inbounds = haskey(kwargs, :inbounds) ? kwargs.inbounds : get_inbounds(caller) + padding = get_padding(caller) # TODO: padding can later be made configurable per kernel (to enable working with arrays as before). memopt = haskey(kwargs, :memopt) ? kwargs.memopt : get_memopt(caller) indices = get_indices_expr(ndims).args body = get_body(kernel) @@ -271,10 +272,12 @@ function parallel_kernel(metadata_module::Module, metadata_function::Expr, calle onthefly_vars, onthefly_exprs, write_vars, body = extract_onthefly_arrays!(body, argvars) check_mask_macro(caller) body = apply_masks(body, indices) + body = macroexpand(caller, body) + body = handle_padding(body, padding) if length(onthefly_vars) > 0 - body = macroexpand(caller, body) onthefly_syms = gensym_world.(onthefly_vars, (@__MODULE__,)) onthefly_exprs = macroexpand.((caller,), onthefly_exprs) + onthefly_exprs = handle_padding.(onthefly_exprs, (padding,)) body = insert_onthefly!(body, onthefly_vars, onthefly_syms, indices) onthefly_exprs = insert_onthefly!.(onthefly_exprs, (onthefly_vars,), (onthefly_syms,), (indices,)) create_onthefly_macro.((caller,), onthefly_syms, onthefly_exprs, onthefly_vars, (indices,)) From 43ffd021fa9f8f65db9a4119b0d7d21ce016bcf1 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 23 Oct 2024 19:44:15 +0200 Subject: [PATCH 18/29] add basic basic handling of padding in kernels --- src/shared.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shared.jl b/src/shared.jl index a1faa66e..3d616756 100644 --- a/src/shared.jl +++ b/src/shared.jl @@ -1,7 +1,7 @@ import MacroTools: @capture, postwalk, splitdef, splitarg # NOTE: inexpr_walk used instead of MacroTools.inexpr -import .ParallelKernel: eval_arg, split_args, split_kwargs, extract_posargs_init, extract_kernel_args, insert_device_types, is_kernel, is_call, gensym_world, isgpu, iscpu, @isgpu, @iscpu, substitute, substitute_in_kernel, in_signature, inexpr_walk, adjust_signatures, handle_indices_and_literals, add_inbounds, cast, @ranges, @rangelengths, @return_value, @return_nothing -import .ParallelKernel: PKG_CUDA, PKG_AMDGPU, PKG_THREADS, PKG_POLYESTER, PKG_NONE, NUMBERTYPE_NONE, SUPPORTED_NUMBERTYPES, SUPPORTED_PACKAGES, ERRMSG_UNSUPPORTED_PACKAGE, INT_CUDA, INT_AMDGPU, INT_POLYESTER, INT_THREADS, INDICES, PKNumber, RANGES_VARNAME, RANGES_TYPE, RANGELENGTH_XYZ_TYPE, RANGELENGTHS_VARNAMES, THREADIDS_VARNAMES, GENSYM_SEPARATOR, AD_SUPPORTED_ANNOTATIONS -import .ParallelKernel: @require, @symbols, symbols, longnameof, @prettyexpand, @prettystring, prettystring, @gorgeousexpand, @gorgeousstring, gorgeousstring, @interpolate +import .ParallelKernel: eval_arg, split_args, split_kwargs, extract_posargs_init, extract_kernel_args, insert_device_types, is_kernel, is_call, gensym_world, isgpu, iscpu, @isgpu, @iscpu, substitute, substitute_in_kernel, in_signature, inexpr_walk, adjust_signatures, handle_indices_and_literals, add_inbounds, cast, @ranges, @rangelengths, @return_value, @return_nothing, handle_padding +import .ParallelKernel: PKG_CUDA, PKG_AMDGPU, PKG_THREADS, PKG_POLYESTER, PKG_NONE, NUMBERTYPE_NONE, SUPPORTED_NUMBERTYPES, SUPPORTED_PACKAGES, ERRMSG_UNSUPPORTED_PACKAGE, INT_CUDA, INT_AMDGPU, INT_POLYESTER, INT_THREADS, INDICES, INDICES_INN, PKNumber, RANGES_VARNAME, RANGES_TYPE, RANGELENGTH_XYZ_TYPE, RANGELENGTHS_VARNAMES, THREADIDS_VARNAMES, GENSYM_SEPARATOR, AD_SUPPORTED_ANNOTATIONS +import .ParallelKernel: @require, @symbols, symbols, longnameof, @prettyexpand, @prettystring, prettystring, @gorgeousexpand, @gorgeousstring, gorgeousstring, interpolate ## CONSTANTS From 633c7d2b138ad3b2906f2a56469d84747f6ae115 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 23 Oct 2024 19:45:03 +0200 Subject: [PATCH 19/29] fix computation order of average tests --- test/test_FiniteDifferences3D.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_FiniteDifferences3D.jl b/test/test_FiniteDifferences3D.jl index 056ffae0..9f4ce8e1 100644 --- a/test/test_FiniteDifferences3D.jl +++ b/test/test_FiniteDifferences3D.jl @@ -96,7 +96,7 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t @parallel av_xyi!(R, Axyzz) = (@all(R) = @av_xyi(Axyzz); return) @parallel av_xzi!(R, Axyyz) = (@all(R) = @av_xzi(Axyyz); return) @parallel av_yzi!(R, Axxyz) = (@all(R) = @av_yzi(Axxyz); return) - R.=0; @parallel av!(R, Axyz); @test all(Array(R .== (Axyz[1:end-1,1:end-1,1:end-1].+Axyz[2:end,1:end-1,1:end-1].+Axyz[2:end,2:end,1:end-1].+Axyz[2:end,2:end,2:end].+Axyz[1:end-1,2:end,2:end].+Axyz[1:end-1,1:end-1,2:end].+Axyz[2:end,1:end-1,2:end].+Axyz[1:end-1,2:end,1:end-1])*0.125)) + R.=0; @parallel av!(R, Axyz); @test all(Array(R .== (Axyz[1:end-1,1:end-1,1:end-1].+Axyz[2:end,1:end-1,1:end-1].+Axyz[1:end-1,2:end,1:end-1].+Axyz[2:end,2:end,1:end-1].+Axyz[1:end-1,1:end-1,2:end].+Axyz[2:end,1:end-1,2:end].+Axyz[1:end-1,2:end,2:end].+Axyz[2:end,2:end,2:end])*0.125)) R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :, :].+Ax[1:end-1, :, :]).*0.5)) R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end, :].+Ay[ :,1:end-1, :]).*0.5)) R.=0; @parallel av_za!(R, Az); @test all(Array(R .== (Az[ :, :,2:end].+Az[ :, :,1:end-1]).*0.5)) @@ -124,7 +124,7 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t @parallel harm_xyi!(R, Axyzz) = (@all(R) = @harm_xyi(Axyzz); return) @parallel harm_xzi!(R, Axyyz) = (@all(R) = @harm_xzi(Axyyz); return) @parallel harm_yzi!(R, Axxyz) = (@all(R) = @harm_yzi(Axxyz); return) - R.=0; @parallel harm!(R, Axyz); @test all(Array(R .== 8 ./(1 ./Axyz[1:end-1,1:end-1,1:end-1].+1 ./Axyz[2:end,1:end-1,1:end-1].+1 ./Axyz[2:end,2:end,1:end-1].+1 ./Axyz[2:end,2:end,2:end].+1 ./Axyz[1:end-1,2:end,2:end].+1 ./Axyz[1:end-1,1:end-1,2:end].+1 ./Axyz[2:end,1:end-1,2:end].+1 ./Axyz[1:end-1,2:end,1:end-1]) )) + R.=0; @parallel harm!(R, Axyz); @test all(Array(R .== 8 ./(1 ./Axyz[1:end-1,1:end-1,1:end-1].+1 ./Axyz[2:end,1:end-1,1:end-1].+1 ./Axyz[1:end-1,2:end,1:end-1].+1 ./Axyz[2:end,2:end,1:end-1].+1 ./Axyz[1:end-1,1:end-1,2:end].+1 ./Axyz[2:end,1:end-1,2:end].+1 ./Axyz[1:end-1,2:end,2:end].+1 ./Axyz[2:end,2:end,2:end]) )) R.=0; @parallel harm_xa!(R, Ax); @test all(Array(R .== 2 ./(1 ./Ax[2:end, :, :].+1 ./Ax[1:end-1, :, :]))) R.=0; @parallel harm_ya!(R, Ay); @test all(Array(R .== 2 ./(1 ./Ay[ :,2:end, :].+1 ./Ay[ :,1:end-1, :]))) R.=0; @parallel harm_za!(R, Az); @test all(Array(R .== 2 ./(1 ./Az[ :, :,2:end].+1 ./Az[ :, :,1:end-1]))) From 763f215e011c4e31b5c5ce026d1cd7bd4d1b0635 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 23 Oct 2024 19:46:10 +0200 Subject: [PATCH 20/29] fix initialization unit tests --- test/test_init_parallel_stencil.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/test_init_parallel_stencil.jl b/test/test_init_parallel_stencil.jl index 5fdfc91c..d401e0bd 100644 --- a/test/test_init_parallel_stencil.jl +++ b/test/test_init_parallel_stencil.jl @@ -94,12 +94,13 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t set_inbounds(@__MODULE__, false) set_padding(@__MODULE__, false) @require is_initialized(@__MODULE__) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :Threads, Float64, 3, false, false) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float32, 3, false, false) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 2, false, false) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, true, false) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, false, true) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :AMDGPU, Float16, 1, true, true) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :Threads, Float64, 3, false, false, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float32, 3, false, false, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 2, false, false, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, true, false, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, false, true, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, false, false, true) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :AMDGPU, Float16, 1, true, false, true) set_initialized(@__MODULE__, false) set_package(@__MODULE__, PKG_NONE) set_numbertype(@__MODULE__, NUMBERTYPE_NONE) From bb576e77f9e698d7bd90ef3c738f417852db908c Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 30 Oct 2024 18:41:47 +0100 Subject: [PATCH 21/29] update parallel unit tests --- test/test_parallel.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_parallel.jl b/test/test_parallel.jl index 5809cc15..1d7fae99 100644 --- a/test/test_parallel.jl +++ b/test/test_parallel.jl @@ -857,8 +857,8 @@ import ParallelStencil.@gorgeousexpand end; @testset "apply masks" begin expansion = @prettystring(1, @parallel sum!(A, B) = (@all(A) = @all(A) + @all(B); return)) - @test occursin("if @within(\"@all\", A)", expansion) @test @prettystring(@within("@all", A)) == string(:($ix <= size(A, 1) && ($iy <= size(A, 2) && $iz <= size(A, 3)))) + @test occursin("if $(@prettystring(@within("@all", A)))", expansion) end; @reset_parallel_stencil() end; From 49bab6a441ea6d4d3e9d9998c9a959a24b65ec82 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 30 Oct 2024 18:45:22 +0100 Subject: [PATCH 22/29] at first and last index usage in FiniteDifferences --- src/FiniteDifferences.jl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/FiniteDifferences.jl b/src/FiniteDifferences.jl index b1a3a0b2..352073f4 100644 --- a/src/FiniteDifferences.jl +++ b/src/FiniteDifferences.jl @@ -62,8 +62,8 @@ macro minloc(A) @expandargs(A); esc(:( min( min($A[$ixi-1], $A[$ixi+1]), $A[$i @doc WITHIN_DOC macro within(macroname::String, A) @expandargs(A) - if macroname == "@all" esc( :( $ix <= size($A,1)) ) - elseif macroname == "@inn" esc( :(1 < $ixi < size($A,1)) ) + if macroname == "@all" esc( :( $ix <= lastindex($A,1)) ) + elseif macroname == "@inn" esc( :(firstindex($A,1) < $ixi < lastindex($A,1)) ) else error("unkown macroname: $macroname. If you want to add your own assignement macros, overwrite the macro 'within(macroname::String, A)'; to still use the exising macro within as well call ParallelStencil.FiniteDifferences{1|2|3}D.@within(macroname, A) at the end.") end end @@ -185,10 +185,10 @@ macro minloc(A) @expandargs(A); esc(:( min( min( min($A[$ixi-1,$iyi ], $A[$ @doc WITHIN_DOC macro within(macroname::String, A) @expandargs(A) - if macroname == "@all" esc( :( $ix<=size($A,1) && $iy<=size($A,2)) ) - elseif macroname == "@inn" esc( :(1<$ixi Date: Wed, 30 Oct 2024 18:48:10 +0100 Subject: [PATCH 23/29] add macros for first and last index --- src/ParallelKernel/kernel_language.jl | 38 ++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/src/ParallelKernel/kernel_language.jl b/src/ParallelKernel/kernel_language.jl index a714a95a..afa8add3 100644 --- a/src/ParallelKernel/kernel_language.jl +++ b/src/ParallelKernel/kernel_language.jl @@ -89,12 +89,6 @@ Call a macro analogue to `Base.@println`, compatible with the package for parall macro pk_println(args...) check_initialized(__module__); esc(pk_println(__module__, args...)); end -## INTERNAL MACROS - -## -macro threads(args...) check_initialized(__module__); esc(threads(__module__, args...)); end - - ## const FORALL_DOC = """ @∀ x ∈ X statement @@ -139,6 +133,20 @@ Expand the `statement` for all `x` in `X`. macro ∀(args...) check_initialized(__module__); checkforallargs(args...); esc(∀(__module__, args...)); end +## INTERNAL MACROS + +## +macro threads(args...) check_initialized(__module__); esc(threads(__module__, args...)); end + + +## +macro firstindex(args...) check_initialized(__module__); checkargs_begin_end(args...); esc(_firstindex(__module__, args...)); end + + +## +macro lastindex(args...) check_initialized(__module__); checkargs_begin_end(args...); esc(_lastindex(__module__, args...)); end + + ## macro return_value(args...) check_initialized(__module__); checksinglearg(args...); esc(return_value(args...)); end @@ -166,6 +174,10 @@ function checkforallargs(args...) if !((args[1].head == :call && args[1].args[1] in [:∈, :in]) || args[1].head == :(=)) @ArgumentError("the first argument must be of the form `x ∈ X, `x in X` or `x = X`.") end end +function checkargs_begin_end(args...) + if !(2 <= length(args) <= 3) @ArgumentError("wrong number of arguments.") end +end + ## FUNCTIONS FOR INDEXING AND DIMENSIONS @@ -289,6 +301,20 @@ function threads(caller::Module, args...; package::Symbol=get_package(caller)) end end +function _firstindex(caller::Module, A::Expr, dim::Expr, padding::Union{Bool, Symbol, Expr}=false) + padding = eval_arg(caller, padding) + if (padding) return :($A.indices[$dim][1]) + else return :(1) + end +end + +function _lastindex(caller::Module, A::Expr, dim::Expr, padding::Union{Bool, Symbol, Expr}=false) + padding = eval_arg(caller, padding) + if (padding) return :($A.indices[$dim][end]) + else return :(size($A, $dim)) + end +end + ## CPU TARGET IMPLEMENTATIONS From b9e71be9e2cdd3b8d1b818be1f5b779578ad6054 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 30 Oct 2024 18:51:40 +0100 Subject: [PATCH 24/29] generalize find_vars and introduce is_access --- src/ParallelKernel/shared.jl | 27 +++++++++++++++++++++++++++ src/kernel_language.jl | 23 +---------------------- src/shared.jl | 12 ++++-------- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/src/ParallelKernel/shared.jl b/src/ParallelKernel/shared.jl index 14f094b5..216078da 100644 --- a/src/ParallelKernel/shared.jl +++ b/src/ParallelKernel/shared.jl @@ -238,6 +238,33 @@ function insert_device_types(caller::Module, kernel::Expr) return kernel end +function find_vars(body::Expr, indices::NTuple{N,<:Union{Symbol,Expr}} where N; readonly=false) + vars = Dict() + writevars = Dict() + postwalk(body) do ex + if is_access(ex, indices...) + @capture(ex, A_[indices_expr__]) || @ModuleInternalError("a indices array access could not be pattern matched.") + if haskey(vars, A) vars[A] += 1 + else vars[A] = 1 + end + end + if @capture(ex, (A_[indices_expr__] = rhs_) | (A_[indices_expr__] .= rhs_)) && is_access(:($A[$(indices_expr...)]), indices...) + if haskey(writevars, A) writevars[A] += 1 + else writevars[A] = 1 + end + end + return ex + end + if (readonly) return Dict(A => count for (A, count) in vars if A ∉ keys(writevars)) + else return vars + end +end + +is_access(ex::Expr, ix::Symbol, iy::Symbol, iz::Symbol) = @capture(ex, A_[x_, y_, z_]) && inexpr_walk(x, ix) && inexpr_walk(y, iy) && inexpr_walk(z, iz) +is_access(ex::Expr, ix::Symbol, iy::Symbol) = @capture(ex, A_[x_, y_]) && inexpr_walk(x, ix) && inexpr_walk(y, iy) +is_access(ex::Expr, ix::Symbol) = @capture(ex, A_[x_]) && inexpr_walk(x, ix) +is_access(ex, indices...) = false + ## FUNCTIONS TO DEAL WITH KERNEL/MACRO CALLS: CHECK IF DEFINITION/CALL, EXTRACT, SPLIT AND EVALUATE ARGUMENTS diff --git a/src/kernel_language.jl b/src/kernel_language.jl index 92d59e7a..5cb7f906 100644 --- a/src/kernel_language.jl +++ b/src/kernel_language.jl @@ -59,7 +59,7 @@ function memopt(metadata_module::Module, is_parallel_kernel::Bool, caller::Modul indices = Tuple(extract_tuple(indices)) use_shmemhalos = isnothing(use_shmemhalos) ? use_shmemhalos : eval_arg(caller, use_shmemhalos) optranges = isnothing(optranges) ? optranges : eval_arg(caller, optranges) - readonlyvars = find_readonlyvars(body, indices) + readonlyvars = find_vars(body, indices; readonly=true) if length(indices) != 3 @IncoherentArgumentError("incoherent arguments memopt in @parallel[_indices] : optimization can only be applied in 3-D @parallel kernels and @parallel_indices kernels with three indices.") end if optvars == (Symbol(""),) optvars = Tuple(keys(readonlyvars)) @@ -488,27 +488,6 @@ end ## HELPER FUNCTIONS -function find_readonlyvars(body::Expr, indices::NTuple{N,<:Union{Symbol,Expr}} where N) - vars = Dict() - writevars = Dict() - postwalk(body) do ex - if is_stencil_access(ex, indices...) - @capture(ex, A_[indices_expr__]) || @ModuleInternalError("a stencil access could not be pattern matched.") - if haskey(vars, A) vars[A] += 1 - else vars[A] = 1 - end - end - if @capture(ex, (A_[indices_expr__] = rhs_) | (A_[indices_expr__] .= rhs_)) && is_stencil_access(:($A[$(indices_expr...)]), indices...) - if haskey(writevars, A) writevars[A] += 1 - else writevars[A] = 1 - end - end - return ex - end - readonlyvars = Dict(A => count for (A, count) in vars if A ∉ keys(writevars)) - return readonlyvars -end - function eval_offsets(caller::Module, body::Expr, indices::NTuple{N,<:Union{Symbol,Expr}} where N, int_type::Type{<:Integer}) return postwalk(body) do ex if !is_stencil_access(ex, indices...) return ex; end diff --git a/src/shared.jl b/src/shared.jl index 3d616756..b9552800 100644 --- a/src/shared.jl +++ b/src/shared.jl @@ -1,5 +1,5 @@ import MacroTools: @capture, postwalk, splitdef, splitarg # NOTE: inexpr_walk used instead of MacroTools.inexpr -import .ParallelKernel: eval_arg, split_args, split_kwargs, extract_posargs_init, extract_kernel_args, insert_device_types, is_kernel, is_call, gensym_world, isgpu, iscpu, @isgpu, @iscpu, substitute, substitute_in_kernel, in_signature, inexpr_walk, adjust_signatures, handle_indices_and_literals, add_inbounds, cast, @ranges, @rangelengths, @return_value, @return_nothing, handle_padding +import .ParallelKernel: eval_arg, split_args, split_kwargs, extract_posargs_init, extract_kernel_args, insert_device_types, is_kernel, is_call, gensym_world, isgpu, iscpu, @isgpu, @iscpu, substitute, substitute_in_kernel, in_signature, inexpr_walk, adjust_signatures, handle_indices_and_literals, add_inbounds, cast, @ranges, @rangelengths, @return_value, @return_nothing, @firstindex, @lastindex, is_access, find_vars, handle_padding import .ParallelKernel: PKG_CUDA, PKG_AMDGPU, PKG_THREADS, PKG_POLYESTER, PKG_NONE, NUMBERTYPE_NONE, SUPPORTED_NUMBERTYPES, SUPPORTED_PACKAGES, ERRMSG_UNSUPPORTED_PACKAGE, INT_CUDA, INT_AMDGPU, INT_POLYESTER, INT_THREADS, INDICES, INDICES_INN, PKNumber, RANGES_VARNAME, RANGES_TYPE, RANGELENGTH_XYZ_TYPE, RANGELENGTHS_VARNAMES, THREADIDS_VARNAMES, GENSYM_SEPARATOR, AD_SUPPORTED_ANNOTATIONS import .ParallelKernel: @require, @symbols, symbols, longnameof, @prettyexpand, @prettystring, prettystring, @gorgeousexpand, @gorgeousstring, gorgeousstring, interpolate @@ -36,8 +36,9 @@ const META_FUNCTION_PREFIX = string(gensym_world("META", @__MODULE__)) ## FUNCTIONS TO DEAL WITH KERNEL DEFINITIONS -get_statements(body::Expr) = (body.head == :block) ? body.args : [body] -is_array_assignment(statement) = isa(statement, Expr) && (statement.head == :(=)) && isa(statement.args[1], Expr) && (statement.args[1].head == :macrocall) +get_statements(body::Expr) = (body.head == :block) ? body.args : [body] +is_array_assignment(statement) = isa(statement, Expr) && (statement.head == :(=)) && isa(statement.args[1], Expr) && (statement.args[1].head == :macrocall) +is_stencil_access(ex, indices...) = is_access(ex, indices...) function validate_body(body::Expr) statements = get_statements(body) @@ -47,11 +48,6 @@ function validate_body(body::Expr) end end -is_stencil_access(ex::Expr, ix::Symbol, iy::Symbol, iz::Symbol) = @capture(ex, A_[x_, y_, z_]) && inexpr_walk(x, ix) && inexpr_walk(y, iy) && inexpr_walk(z, iz) -is_stencil_access(ex::Expr, ix::Symbol, iy::Symbol) = @capture(ex, A_[x_, y_]) && inexpr_walk(x, ix) && inexpr_walk(y, iy) -is_stencil_access(ex::Expr, ix::Symbol) = @capture(ex, A_[x_]) && inexpr_walk(x, ix) -is_stencil_access(ex, indices...) = false - function substitute(expr::Expr, A, m, indices::NTuple{N,<:Union{Symbol,Expr}} where N) return postwalk(expr) do ex if is_stencil_access(ex, indices...) From b129021e5c2e9fc74db43e2cf8503f776fdfd277 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 30 Oct 2024 18:52:50 +0100 Subject: [PATCH 25/29] add remaining handling of padding --- src/ParallelKernel/parallel.jl | 37 +++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/ParallelKernel/parallel.jl b/src/ParallelKernel/parallel.jl index c5955f2a..a7383388 100644 --- a/src/ParallelKernel/parallel.jl +++ b/src/ParallelKernel/parallel.jl @@ -170,6 +170,7 @@ end function parallel_kernel(caller::Module, package::Symbol, numbertype::DataType, inbounds::Bool, indices::Union{Symbol,Expr}, kernel::Expr) if (!isa(indices,Symbol) && !isa(indices.head,Symbol)) @ArgumentError("@parallel_indices: argument 'indices' must be a tuple of indices or a single index (e.g. (ix, iy, iz) or (ix, iy) or ix ).") end indices = extract_tuple(indices) + padding = get_padding(caller) body = get_body(kernel) body = remove_return(body) body = macroexpand(caller, body) @@ -183,7 +184,7 @@ function parallel_kernel(caller::Module, package::Symbol, numbertype::DataType, end if isgpu(package) kernel = insert_device_types(caller, kernel) end kernel = adjust_signatures(kernel, package) - body = handle_padding(body, get_padding(caller)) # TODO: padding can later be made configurable per kernel (to enable working with arrays as before). + body = handle_padding(body, padding) # TODO: padding can later be made configurable per kernel (to enable working with arrays as before). body = handle_indices_and_literals(body, indices, package, numbertype) if (inbounds) body = add_inbounds(body) end body = add_return(body) @@ -365,6 +366,15 @@ function adjust_signatures(kernel::Expr, package::Symbol) end function handle_padding(body::Expr, padding::Bool) + body = substitute_indices_inn(body, padding) + if padding + body = substitute_firstlastindex(body) + body = substitute_view_accesses(body, INDICES) + end + return body +end + +function substitute_indices_inn(body::Expr, padding::Bool) for i=1:length(INDICES_INN) index_inn = (padding) ? INDICES[i] : :($(INDICES[i]) + 1) # NOTE: expression of ixi with ix, etc.: if padding is not used, they must be shifted by 1. body = substitute(body, INDICES_INN[i], index_inn) @@ -372,6 +382,31 @@ function handle_padding(body::Expr, padding::Bool) return body end +function substitute_firstlastindex(body::Expr) + padding = true + return postwalk(body) do ex + if @capture(ex, f_(args__)) + if (f == :firstindex) return :(ParallelStencil.ParallelKernel.@firstindex($(args...), $padding)) + elseif (f == :lastindex) return :(ParallelStencil.ParallelKernel.@lastindex($(args...), $padding)) + else return ex + end + else + return ex + end + end +end + +function substitute_view_accesses(expr::Expr, indices::NTuple{N,<:Union{Symbol,Expr}} where N) + return postwalk(expr) do ex + if is_access(ex, indices...) + @capture(ex, A_[indices_expr__]) || @ModuleInternalError("a stencil access could not be pattern matched.") + return :($A.parent[$(indices_expr...)]) + else + return ex + end + end +end + function handle_indices_and_literals(body::Expr, indices::Array, package::Symbol, numbertype::DataType) int_type = kernel_int_type(package) ranges = [:($RANGES_VARNAME[1]), :($RANGES_VARNAME[2]), :($RANGES_VARNAME[3])] From 8a9f4389750040af13539cf6dbb72aec8f1d0cd9 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 30 Oct 2024 19:23:54 +0100 Subject: [PATCH 26/29] use lastindex in masks test --- test/test_parallel.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_parallel.jl b/test/test_parallel.jl index 1d7fae99..3024033c 100644 --- a/test/test_parallel.jl +++ b/test/test_parallel.jl @@ -857,7 +857,7 @@ import ParallelStencil.@gorgeousexpand end; @testset "apply masks" begin expansion = @prettystring(1, @parallel sum!(A, B) = (@all(A) = @all(A) + @all(B); return)) - @test @prettystring(@within("@all", A)) == string(:($ix <= size(A, 1) && ($iy <= size(A, 2) && $iz <= size(A, 3)))) + @test @prettystring(@within("@all", A)) == string(:($ix <= lastindex(A, 1) && ($iy <= lastindex(A, 2) && $iz <= lastindex(A, 3)))) @test occursin("if $(@prettystring(@within("@all", A)))", expansion) end; @reset_parallel_stencil() From efa42c0e29b22dfad0a516c74a6255e702f844dd Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Thu, 31 Oct 2024 10:24:29 +0100 Subject: [PATCH 27/29] handel inverses --- src/ParallelKernel/parallel.jl | 13 ++++++++++++- src/parallel.jl | 1 + src/shared.jl | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/ParallelKernel/parallel.jl b/src/ParallelKernel/parallel.jl index 28a5af08..81b1e9c2 100644 --- a/src/ParallelKernel/parallel.jl +++ b/src/ParallelKernel/parallel.jl @@ -190,6 +190,7 @@ function parallel_kernel(caller::Module, package::Symbol, numbertype::DataType, if isgpu(package) kernel = insert_device_types(caller, kernel) end kernel = adjust_signatures(kernel, package) body = handle_padding(body, padding) # TODO: padding can later be made configurable per kernel (to enable working with arrays as before). + body = handle_inverses(body) body = handle_indices_and_literals(body, indices, package, numbertype) if (inbounds) body = add_inbounds(body) end body = add_return(body) @@ -361,7 +362,7 @@ function literaltypes(type1::DataType, type2::DataType, expr::Expr) end -## FUNCTIONS TO HANDLE SIGNATURES AND INDICES +## FUNCTIONS TO HANDLE SIGNATURES, INDICES, INVERSES AND PADDING function adjust_signatures(kernel::Expr, package::Symbol) int_type = kernel_int_type(package) @@ -372,6 +373,16 @@ function adjust_signatures(kernel::Expr, package::Symbol) return kernel end +function handle_inverses(body::Expr) + return postwalk(body) do ex + if @capture(ex, (1 | 1.0 | 1.0f0) / x_) + return :(inv($x)) + else + return ex + end + end +end + function handle_padding(body::Expr, padding::Bool) body = substitute_indices_inn(body, padding) if padding diff --git a/src/parallel.jl b/src/parallel.jl index 70f294da..85f9fe92 100644 --- a/src/parallel.jl +++ b/src/parallel.jl @@ -288,6 +288,7 @@ function parallel_kernel(metadata_module::Module, metadata_function::Expr, calle if isgpu(package) kernel = insert_device_types(caller, kernel) end if !memopt kernel = adjust_signatures(kernel, package) + body = handle_inverses(body) body = handle_indices_and_literals(body, indices, package, numbertype) if (inbounds) body = add_inbounds(body) end end diff --git a/src/shared.jl b/src/shared.jl index 09c4cd1e..0b7d7ca8 100644 --- a/src/shared.jl +++ b/src/shared.jl @@ -1,5 +1,5 @@ import MacroTools: @capture, postwalk, splitdef, splitarg # NOTE: inexpr_walk used instead of MacroTools.inexpr -import .ParallelKernel: eval_arg, split_args, split_kwargs, extract_posargs_init, extract_kernel_args, insert_device_types, is_kernel, is_call, gensym_world, isgpu, iscpu, @isgpu, @iscpu, substitute, substitute_in_kernel, in_signature, inexpr_walk, adjust_signatures, handle_indices_and_literals, add_inbounds, cast, @ranges, @rangelengths, @return_value, @return_nothing, @firstindex, @lastindex, is_access, find_vars, handle_padding +import .ParallelKernel: eval_arg, split_args, split_kwargs, extract_posargs_init, extract_kernel_args, insert_device_types, is_kernel, is_call, gensym_world, isgpu, iscpu, @isgpu, @iscpu, substitute, substitute_in_kernel, in_signature, inexpr_walk, adjust_signatures, handle_indices_and_literals, add_inbounds, cast, @ranges, @rangelengths, @return_value, @return_nothing, @firstindex, @lastindex, is_access, find_vars, handle_padding, handle_inverses import .ParallelKernel: PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_THREADS, PKG_POLYESTER, PKG_NONE, NUMBERTYPE_NONE, SUPPORTED_NUMBERTYPES, SUPPORTED_PACKAGES, ERRMSG_UNSUPPORTED_PACKAGE, INT_CUDA, INT_AMDGPU, INT_METAL, INT_POLYESTER, INT_THREADS, INDICES, INDICES_INN, PKNumber, RANGES_VARNAME, RANGES_TYPE, RANGELENGTH_XYZ_TYPE, RANGELENGTHS_VARNAMES, THREADIDS_VARNAMES, GENSYM_SEPARATOR, AD_SUPPORTED_ANNOTATIONS import .ParallelKernel: @require, @symbols, symbols, longnameof, @prettyexpand, @prettystring, prettystring, @gorgeousexpand, @gorgeousstring, gorgeousstring, interpolate From 37047fdc87d73e74a3c9cf28491bf23fd58c5836 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Thu, 31 Oct 2024 13:12:21 +0100 Subject: [PATCH 28/29] fix operator error --- test/test_FiniteDifferences3D.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_FiniteDifferences3D.jl b/test/test_FiniteDifferences3D.jl index 0eb25909..c3e261af 100644 --- a/test/test_FiniteDifferences3D.jl +++ b/test/test_FiniteDifferences3D.jl @@ -113,12 +113,12 @@ eval(:( @parallel av_xzi!(R, Axyyz) = (@all(R) = @av_xzi(Axyyz); return) @parallel av_yzi!(R, Axxyz) = (@all(R) = @av_yzi(Axxyz); return) R.=0; @parallel av!(R, Axyz); @test all(Array(R .== (Axyz[1:end-1,1:end-1,1:end-1].+Axyz[2:end,1:end-1,1:end-1].+Axyz[1:end-1,2:end,1:end-1].+Axyz[2:end,2:end,1:end-1].+Axyz[1:end-1,1:end-1,2:end].+Axyz[2:end,1:end-1,2:end].+Axyz[1:end-1,2:end,2:end].+Axyz[2:end,2:end,2:end])*0.125)) - R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :, :].+Ax[1:end-1, :, :])..*$precision(0.5))) - R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end, :].+Ay[ :,1:end-1, :])..*$precision(0.5))) - R.=0; @parallel av_za!(R, Az); @test all(Array(R .== (Az[ :, :,2:end].+Az[ :, :,1:end-1])..*$precision(0.5))) - R.=0; @parallel av_xi!(R, Axyyzz); @test all(Array(R .== (Axyyzz[2:end ,2:end-1,2:end-1].+Axyyzz[1:end-1,2:end-1,2:end-1])..*$precision(0.5))) - R.=0; @parallel av_yi!(R, Axxyzz); @test all(Array(R .== (Axxyzz[2:end-1,2:end ,2:end-1].+Axxyzz[2:end-1,1:end-1,2:end-1])..*$precision(0.5))) - R.=0; @parallel av_zi!(R, Axxyyz); @test all(Array(R .== (Axxyyz[2:end-1,2:end-1,2:end ].+Axxyyz[2:end-1,2:end-1,1:end-1])..*$precision(0.5))) + R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :, :].+Ax[1:end-1, :, :]).*$precision(0.5))) + R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end, :].+Ay[ :,1:end-1, :]).*$precision(0.5))) + R.=0; @parallel av_za!(R, Az); @test all(Array(R .== (Az[ :, :,2:end].+Az[ :, :,1:end-1]).*$precision(0.5))) + R.=0; @parallel av_xi!(R, Axyyzz); @test all(Array(R .== (Axyyzz[2:end ,2:end-1,2:end-1].+Axyyzz[1:end-1,2:end-1,2:end-1]).*$precision(0.5))) + R.=0; @parallel av_yi!(R, Axxyzz); @test all(Array(R .== (Axxyzz[2:end-1,2:end ,2:end-1].+Axxyzz[2:end-1,1:end-1,2:end-1]).*$precision(0.5))) + R.=0; @parallel av_zi!(R, Axxyyz); @test all(Array(R .== (Axxyyz[2:end-1,2:end-1,2:end ].+Axxyyz[2:end-1,2:end-1,1:end-1]).*$precision(0.5))) R.=0; @parallel av_xya!(R, Axy); @test all(Array(R .== (Axy[1:end-1,1:end-1,:].+Axy[2:end,1:end-1,:].+Axy[1:end-1,2:end,:].+Axy[2:end,2:end,:]).*$precision(0.25))) R.=0; @parallel av_xza!(R, Axz); @test all(Array(R .== (Axz[1:end-1,:,1:end-1].+Axz[2:end,:,1:end-1].+Axz[1:end-1,:,2:end].+Axz[2:end,:,2:end]).*$precision(0.25))) R.=0; @parallel av_yza!(R, Ayz); @test all(Array(R .== (Ayz[:,1:end-1,1:end-1].+Ayz[:,2:end,1:end-1].+Ayz[:,1:end-1,2:end].+Ayz[:,2:end,2:end]).*$precision(0.25))) From 3b2112d9feed6a49d0c2bbfb123061983ea516f2 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Thu, 31 Oct 2024 16:19:23 +0100 Subject: [PATCH 29/29] remove loop on precision in unit tests --- test/ParallelKernel/test_allocators.jl | 1 + .../ParallelKernel/test_hide_communication.jl | 13 +++-- .../test_init_parallel_kernel.jl | 1 + test/ParallelKernel/test_kernel_language.jl | 23 ++++----- test/ParallelKernel/test_parallel.jl | 19 ++++--- .../test_reset_parallel_kernel.jl | 1 + test/test_FiniteDifferences1D.jl | 12 ++--- test/test_FiniteDifferences2D.jl | 21 ++++---- test/test_FiniteDifferences3D.jl | 35 +++++++------ test/test_extensions.jl | 1 + test/test_incremental_compilation.jl | 1 + test/test_init_parallel_stencil.jl | 1 + test/test_parallel.jl | 51 +++++++++---------- test/test_reset_parallel_stencil.jl | 1 + 14 files changed, 90 insertions(+), 91 deletions(-) diff --git a/test/ParallelKernel/test_allocators.jl b/test/ParallelKernel/test_allocators.jl index 6c174492..6f4f1247 100644 --- a/test/ParallelKernel/test_allocators.jl +++ b/test/ParallelKernel/test_allocators.jl @@ -30,6 +30,7 @@ end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. const DATA_INDEX = ParallelStencil.INT_THREADS # TODO: using Data.Index does not work in combination with @reset_parallel_kernel, because the macros from module Test alternate the order of evaluation, resulting in the Data module being replaced with an empty module before Data.Index is evaluated. If at some point the indexing varies depending on the used package, then something more sophisticated is needed here (e.g., wrapping the test for each package in a module and using then Data.Index everywhere). + @static for package in TEST_PACKAGES eval(:( diff --git a/test/ParallelKernel/test_hide_communication.jl b/test/ParallelKernel/test_hide_communication.jl index 48171b19..696ace8f 100644 --- a/test/ParallelKernel/test_hide_communication.jl +++ b/test/ParallelKernel/test_hide_communication.jl @@ -27,16 +27,15 @@ end end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. -const TEST_PRECISIONS = [Float32, Float64] + @static for package in TEST_PACKAGES -for precision in TEST_PRECISIONS -(package == PKG_METAL && precision == Float64) ? continue : nothing # Metal does not support Float64 + FloatDefault = (package == PKG_METAL) ? Float32 : Float64 # Metal does not support Float64 eval(:( - @testset "$(basename(@__FILE__)) (package: $(nameof($package))) (precision: $(nameof($precision)))" begin + @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "1. hide_communication macro" begin @require !@is_initialized() - @init_parallel_kernel($package, $precision) + @init_parallel_kernel($package, $FloatDefault) @require @is_initialized() @testset "@hide_communication boundary_width block (macro expansion)" begin @static if @isgpu($package) @@ -180,7 +179,7 @@ eval(:( end; @testset "2. Exceptions" begin @require !@is_initialized() - @init_parallel_kernel($package, $precision) + @init_parallel_kernel($package, $FloatDefault) @require @is_initialized @testset "arguments @hide_communication" begin @test_throws ArgumentError checkargs_hide_communication(:boundary_width, :block) # Error: the last argument must be a code block. @@ -222,4 +221,4 @@ eval(:( end; )) -end end == nothing || true; +end == nothing || true; diff --git a/test/ParallelKernel/test_init_parallel_kernel.jl b/test/ParallelKernel/test_init_parallel_kernel.jl index a846ebe7..852b039b 100644 --- a/test/ParallelKernel/test_init_parallel_kernel.jl +++ b/test/ParallelKernel/test_init_parallel_kernel.jl @@ -27,6 +27,7 @@ end end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. + @static for package in TEST_PACKAGES eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "1. initialization of ParallelKernel" begin diff --git a/test/ParallelKernel/test_kernel_language.jl b/test/ParallelKernel/test_kernel_language.jl index fe4ffd76..761620b4 100644 --- a/test/ParallelKernel/test_kernel_language.jl +++ b/test/ParallelKernel/test_kernel_language.jl @@ -23,16 +23,15 @@ end end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. -const TEST_PRECISIONS = [Float32, Float64] + @static for package in TEST_PACKAGES -for precision in TEST_PRECISIONS -(package == PKG_METAL && precision == Float64) ? continue : nothing # Metal does not support Float64 + FloatDefault = (package == PKG_METAL) ? Float32 : Float64 # Metal does not support Float64 eval(:( - @testset "$(basename(@__FILE__)) (package: $(nameof($package))) (precision: $(nameof($precision)))" begin + @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "1. kernel language macros" begin @require !@is_initialized() - @init_parallel_kernel($package, $precision) + @init_parallel_kernel($package, $FloatDefault) @require @is_initialized() @testset "mapping to package" begin if $package == $PKG_CUDA @@ -41,7 +40,7 @@ eval(:( @test @prettystring(1, @blockDim()) == "CUDA.blockDim()" @test @prettystring(1, @threadIdx()) == "CUDA.threadIdx()" @test @prettystring(1, @sync_threads()) == "CUDA.sync_threads()" - @test @prettystring(1, @sharedMem($precision, (2,3))) == "CUDA.@cuDynamicSharedMem $(nameof($precision)) (2, 3)" + @test @prettystring(1, @sharedMem($FloatDefault, (2,3))) == "CUDA.@cuDynamicSharedMem $(nameof($FloatDefault)) (2, 3)" # @test @prettystring(1, @pk_show()) == "CUDA.@cushow" # @test @prettystring(1, @pk_println()) == "CUDA.@cuprintln" elseif $package == $AMDGPU @@ -50,7 +49,7 @@ eval(:( @test @prettystring(1, @blockDim()) == "AMDGPU.workgroupDim()" @test @prettystring(1, @threadIdx()) == "AMDGPU.workitemIdx()" @test @prettystring(1, @sync_threads()) == "AMDGPU.sync_workgroup()" - # @test @prettystring(1, @sharedMem($precision, (2,3))) == "" #TODO: not yet supported for AMDGPU + # @test @prettystring(1, @sharedMem($FloatDefault, (2,3))) == "" #TODO: not yet supported for AMDGPU # @test @prettystring(1, @pk_show()) == "CUDA.@cushow" #TODO: not yet supported for AMDGPU # @test @prettystring(1, @pk_println()) == "AMDGPU.@rocprintln" elseif $package == $PKG_METAL @@ -59,7 +58,7 @@ eval(:( @test @prettystring(1, @blockDim()) == "Metal.threads_per_threadgroup_3d()" @test @prettystring(1, @threadIdx()) == "Metal.thread_position_in_threadgroup_3d()" @test @prettystring(1, @sync_threads()) == "Metal.threadgroup_barrier(; flag = Metal.MemoryFlagThreadGroup)" - @test @prettystring(1, @sharedMem($precision, (2,3))) == "ParallelStencil.ParallelKernel.@sharedMem_metal $(nameof($precision)) (2, 3)" + @test @prettystring(1, @sharedMem($FloatDefault, (2,3))) == "ParallelStencil.ParallelKernel.@sharedMem_metal $(nameof($FloatDefault)) (2, 3)" # @test @prettystring(1, @pk_show()) == "Metal.@mtlshow" #TODO: not yet supported for Metal # @test @prettystring(1, @pk_println()) == "Metal.@mtlprintln" #TODO: not yet supported for Metal elseif @iscpu($package) @@ -68,7 +67,7 @@ eval(:( @test @prettystring(1, @blockDim()) == "ParallelStencil.ParallelKernel.@blockDim_cpu" @test @prettystring(1, @threadIdx()) == "ParallelStencil.ParallelKernel.@threadIdx_cpu" @test @prettystring(1, @sync_threads()) == "ParallelStencil.ParallelKernel.@sync_threads_cpu" - @test @prettystring(1, @sharedMem($precision, (2,3))) == "ParallelStencil.ParallelKernel.@sharedMem_cpu $(nameof($precision)) (2, 3)" + @test @prettystring(1, @sharedMem($FloatDefault, (2,3))) == "ParallelStencil.ParallelKernel.@sharedMem_cpu $(nameof($FloatDefault)) (2, 3)" # @test @prettystring(1, @pk_show()) == "Base.@show" # @test @prettystring(1, @pk_println()) == "Base.println()" end; @@ -138,7 +137,7 @@ eval(:( end; @testset "shared memory (allocation)" begin @static if @iscpu($package) - @test typeof(@sharedMem($precision,(2,3))) == typeof(ParallelStencil.ParallelKernel.MArray{Tuple{2,3}, $precision, length((2,3)), prod((2,3))}(undef)) + @test typeof(@sharedMem($FloatDefault,(2,3))) == typeof(ParallelStencil.ParallelKernel.MArray{Tuple{2,3}, $FloatDefault, length((2,3)), prod((2,3))}(undef)) @test typeof(@sharedMem(Bool,(2,3,4))) == typeof(ParallelStencil.ParallelKernel.MArray{Tuple{2,3,4}, Bool, length((2,3,4)), prod((2,3,4))}(undef)) end; end; @@ -214,7 +213,7 @@ eval(:( @reset_parallel_kernel() end; @testset "2. Exceptions" begin - @init_parallel_kernel($package, $precision) + @init_parallel_kernel($package, $FloatDefault) @require @is_initialized @testset "no arguments" begin @test_throws ArgumentError checknoargs(:(something)); # Error: length(args) != 0 @@ -229,4 +228,4 @@ eval(:( end; )) -end end == nothing || true; +end == nothing || true; diff --git a/test/ParallelKernel/test_parallel.jl b/test/ParallelKernel/test_parallel.jl index a6585847..7a4dbdde 100644 --- a/test/ParallelKernel/test_parallel.jl +++ b/test/ParallelKernel/test_parallel.jl @@ -29,16 +29,15 @@ macro compute(A) esc(:($(INDICES[1]) + ($(INDICES[2])-1)*size($A,1) macro compute_with_aliases(A) esc(:(ix + (iz -1)*size($A,1))) end import Enzyme -const TEST_PRECISIONS = [Float32, Float64] + @static for package in TEST_PACKAGES -for precision in TEST_PRECISIONS -(package == PKG_METAL && precision == Float64) ? continue : nothing # Metal does not support Float64 + FloatDefault = (package == PKG_METAL) ? Float32 : Float64 # Metal does not support Float64 eval(:( - @testset "$(basename(@__FILE__)) (package: $(nameof($package))) (precision: $(nameof($precision)))" begin + @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "1. parallel macros" begin @require !@is_initialized() - @init_parallel_kernel($package, $precision) + @init_parallel_kernel($package, $FloatDefault) @require @is_initialized() @testset "@parallel" begin @static if $package == $PKG_CUDA @@ -123,8 +122,8 @@ eval(:( B̄ = @ones(N) A_ref = Array(A) B_ref = Array(B) - Ā_ref = ones($precision, N) - B̄_ref = ones($precision, N) + Ā_ref = ones($FloatDefault, N) + B̄_ref = ones($FloatDefault, N) @parallel_indices (ix) function f!(A, B, a) A[ix] += a * B[ix] * 100.65 return @@ -567,7 +566,7 @@ eval(:( @testset "3. global defaults" begin @testset "inbounds=true" begin @require !@is_initialized() - @init_parallel_kernel($package, $precision, inbounds=true) + @init_parallel_kernel($package, $FloatDefault, inbounds=true) @require @is_initialized expansion = @prettystring(1, @parallel_indices (ix) inbounds=true f(A) = (2*A; return)) @test occursin("Base.@inbounds begin", expansion) @@ -628,7 +627,7 @@ eval(:( end; @testset "5. Exceptions" begin @require !@is_initialized() - @init_parallel_kernel($package, $precision) + @init_parallel_kernel($package, $FloatDefault) @require @is_initialized @testset "arguments @parallel" begin @test_throws ArgumentError checkargs_parallel(); # Error: isempty(args) @@ -665,4 +664,4 @@ eval(:( end; )) -end end == nothing || true; +end == nothing || true; diff --git a/test/ParallelKernel/test_reset_parallel_kernel.jl b/test/ParallelKernel/test_reset_parallel_kernel.jl index fe2cc01a..a156fd28 100644 --- a/test/ParallelKernel/test_reset_parallel_kernel.jl +++ b/test/ParallelKernel/test_reset_parallel_kernel.jl @@ -25,6 +25,7 @@ end end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. + @static for package in TEST_PACKAGES eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "1. Reset of ParallelKernel" begin diff --git a/test/test_FiniteDifferences1D.jl b/test/test_FiniteDifferences1D.jl index 01f7a120..cb3e0065 100644 --- a/test/test_FiniteDifferences1D.jl +++ b/test/test_FiniteDifferences1D.jl @@ -25,15 +25,14 @@ end end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. -const TEST_PRECISIONS = [Float32, Float64] + @static for package in TEST_PACKAGES -for precision in TEST_PRECISIONS -(package == PKG_METAL && precision == Float64) && continue # Metal does not support Float64 + FloatDefault = (package == PKG_METAL) ? Float32 : Float64 # Metal does not support Float64 eval(:( - @testset "$(basename(@__FILE__)) (package: $(nameof($package))) (precision: $(nameof($precision)))" begin + @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @require !@is_initialized() - @init_parallel_stencil($package, $precision, 1) + @init_parallel_stencil($package, $FloatDefault, 1) @require @is_initialized() nx = 7 A = @rand(nx ); @@ -56,7 +55,7 @@ eval(:( end; @testset "averages" begin @parallel av!(R, Ax) = (@all(R) = @av(Ax); return) - R.=0; @parallel av!(R, Ax); @test all(Array(R .== (Ax[1:end-1].+Ax[2:end]).*$precision(0.5))) + R.=0; @parallel av!(R, Ax); @test all(Array(R .== (Ax[1:end-1].+Ax[2:end]).*$FloatDefault(0.5))) end; @testset "harmonic averages" begin @parallel harm!(R, Ax) = (@all(R) = @harm(Ax); return) @@ -89,5 +88,4 @@ eval(:( end; )) -end end == nothing || true; diff --git a/test/test_FiniteDifferences2D.jl b/test/test_FiniteDifferences2D.jl index d70b92a2..3099662f 100644 --- a/test/test_FiniteDifferences2D.jl +++ b/test/test_FiniteDifferences2D.jl @@ -25,15 +25,14 @@ end end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. -const TEST_PRECISIONS = [Float32, Float64] + @static for package in TEST_PACKAGES -for precision in TEST_PRECISIONS -(package == PKG_METAL && precision == Float64) && continue # Metal does not support Float64 + FloatDefault = (package == PKG_METAL) ? Float32 : Float64 # Metal does not support Float64 eval(:( - @testset "$(basename(@__FILE__)) (package: $(nameof($package))) (precision: $(nameof($precision)))" begin + @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @require !@is_initialized() - @init_parallel_stencil($package, $precision, 2) + @init_parallel_stencil($package, $FloatDefault, 2) @require @is_initialized() nx, ny = 7, 5 A = @rand(nx, ny ); @@ -82,11 +81,11 @@ eval(:( @parallel av_ya!(R, Ay) = (@all(R) = @av_ya(Ay); return) @parallel av_xi!(R, Axyy) = (@all(R) = @av_xi(Axyy); return) @parallel av_yi!(R, Axxy) = (@all(R) = @av_yi(Axxy); return) - R.=0; @parallel av!(R, Axy); @test all(Array(R .== (Axy[1:end-1,1:end-1].+Axy[2:end,1:end-1].+Axy[1:end-1,2:end].+Axy[2:end,2:end]).*$precision(0.25))) - R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :].+Ax[1:end-1, :]).*$precision(0.5))) - R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end].+Ay[ :,1:end-1]).*$precision(0.5))) - R.=0; @parallel av_xi!(R, Axyy); @test all(Array(R .== (Axyy[2:end ,2:end-1].+Axyy[1:end-1,2:end-1]).*$precision(0.5))) - R.=0; @parallel av_yi!(R, Axxy); @test all(Array(R .== (Axxy[2:end-1,2:end ].+Axxy[2:end-1,1:end-1]).*$precision(0.5))) + R.=0; @parallel av!(R, Axy); @test all(Array(R .== (Axy[1:end-1,1:end-1].+Axy[2:end,1:end-1].+Axy[1:end-1,2:end].+Axy[2:end,2:end]).*$FloatDefault(0.25))) + R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :].+Ax[1:end-1, :]).*$FloatDefault(0.5))) + R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end].+Ay[ :,1:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_xi!(R, Axyy); @test all(Array(R .== (Axyy[2:end ,2:end-1].+Axyy[1:end-1,2:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_yi!(R, Axxy); @test all(Array(R .== (Axxy[2:end-1,2:end ].+Axxy[2:end-1,1:end-1]).*$FloatDefault(0.5))) end; @testset "harmonic averages" begin @parallel harm!(R, Axy) = (@all(R) = @harm(Axy); return) @@ -130,4 +129,4 @@ eval(:( end; )) -end end == nothing || true; +end == nothing || true; diff --git a/test/test_FiniteDifferences3D.jl b/test/test_FiniteDifferences3D.jl index c3e261af..e41045e3 100644 --- a/test/test_FiniteDifferences3D.jl +++ b/test/test_FiniteDifferences3D.jl @@ -25,15 +25,14 @@ end end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. -const TEST_PRECISIONS = [Float32, Float64] + @static for package in TEST_PACKAGES -for precision in TEST_PRECISIONS -(package == PKG_METAL && precision == Float64) && continue # Metal does not support Float64 + FloatDefault = (package == PKG_METAL) ? Float32 : Float64 # Metal does not support Float64 eval(:( - @testset "$(basename(@__FILE__)) (package: $(nameof($package))) (precision: $(nameof($precision)))" begin + @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @require !@is_initialized() - @init_parallel_stencil($package, $precision, 3) + @init_parallel_stencil($package, $FloatDefault, 3) @require @is_initialized() nx, ny, nz = 7, 5, 6 A = @rand(nx , ny , nz ); @@ -113,18 +112,18 @@ eval(:( @parallel av_xzi!(R, Axyyz) = (@all(R) = @av_xzi(Axyyz); return) @parallel av_yzi!(R, Axxyz) = (@all(R) = @av_yzi(Axxyz); return) R.=0; @parallel av!(R, Axyz); @test all(Array(R .== (Axyz[1:end-1,1:end-1,1:end-1].+Axyz[2:end,1:end-1,1:end-1].+Axyz[1:end-1,2:end,1:end-1].+Axyz[2:end,2:end,1:end-1].+Axyz[1:end-1,1:end-1,2:end].+Axyz[2:end,1:end-1,2:end].+Axyz[1:end-1,2:end,2:end].+Axyz[2:end,2:end,2:end])*0.125)) - R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :, :].+Ax[1:end-1, :, :]).*$precision(0.5))) - R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end, :].+Ay[ :,1:end-1, :]).*$precision(0.5))) - R.=0; @parallel av_za!(R, Az); @test all(Array(R .== (Az[ :, :,2:end].+Az[ :, :,1:end-1]).*$precision(0.5))) - R.=0; @parallel av_xi!(R, Axyyzz); @test all(Array(R .== (Axyyzz[2:end ,2:end-1,2:end-1].+Axyyzz[1:end-1,2:end-1,2:end-1]).*$precision(0.5))) - R.=0; @parallel av_yi!(R, Axxyzz); @test all(Array(R .== (Axxyzz[2:end-1,2:end ,2:end-1].+Axxyzz[2:end-1,1:end-1,2:end-1]).*$precision(0.5))) - R.=0; @parallel av_zi!(R, Axxyyz); @test all(Array(R .== (Axxyyz[2:end-1,2:end-1,2:end ].+Axxyyz[2:end-1,2:end-1,1:end-1]).*$precision(0.5))) - R.=0; @parallel av_xya!(R, Axy); @test all(Array(R .== (Axy[1:end-1,1:end-1,:].+Axy[2:end,1:end-1,:].+Axy[1:end-1,2:end,:].+Axy[2:end,2:end,:]).*$precision(0.25))) - R.=0; @parallel av_xza!(R, Axz); @test all(Array(R .== (Axz[1:end-1,:,1:end-1].+Axz[2:end,:,1:end-1].+Axz[1:end-1,:,2:end].+Axz[2:end,:,2:end]).*$precision(0.25))) - R.=0; @parallel av_yza!(R, Ayz); @test all(Array(R .== (Ayz[:,1:end-1,1:end-1].+Ayz[:,2:end,1:end-1].+Ayz[:,1:end-1,2:end].+Ayz[:,2:end,2:end]).*$precision(0.25))) - R.=0; @parallel av_xyi!(R, Axyzz); @test all(Array(R .== (Axyzz[1:end-1,1:end-1,2:end-1].+Axyzz[2:end,1:end-1,2:end-1].+Axyzz[1:end-1,2:end,2:end-1].+Axyzz[2:end,2:end,2:end-1]).*$precision(0.25))) - R.=0; @parallel av_xzi!(R, Axyyz); @test all(Array(R .== (Axyyz[1:end-1,2:end-1,1:end-1].+Axyyz[2:end,2:end-1,1:end-1].+Axyyz[1:end-1,2:end-1,2:end].+Axyyz[2:end,2:end-1,2:end]).*$precision(0.25))) - R.=0; @parallel av_yzi!(R, Axxyz); @test all(Array(R .== (Axxyz[2:end-1,1:end-1,1:end-1].+Axxyz[2:end-1,2:end,1:end-1].+Axxyz[2:end-1,1:end-1,2:end].+Axxyz[2:end-1,2:end,2:end]).*$precision(0.25))) + R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :, :].+Ax[1:end-1, :, :]).*$FloatDefault(0.5))) + R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end, :].+Ay[ :,1:end-1, :]).*$FloatDefault(0.5))) + R.=0; @parallel av_za!(R, Az); @test all(Array(R .== (Az[ :, :,2:end].+Az[ :, :,1:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_xi!(R, Axyyzz); @test all(Array(R .== (Axyyzz[2:end ,2:end-1,2:end-1].+Axyyzz[1:end-1,2:end-1,2:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_yi!(R, Axxyzz); @test all(Array(R .== (Axxyzz[2:end-1,2:end ,2:end-1].+Axxyzz[2:end-1,1:end-1,2:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_zi!(R, Axxyyz); @test all(Array(R .== (Axxyyz[2:end-1,2:end-1,2:end ].+Axxyyz[2:end-1,2:end-1,1:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_xya!(R, Axy); @test all(Array(R .== (Axy[1:end-1,1:end-1,:].+Axy[2:end,1:end-1,:].+Axy[1:end-1,2:end,:].+Axy[2:end,2:end,:]).*$FloatDefault(0.25))) + R.=0; @parallel av_xza!(R, Axz); @test all(Array(R .== (Axz[1:end-1,:,1:end-1].+Axz[2:end,:,1:end-1].+Axz[1:end-1,:,2:end].+Axz[2:end,:,2:end]).*$FloatDefault(0.25))) + R.=0; @parallel av_yza!(R, Ayz); @test all(Array(R .== (Ayz[:,1:end-1,1:end-1].+Ayz[:,2:end,1:end-1].+Ayz[:,1:end-1,2:end].+Ayz[:,2:end,2:end]).*$FloatDefault(0.25))) + R.=0; @parallel av_xyi!(R, Axyzz); @test all(Array(R .== (Axyzz[1:end-1,1:end-1,2:end-1].+Axyzz[2:end,1:end-1,2:end-1].+Axyzz[1:end-1,2:end,2:end-1].+Axyzz[2:end,2:end,2:end-1]).*$FloatDefault(0.25))) + R.=0; @parallel av_xzi!(R, Axyyz); @test all(Array(R .== (Axyyz[1:end-1,2:end-1,1:end-1].+Axyyz[2:end,2:end-1,1:end-1].+Axyyz[1:end-1,2:end-1,2:end].+Axyyz[2:end,2:end-1,2:end]).*$FloatDefault(0.25))) + R.=0; @parallel av_yzi!(R, Axxyz); @test all(Array(R .== (Axxyz[2:end-1,1:end-1,1:end-1].+Axxyz[2:end-1,2:end,1:end-1].+Axxyz[2:end-1,1:end-1,2:end].+Axxyz[2:end-1,2:end,2:end]).*$FloatDefault(0.25))) end; @testset "harmonic averages" begin @parallel harm!(R, Axyz) = (@all(R) = @harm(Axyz); return) @@ -184,4 +183,4 @@ eval(:( end; )) -end end == nothing || true; +end == nothing || true; diff --git a/test/test_extensions.jl b/test/test_extensions.jl index c79b7ded..bd35d210 100644 --- a/test/test_extensions.jl +++ b/test/test_extensions.jl @@ -24,6 +24,7 @@ end exename = joinpath(Sys.BINDIR, Base.julia_exename()) const TEST_PROJECTS = ["Diffusion3D_minimal"] # ["Diffusion3D_minimal", "Diffusion3D", "Diffusion"] + @static for package in TEST_PACKAGES eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "extensions ($project)" for project in TEST_PROJECTS diff --git a/test/test_incremental_compilation.jl b/test/test_incremental_compilation.jl index e7da4fab..57f2a8c1 100644 --- a/test/test_incremental_compilation.jl +++ b/test/test_incremental_compilation.jl @@ -22,6 +22,7 @@ end end exename = joinpath(Sys.BINDIR, Base.julia_exename()) + @static for package in TEST_PACKAGES eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "incremental compilation" begin diff --git a/test/test_init_parallel_stencil.jl b/test/test_init_parallel_stencil.jl index cbfa4321..fad22c6e 100644 --- a/test/test_init_parallel_stencil.jl +++ b/test/test_init_parallel_stencil.jl @@ -26,6 +26,7 @@ end end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. + @static for package in TEST_PACKAGES eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "1. initialization of ParallelStencil" begin diff --git a/test/test_parallel.jl b/test/test_parallel.jl index a737495d..a6be8377 100644 --- a/test/test_parallel.jl +++ b/test/test_parallel.jl @@ -26,16 +26,15 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t import ParallelStencil.@gorgeousexpand -const TEST_PRECISIONS = [Float32, Float64] + @static for package in TEST_PACKAGES -for precision in TEST_PRECISIONS -(package == PKG_METAL && precision == Float64) ? continue : nothing # Metal does not support Float64 + FloatDefault = (package == PKG_METAL) ? Float32 : Float64 # Metal does not support Float64 eval(:( - @testset "$(basename(@__FILE__)) (package: $(nameof($package))) (precision: $(nameof($precision)))" begin + @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "1. parallel macros" begin @require !@is_initialized() - @init_parallel_stencil($package, $precision, 3) + @init_parallel_stencil($package, $FloatDefault, 3) @require @is_initialized() @testset "@parallel " begin # NOTE: calls must go to ParallelStencil.ParallelKernel.parallel and must therefore give the same result as in ParallelKernel, except for memopt tests (tests copied 1-to-1 from there). @static if $package == $PKG_CUDA @@ -224,13 +223,13 @@ eval(:( end @testset "@parallel (3D; on-the-fly)" begin nx, ny, nz = 32, 8, 8 - lam=dt=_dx=_dy=_dz = $precision(1) + lam=dt=_dx=_dy=_dz = $FloatDefault(1) T = @zeros(nx, ny, nz); T2 = @zeros(nx, ny, nz); T2_ref = @zeros(nx, ny, nz); Ci = @ones(nx, ny, nz); copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); - @parallel function diffusion3D_step!(T2, T, Ci, lam::Data.Number, dt::$precision, _dx, _dy, _dz) + @parallel function diffusion3D_step!(T2, T, Ci, lam::Data.Number, dt::$FloatDefault, _dx, _dy, _dz) @all(qx) = -lam*@d_xi(T)*_dx # Fourier's law of heat conduction @all(qy) = -lam*@d_yi(T)*_dy # ... @all(qz) = -lam*@d_zi(T)*_dz # ... @@ -331,7 +330,7 @@ eval(:( @test all(Array(A2) .== Array(A2_ref)) end @testset "@parallel_indices (3D, memopt, stencilranges=-1:1)" begin - lam=dt=_dx=_dy=_dz = $precision(1) + lam=dt=_dx=_dy=_dz = $FloatDefault(1) T = @zeros(nx, ny, nz); T2 = @zeros(nx, ny, nz); T2_ref = @zeros(nx, ny, nz); @@ -390,13 +389,13 @@ eval(:( @test all(Array(A2) .== Array(A2_ref)) end @testset "@parallel (3D, memopt, stencilranges=0:2; on-the-fly)" begin - lam=dt=_dx=_dy=_dz = $precision(1) + lam=dt=_dx=_dy=_dz = $FloatDefault(1) T = @zeros(nx, ny, nz); T2 = @zeros(nx, ny, nz); T2_ref = @zeros(nx, ny, nz); Ci = @ones(nx, ny, nz); copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); - @parallel memopt=true loopsize=3 function diffusion3D_step!(T2, T, Ci, lam::Data.Number, dt::$precision, _dx, _dy, _dz) + @parallel memopt=true loopsize=3 function diffusion3D_step!(T2, T, Ci, lam::Data.Number, dt::$FloatDefault, _dx, _dy, _dz) @all(qx) = -lam*@d_xi(T)*_dx # Fourier's law of heat conduction @all(qy) = -lam*@d_yi(T)*_dy # ... @all(qz) = -lam*@d_zi(T)*_dz # ... @@ -477,7 +476,7 @@ eval(:( @test all(Array(A2) .== Array(A2_ref)) end @testset "@parallel (3D, memopt; 2 arrays, x-y-z- + z-stencil)" begin - lam=dt=_dx=_dy=_dz = $precision(1) + lam=dt=_dx=_dy=_dz = $FloatDefault(1) T = @zeros(nx, ny, nz); T2 = @zeros(nx, ny, nz); T2_ref = @zeros(nx, ny, nz); @@ -497,7 +496,7 @@ eval(:( @test all(Array(T2) .== Array(T2_ref)) end @testset "@parallel (3D, memopt; 2 arrays, x-y-z- + x-stencil)" begin - lam=dt=_dx=_dy=_dz = $precision(1) + lam=dt=_dx=_dy=_dz = $FloatDefault(1) T = @zeros(nx, ny, nz); T2 = @zeros(nx, ny, nz); T2_ref = @zeros(nx, ny, nz); @@ -517,7 +516,7 @@ eval(:( @test all(Array(T2) .== Array(T2_ref)) end @testset "@parallel (3D, memopt; 3 arrays, x-y-z- + y- + x-stencil)" begin - lam=dt=_dx=_dy=_dz = $precision(1) + lam=dt=_dx=_dy=_dz = $FloatDefault(1) T = @zeros(nx, ny, nz); T2 = @zeros(nx, ny, nz); T2_ref = @zeros(nx, ny, nz); @@ -828,7 +827,7 @@ eval(:( @test all(Array(A2) .== Array(A)) end @testset "@parallel (3D, memopt, stencilranges=0:2)" begin - lam=dt=_dx=_dy=_dz = $precision(1) + lam=dt=_dx=_dy=_dz = $FloatDefault(1) T = @zeros(nx, ny, nz); T2 = @zeros(nx, ny, nz); T2_ref = @zeros(nx, ny, nz); @@ -847,7 +846,7 @@ eval(:( @test all(Array(T2) .== Array(T2_ref)) end @testset "@parallel (3D, memopt; 3 arrays, x-y-z- + y- + x-stencil)" begin - lam=dt=_dx=_dy=_dz = $precision(1) + lam=dt=_dx=_dy=_dz = $FloatDefault(1) T = @zeros(nx, ny, nz); T2 = @zeros(nx, ny, nz); T2_ref = @zeros(nx, ny, nz); @@ -880,12 +879,12 @@ eval(:( end; @testset "2. parallel macros (2D)" begin @require !@is_initialized() - @init_parallel_stencil($package, $precision, 2) + @init_parallel_stencil($package, $FloatDefault, 2) @require @is_initialized() @static if $package in [$PKG_CUDA, $PKG_AMDGPU] # TODO add support for Metal nx, ny, nz = 32, 8, 1 @testset "@parallel_indices (2D, memopt, stencilranges=(-1:1,-1:1,0:0))" begin - lam=dt=_dx=_dy = $precision(1) + lam=dt=_dx=_dy = $FloatDefault(1) T = @zeros(nx, ny, nz); T2 = @zeros(nx, ny, nz); T2_ref = @zeros(nx, ny, nz); @@ -913,7 +912,7 @@ eval(:( @testset "3. global defaults" begin @testset "inbounds=true" begin @require !@is_initialized() - @init_parallel_stencil($package, $precision, 1, inbounds=true) + @init_parallel_stencil($package, $FloatDefault, 1, inbounds=true) @require @is_initialized expansion = @prettystring(1, @parallel_indices (ix) inbounds=true f(A) = (2*A; return)) @test occursin("Base.@inbounds begin", expansion) @@ -925,10 +924,10 @@ eval(:( end; @testset "@parallel_indices (I...) (1D)" begin @require !@is_initialized() - @init_parallel_stencil($package, $precision, 1) + @init_parallel_stencil($package, $FloatDefault, 1) @require @is_initialized A = @zeros(4*5*6) - one = $precision(1) + one = $FloatDefault(1) @parallel_indices (I...) function write_indices!(A, one) A[I...] = sum((I .- (1,)) .* (one)); return @@ -939,10 +938,10 @@ eval(:( end; @testset "@parallel_indices (I...) (2D)" begin @require !@is_initialized() - @init_parallel_stencil($package, $precision, 2) + @init_parallel_stencil($package, $FloatDefault, 2) @require @is_initialized A = @zeros(4, 5*6) - one = $precision(1) + one = $FloatDefault(1) @parallel_indices (I...) function write_indices!(A, one) A[I...] = sum((I .- (1,)) .* (one, size(A,1))); return @@ -953,10 +952,10 @@ eval(:( end; @testset "@parallel_indices (I...) (3D)" begin @require !@is_initialized() - @init_parallel_stencil($package, $precision, 3) + @init_parallel_stencil($package, $FloatDefault, 3) @require @is_initialized A = @zeros(4, 5, 6) - one = $precision(1) + one = $FloatDefault(1) @parallel_indices (I...) function write_indices!(A, one) A[I...] = sum((I .- (1,)) .* (one, size(A,1), size(A,1)*size(A,2))); return @@ -1061,7 +1060,7 @@ eval(:( @reset_parallel_stencil() end; @testset "5. Exceptions" begin - @init_parallel_stencil($package, $precision, 3) + @init_parallel_stencil($package, $FloatDefault, 3) @require @is_initialized @testset "arguments @parallel" begin @test_throws ArgumentError checkargs_parallel(); # Error: isempty(args) @@ -1080,4 +1079,4 @@ eval(:( end; )) -end end == nothing || true; +end == nothing || true; diff --git a/test/test_reset_parallel_stencil.jl b/test/test_reset_parallel_stencil.jl index 08b66da5..4177139a 100644 --- a/test/test_reset_parallel_stencil.jl +++ b/test/test_reset_parallel_stencil.jl @@ -24,6 +24,7 @@ end end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. + @static for package in TEST_PACKAGES eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "1. Reset of ParallelStencil" begin