diff --git a/src/FieldAllocators.jl b/src/FieldAllocators.jl index 77635d5a..3b39028e 100644 --- a/src/FieldAllocators.jl +++ b/src/FieldAllocators.jl @@ -28,6 +28,7 @@ To see a description of a macro type `?` (including the `@`). """ module FieldAllocators import ..ParallelKernel + import ..ParallelStencil: check_initialized @doc replace(ParallelKernel.FieldAllocators.ALLOCATE_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro allocate(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@allocate($(args...)))); end @doc replace(ParallelKernel.FieldAllocators.FIELD_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro Field(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@Field($(args...)))); end @doc replace(ParallelKernel.FieldAllocators.VECTORFIELD_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro VectorField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@VectorField($(args...)))); end @@ -46,5 +47,19 @@ module FieldAllocators @doc replace(ParallelKernel.FieldAllocators.TENSORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro XZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XZField($(args...)))); end @doc replace(ParallelKernel.FieldAllocators.TENSORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro YZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@YZField($(args...)))); end + macro IField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@IField($(args...)))); end + macro XXYField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XXYField($(args...)))); end + macro XYYField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XYYField($(args...)))); end + macro XYZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XYZField($(args...)))); end + macro XXYZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XXYZField($(args...)))); end + macro XYYZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XYYZField($(args...)))); end + macro XYZZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XYZZField($(args...)))); end + macro XXYYField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XXYYField($(args...)))); end + macro XXZZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XXZZField($(args...)))); end + macro YYZZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@YYZZField($(args...)))); end + macro XXYYZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XXYYZField($(args...)))); end + macro XYYZZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XYYZZField($(args...)))); end + macro XXYZZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XXYZZField($(args...)))); end + export @allocate, @Field, @VectorField, @BVectorField, @TensorField, @XField, @BXField, @YField, @BYField, @ZField, @BZField, @XXField, @YYField, @ZZField, @XYField, @XZField, @YZField end \ No newline at end of file diff --git a/src/FiniteDifferences.jl b/src/FiniteDifferences.jl index 352073f4..9765fcd8 100644 --- a/src/FiniteDifferences.jl +++ b/src/FiniteDifferences.jl @@ -46,24 +46,25 @@ export @within @doc "`@minloc(A)`: Compute the minimum between 2nd order adjacent elements of `A`, using a moving window of size 3." :(@minloc) import ..ParallelStencil -import ..ParallelStencil: INDICES, INDICES_INN, WITHIN_DOC, @expandargs +import ..ParallelStencil: INDICES, INDICES_INN, INDICES_DIR, WITHIN_DOC, @expandargs const ix = INDICES[1] const ixi = INDICES_INN[1] +const ixd = INDICES_DIR[1] -macro d(A) @expandargs(A); esc(:( $A[$ixi] - $A[$ixi-1] )) end +macro d(A) @expandargs(A); esc(:( $A[$ixd+1] - $A[$ixd] )) end macro d2(A) @expandargs(A); esc(:( ($A[$ixi+1] - $A[$ixi]) - ($A[$ixi] - $A[$ixi-1]) )) end macro all(A) @expandargs(A); esc(:( $A[$ix ] )) end macro inn(A) @expandargs(A); esc(:( $A[$ixi ] )) end -macro av(A) @expandargs(A); esc(:(($A[$ixi-1] + $A[$ixi] )*0.5 )) end -macro harm(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi-1] + 1.0/$A[$ixi])*2.0 )) end +macro av(A) @expandargs(A); esc(:(($A[$ixd] + $A[$ixd+1] )*0.5 )) end +macro harm(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixd] + 1.0/$A[$ixd+1])*2.0 )) end macro maxloc(A) @expandargs(A); esc(:( max( max($A[$ixi-1], $A[$ixi+1]), $A[$ixi] ) )) end macro minloc(A) @expandargs(A); esc(:( min( min($A[$ixi-1], $A[$ixi+1]), $A[$ixi] ) )) end @doc WITHIN_DOC macro within(macroname::String, A) @expandargs(A) - if macroname == "@all" esc( :( $ix <= lastindex($A,1)) ) - elseif macroname == "@inn" esc( :(firstindex($A,1) < $ixi < lastindex($A,1)) ) + if macroname == "@all" esc( :(firstindex($A,1) <= $ix <= lastindex($A,1)) ) + elseif macroname == "@inn" esc( :(firstindex($A,1) < $ixi < lastindex($A,1)) ) else error("unkown macroname: $macroname. If you want to add your own assignement macros, overwrite the macro 'within(macroname::String, A)'; to still use the exising macro within as well call ParallelStencil.FiniteDifferences{1|2|3}D.@within(macroname, A) at the end.") end end @@ -151,14 +152,15 @@ export @within @doc "`@minloc(A)`: Compute the minimum between 2nd order adjacent elements of `A`, using a moving window of size 3." :(@minloc) import ..ParallelStencil -import ..ParallelStencil: INDICES, INDICES_INN, WITHIN_DOC, @expandargs +import ..ParallelStencil: INDICES, INDICES_INN, INDICES_DIR, WITHIN_DOC, @expandargs ix, iy = INDICES[1], INDICES[2] ixi, iyi = INDICES_INN[1], INDICES_INN[2] +ixd, iyd = INDICES_DIR[1], INDICES_DIR[2] -macro d_xa(A) @expandargs(A); esc(:( $A[$ixi,$iy ] - $A[$ixi-1,$iy ] )) end -macro d_ya(A) @expandargs(A); esc(:( $A[$ix ,$iyi] - $A[$ix ,$iyi-1] )) end -macro d_xi(A) @expandargs(A); esc(:( $A[$ixi,$iyi] - $A[$ixi-1,$iyi ] )) end -macro d_yi(A) @expandargs(A); esc(:( $A[$ixi,$iyi] - $A[$ixi ,$iyi-1] )) end +macro d_xa(A) @expandargs(A); esc(:( $A[$ixd+1,$iy ] - $A[$ixd ,$iy ] )) end +macro d_ya(A) @expandargs(A); esc(:( $A[$ix ,$iyd+1] - $A[$ix ,$iyd ] )) end +macro d_xi(A) @expandargs(A); esc(:( $A[$ixd+1,$iyi] - $A[$ixd ,$iyi ] )) end +macro d_yi(A) @expandargs(A); esc(:( $A[$ixi,$iyd+1] - $A[$ixi ,$iyd ] )) end macro d2_xa(A) @expandargs(A); esc(:( ($A[$ixi+1,$iy ] - $A[$ixi ,$iy ]) - ($A[$ixi ,$iy ] - $A[$ixi-1,$iy ]) )) end macro d2_ya(A) @expandargs(A); esc(:( ($A[$ix ,$iyi+1] - $A[$ix ,$iyi]) - ($A[$ix ,$iyi] - $A[$ix ,$iyi-1]) )) end macro d2_xi(A) @expandargs(A); esc(:( ($A[$ixi+1,$iyi ] - $A[$ixi ,$iyi]) - ($A[$ixi ,$iyi] - $A[$ixi-1,$iyi ]) )) end @@ -167,16 +169,16 @@ macro all(A) @expandargs(A); esc(:( $A[$ix ,$iy ] )) end macro inn(A) @expandargs(A); esc(:( $A[$ixi ,$iyi ] )) end macro inn_x(A) @expandargs(A); esc(:( $A[$ixi ,$iy ] )) end macro inn_y(A) @expandargs(A); esc(:( $A[$ix ,$iyi ] )) end -macro av(A) @expandargs(A); esc(:(($A[$ixi-1,$iyi-1] + $A[$ixi,$iyi-1] + $A[$ixi-1,$iyi] + $A[$ixi,$iyi])*0.25 )) end -macro av_xa(A) @expandargs(A); esc(:(($A[$ixi-1,$iy ] + $A[$ixi,$iy ] )*0.5 )) end -macro av_ya(A) @expandargs(A); esc(:(($A[$ix ,$iyi-1] + $A[$ix ,$iyi] )*0.5 )) end -macro av_xi(A) @expandargs(A); esc(:(($A[$ixi-1,$iyi ] + $A[$ixi,$iyi] )*0.5 )) end -macro av_yi(A) @expandargs(A); esc(:(($A[$ixi ,$iyi-1] + $A[$ixi,$iyi] )*0.5 )) end -macro harm(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi-1,$iyi-1] + 1.0/$A[$ixi,$iyi-1] + 1.0/$A[$ixi-1,$iyi] + 1.0/$A[$ixi,$iyi])*4.0 )) end -macro harm_xa(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi-1,$iy ] + 1.0/$A[$ixi,$iy ] )*2.0 )) end -macro harm_ya(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ix ,$iyi-1] + 1.0/$A[$ix ,$iyi] )*2.0 )) end -macro harm_xi(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi-1,$iyi ] + 1.0/$A[$ixi,$iyi] )*2.0 )) end -macro harm_yi(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi ,$iyi-1] + 1.0/$A[$ixi,$iyi] )*2.0 )) end +macro av(A) @expandargs(A); esc(:(($A[$ixd ,$iyd ] + $A[$ixd+1,$iyd ] + $A[$ixd,$iyd+1] + $A[$ixd+1,$iyd+1])*0.25 )) end +macro av_xa(A) @expandargs(A); esc(:(($A[$ixd ,$iy ] + $A[$ixd+1,$iy ] )*0.5 )) end +macro av_ya(A) @expandargs(A); esc(:(($A[$ix ,$iyd ] + $A[$ix ,$iyd+1] )*0.5 )) end +macro av_xi(A) @expandargs(A); esc(:(($A[$ixd ,$iyi ] + $A[$ixd+1,$iyi] )*0.5 )) end +macro av_yi(A) @expandargs(A); esc(:(($A[$ixi ,$iyd ] + $A[$ixi,$iyd+1] )*0.5 )) end +macro harm(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixd ,$iyd ] + 1.0/$A[$ixd+1,$iyd ] + 1.0/$A[$ixd,$iyd+1] + 1.0/$A[$ixd+1,$iyd+1])*4.0 )) end +macro harm_xa(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixd ,$iy ] + 1.0/$A[$ixd+1,$iy ] )*2.0 )) end +macro harm_ya(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ix ,$iyd ] + 1.0/$A[$ix ,$iyd+1] )*2.0 )) end +macro harm_xi(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixd ,$iyi ] + 1.0/$A[$ixd+1,$iyi] )*2.0 )) end +macro harm_yi(A) @expandargs(A); esc(:(1.0/(1.0/$A[$ixi ,$iyd ] + 1.0/$A[$ixi,$iyd+1] )*2.0 )) end macro maxloc(A) @expandargs(A); esc(:( max( max( max($A[$ixi-1,$iyi ], $A[$ixi+1,$iyi ]) , $A[$ixi ,$iyi ] ), max($A[$ixi ,$iyi-1], $A[$ixi ,$iyi+1]) ) )) end macro minloc(A) @expandargs(A); esc(:( min( min( min($A[$ixi-1,$iyi ], $A[$ixi+1,$iyi ]) , $A[$ixi ,$iyi ] ), @@ -185,10 +187,10 @@ macro minloc(A) @expandargs(A); esc(:( min( min( min($A[$ixi-1,$iyi ], $A[$ @doc WITHIN_DOC macro within(macroname::String, A) @expandargs(A) - if macroname == "@all" esc( :( $ix<=lastindex($A,1) && $iy<=lastindex($A,2)) ) - elseif macroname == "@inn" esc( :(firstindex($A,1)<$ixi ", true; eval_args=(:inbounds,)) + kwargs, backend_kwargs_expr = extract_kwargs(caller, kwargs_expr, (:inbounds, :padding), "@parallel_indices ", true; eval_args=(:inbounds,)) inbounds = haskey(kwargs, :inbounds) ? kwargs.inbounds : get_inbounds(caller) - parallel_kernel(caller, package, numbertype, inbounds, posargs..., kernelarg) + padding = haskey(kwargs, :padding) ? kwargs.padding : get_padding(caller) + parallel_kernel(caller, package, numbertype, inbounds, padding, posargs..., kernelarg) end function synchronize(caller::Module, args::Union{Symbol,Expr}...; package::Symbol=get_package(caller)) @@ -172,24 +173,24 @@ end ## @PARALLEL KERNEL FUNCTIONS -function parallel_kernel(caller::Module, package::Symbol, numbertype::DataType, inbounds::Bool, indices::Union{Symbol,Expr}, kernel::Expr) +function parallel_kernel(caller::Module, package::Symbol, numbertype::DataType, inbounds::Bool, padding::Bool, indices::Union{Symbol,Expr}, kernel::Expr) if (!isa(indices,Symbol) && !isa(indices.head,Symbol)) @ArgumentError("@parallel_indices: argument 'indices' must be a tuple of indices or a single index (e.g. (ix, iy, iz) or (ix, iy) or ix ).") end indices = extract_tuple(indices) - padding = get_padding(caller) + ndims = length(indices) body = get_body(kernel) body = remove_return(body) body = macroexpand(caller, body) - use_aliases = !all(indices .== INDICES[1:length(indices)]) + use_aliases = !all(indices .== INDICES[1:ndims]) if use_aliases # NOTE: we treat explicit parallel indices as aliases to the statically retrievable indices INDICES. indices_aliases = indices - indices = [INDICES[1:length(indices)]...] + indices = [INDICES[1:ndims]...] for i=1:length(indices_aliases) body = substitute(body, indices_aliases[i], indices[i]) end end if isgpu(package) kernel = insert_device_types(caller, kernel) end kernel = adjust_signatures(kernel, package) - body = handle_padding(body, padding) # TODO: padding can later be made configurable per kernel (to enable working with arrays as before). + body = handle_padding(caller, body, padding, indices) body = handle_inverses(body) body = handle_indices_and_literals(body, indices, package, numbertype) if (inbounds) body = add_inbounds(body) end @@ -362,7 +363,7 @@ function literaltypes(type1::DataType, type2::DataType, expr::Expr) end -## FUNCTIONS TO HANDLE SIGNATURES, INDICES, INVERSES AND PADDING +## FUNCTIONS AND MACROS TO HANDLE SIGNATURES, INDICES, INVERSES AND PADDING function adjust_signatures(kernel::Expr, package::Symbol) int_type = kernel_int_type(package) @@ -373,6 +374,44 @@ function adjust_signatures(kernel::Expr, package::Symbol) return kernel end +function simplify_conditions(caller::Module, expr::Expr) + expr = postwalk(expr) do ex + if @capture(ex, if condition_ body_ end) + condition = postwalk(condition) do cond + if (@capture(cond, a_ < ixyz_ + c_ < b_) && ixyz in INDICES) cond = :($a - $c < $ixyz < $b - $c) + elseif (@capture(cond, a_ <= ixyz_ + c_ < b_) && ixyz in INDICES) cond = :($a - $c <= $ixyz < $b - $c) + elseif (@capture(cond, a_ < ixyz_ + c_ <= b_) && ixyz in INDICES) cond = :($a - $c < $ixyz <= $b - $c) + elseif (@capture(cond, a_ <= ixyz_ + c_ <= b_) && ixyz in INDICES) cond = :($a - $c <= $ixyz <= $b - $c) + elseif (@capture(cond, a_ < ixyz_ - c_ < b_) && ixyz in INDICES) cond = :($a + $c < $ixyz < $b + $c) + elseif (@capture(cond, a_ <= ixyz_ - c_ < b_) && ixyz in INDICES) cond = :($a + $c <= $ixyz < $b + $c) + elseif (@capture(cond, a_ < ixyz_ - c_ <= b_) && ixyz in INDICES) cond = :($a + $c < $ixyz <= $b + $c) + elseif (@capture(cond, a_ <= ixyz_ - c_ <= b_) && ixyz in INDICES) cond = :($a + $c <= $ixyz <= $b + $c) + end + if @capture(cond, a_ < x_ < b_) || @capture(cond, a_ < x_ <= b_) || @capture(cond, a_ <= x_ < b_) || @capture(cond, a_ <= x_ <= b_) + a_val = eval_try(caller, a) + b_val = eval_try(caller, b) + if !isnothing(a_val) cond = substitute(cond, a, :($a_val), inQuoteNode=true) end + if !isnothing(b_val) cond = substitute(cond, b, :($b_val), inQuoteNode=true) end + end + if (@capture(cond, a_ < ixyz_ < b_) && (ixyz in INDICES) && isa(a, Integer) && isa(b, Integer) && a==0 && b==2) cond = :($x == 1) # NOTE: a check that there is no second assignment to the parallel indices could be added. + elseif (@capture(cond, a_ < ixyz_ < b_) && (ixyz in INDICES) && isa(a, Integer) && a==0) cond = :($x < $b) + elseif (@capture(cond, a_ <= ixyz_ < b_) && (ixyz in INDICES) && isa(a, Integer) && isa(b, Integer) && a==1 && b==2) cond = :($x == 1) + elseif (@capture(cond, a_ <= ixyz_ < b_) && (ixyz in INDICES) && isa(a, Integer) && a==1) cond = :($x < $b) + elseif (@capture(cond, a_ < ixyz_ <= b_) && (ixyz in INDICES) && isa(a, Integer) && isa(b, Integer) && a==0 && b==1) cond = :($x == 1) + elseif (@capture(cond, a_ < ixyz_ <= b_) && (ixyz in INDICES) && isa(a, Integer) && a==0) cond = :($x <= $b) + elseif (@capture(cond, a_ <= ixyz_ <= b_) && (ixyz in INDICES) && isa(a, Integer) && isa(b, Integer) && a==1 && b==1) cond = :($x == 1) + elseif (@capture(cond, a_ <= ixyz_ <= b_) && (ixyz in INDICES) && isa(a, Integer) && a==1) cond = :($x <= $b) + end + return cond + end + return :(if ($condition); $body end) + else + return ex + end + end + return expr +end + function handle_inverses(body::Expr) return postwalk(body) do ex if @capture(ex, (1 | 1.0 | 1.0f0) / x_) @@ -383,12 +422,14 @@ function handle_inverses(body::Expr) end end -function handle_padding(body::Expr, padding::Bool) - body = substitute_indices_inn(body, padding) - if padding - body = substitute_firstlastindex(body) - body = substitute_view_accesses(body, INDICES) +function handle_padding(caller::Module, body::Expr, padding::Bool, indices; handle_view_accesses::Bool=true, handle_indexing::Bool=true, dir_handling::Bool=true, delay_dir_handling::Bool=false) + if (handle_indexing) + body = substitute_indices_inn(body, padding) + if (dir_handling) body = substitute_indices_dir(caller, body, padding; delay_handling=delay_dir_handling) end + body = substitute_firstlastindex(caller, body, padding) + body = simplify_conditions(caller, body) end + if (handle_view_accesses && padding) body = substitute_view_accesses(body, (indices...,), (INDICES_DIR[1:length(indices)]...,)) end return body end @@ -400,12 +441,64 @@ function substitute_indices_inn(body::Expr, padding::Bool) return body end -function substitute_firstlastindex(body::Expr) - padding = true +macro handle_indices_dir(expr::Expr, padding::Bool) expr = macroexpand(__module__, expr); esc(substitute_indices_dir(__module__, expr, padding)) end + +function substitute_indices_dir(caller::Module, expr::Expr, padding::Bool; delay_handling::Bool=false) + ix, iy, iz = INDICES + ixd_f, iyd_f, izd_f = INDICES_DIR_FUNCTIONS_SYMS + if delay_handling + expr = :(ParallelStencil.ParallelKernel.@handle_indices_dir($expr, $padding)) + else + if padding + expr = postwalk(expr) do exp + if @capture(exp, (B_[ixyz_expr__] = rhs_) | (B_[ixyz_expr__] .= rhs_)) && any(map(inexpr_walk, ixyz_expr, INDICES)) + B_parent = promote_to_parent(B) + rhs = postwalk(rhs) do ex + if @capture(ex, A_[indices_expr__]) && any(map(inexpr_walk, indices_expr, INDICES_DIR)) + A_parent = promote_to_parent(A) + ex = substitute(ex, NamedTuple{INDICES_DIR}( + ((A_parent==B_parent) ? ix : :($ix - (size($B_parent, 1) > size($A_parent, 1))), + (A_parent==B_parent) ? iy : :($iy - (size($B_parent, 2) > size($A_parent, 2))), + (A_parent==B_parent) ? iz : :($iz - (size($B_parent, 3) > size($A_parent, 3)))) + ); inQuoteNode=true) + elseif @capture(ex, A_[indices_expr__]) && any(map(inexpr_walk, indices_expr, INDICES_DIR_FUNCTIONS_SYMS)) + A_parent = promote_to_parent(A) + ex = postwalk(ex) do e + if @capture(e, f_(arg_)) && (f in INDICES_DIR_FUNCTIONS_SYMS) + if !isa(arg, Integer) @ModuleInternalError("invalid argument in function $f found (expected: Integer): $arg.") end + offset_base = arg ÷ 2 + if (f == ixd_f) e = :($ix - $offset_base) + elseif (f == iyd_f) e = :($iy - $offset_base) + elseif (f == izd_f) e = :($iz - $offset_base) + end + if (f == ixd_f && (A_parent!=B_parent)) e = :($e - (size($B_parent, 1) > size($A_parent, 1))) + elseif (f == iyd_f && (A_parent!=B_parent)) e = :($e - (size($B_parent, 2) > size($A_parent, 2))) + elseif (f == izd_f && (A_parent!=B_parent)) e = :($e - (size($B_parent, 3) > size($A_parent, 3))) + end + end + return e + end + end + return ex + end + exp = :($B[$(ixyz_expr...)] = $rhs) + end + return exp + end + else + for i=1:length(INDICES_DIR) + expr = substitute(expr, INDICES_DIR[i], INDICES[i], inQuoteNode=true) + end + end + end + return expr +end + +function substitute_firstlastindex(caller::Module, body::Expr, padding::Bool) return postwalk(body) do ex if @capture(ex, f_(args__)) - if (f == :firstindex) return :(ParallelStencil.ParallelKernel.@firstindex($(args...), $padding)) - elseif (f == :lastindex) return :(ParallelStencil.ParallelKernel.@lastindex($(args...), $padding)) + if (f == :firstindex) return _firstindex(caller, args..., padding) + elseif (f == :lastindex) return _lastindex(caller, args..., padding) else return ex end else @@ -414,11 +507,12 @@ function substitute_firstlastindex(body::Expr) end end -function substitute_view_accesses(expr::Expr, indices::NTuple{N,<:Union{Symbol,Expr}} where N) +function substitute_view_accesses(expr::Expr, indices::NTuple{N,<:Union{Symbol,Expr}}, indices_dir::NTuple{N,<:Union{Symbol,Expr}}) where N return postwalk(expr) do ex - if is_access(ex, indices...) + if is_access(ex, indices, indices_dir) @capture(ex, A_[indices_expr__]) || @ModuleInternalError("a stencil access could not be pattern matched.") - return :($A.parent[$(indices_expr...)]) + A_parent = promote_to_parent(A) + return :($A_parent[$(indices_expr...)]) else return ex end @@ -578,6 +672,7 @@ promote_maxsize(maxsize) = @ArgumentError("maxsize must b maxsize(t::T) where T<:Union{Tuple, NamedTuple} = maxsize(t...) maxsize(A::T) where T<:AbstractArray = (size(A,1),size(A,2),size(A,3)) # NOTE: using size(A,dim) three times instead of size(A) ensures to have a tuple of length 3. +maxsize(A::T) where T<:SubArray = (size(A.parent,1),size(A.parent,2),size(A.parent,3)) maxsize(a::T) where T<:Number = (1, 1, 1) maxsize(x) = _maxsize(Val{isbitstype(typeof(x))}) _maxsize(::Type{Val{true}}) = (1, 1, 1) diff --git a/src/ParallelKernel/shared.jl b/src/ParallelKernel/shared.jl index e725a412..4325b372 100644 --- a/src/ParallelKernel/shared.jl +++ b/src/ParallelKernel/shared.jl @@ -9,6 +9,10 @@ gensym_world(tag::String, generator::Module) = gensym(string(tag, GENSYM_SEPARAT gensym_world(tag::Symbol, generator::Module) = gensym(string(tag, GENSYM_SEPARATOR, generator)) gensym_world(tag::Expr, generator::Module) = gensym(string(tag, GENSYM_SEPARATOR, generator)) +ixd(count) = @ModuleInternalError("function ixd had not be evaluated at parse time") +iyd(count) = @ModuleInternalError("function iyd had not be evaluated at parse time") +izd(count) = @ModuleInternalError("function izd had not be evaluated at parse time") + const PKG_CUDA = :CUDA const PKG_AMDGPU = :AMDGPU const PKG_METAL = :Metal @@ -26,6 +30,8 @@ const NTHREADS_X_MAX_AMDGPU = 64 const NTHREADS_MAX = 256 const INDICES = (gensym_world("ix", @__MODULE__), gensym_world("iy", @__MODULE__), gensym_world("iz", @__MODULE__)) const INDICES_INN = (gensym_world("ixi", @__MODULE__), gensym_world("iyi", @__MODULE__), gensym_world("izi", @__MODULE__)) # ( :($(INDICES[1])+1), :($(INDICES[2])+1), :($(INDICES[3])+1) ) +const INDICES_DIR = (gensym_world("ixd", @__MODULE__), gensym_world("iyd", @__MODULE__), gensym_world("izd", @__MODULE__)) +const INDICES_DIR_FUNCTIONS_SYMS = (:(ParallelStencil.ParallelKernel.ixd), :(ParallelStencil.ParallelKernel.iyd), :(ParallelStencil.ParallelKernel.izd)) const RANGES_VARNAME = gensym_world("ranges", @__MODULE__) const RANGELENGTHS_VARNAMES = (gensym_world("rangelength_x", @__MODULE__), gensym_world("rangelength_y", @__MODULE__), gensym_world("rangelength_z", @__MODULE__)) const THREADIDS_VARNAMES = (gensym_world("tx", @__MODULE__), gensym_world("ty", @__MODULE__), gensym_world("tz", @__MODULE__)) @@ -269,6 +275,10 @@ is_access(ex::Expr, ix::Symbol, iy::Symbol) = @capture(ex, A_[x_, y_ is_access(ex::Expr, ix::Symbol) = @capture(ex, A_[x_]) && inexpr_walk(x, ix) is_access(ex, indices...) = false +function is_access(ex::Expr, indices::NTuple{N,<:Union{Symbol,Expr}}, indices_dir::NTuple{N,<:Union{Symbol,Expr}}) where N + return @capture(ex, A_[ind__]) && length(ind) == N && all(inexpr_walk.(ind, indices) .⊻ inexpr_walk.(ind, indices_dir)) +end + ## FUNCTIONS TO DEAL WITH KERNEL/MACRO CALLS: CHECK IF DEFINITION/CALL, EXTRACT, SPLIT AND EVALUATE ARGUMENTS @@ -365,29 +375,37 @@ function eval_arg(caller::Module, arg) end function eval_try(caller::Module, expr) - try - return @eval(caller, $expr) - catch e + if isinteractive() # NOTE: this is required to avoid that this function returns non-constant values in interactive sessions. return nothing + else + try + return @eval(caller, $expr) + catch e + return nothing + end end end ## FUNCTIONS FOR COMMON MANIPULATIONS ON EXPRESSIONS -function substitute(expr::Expr, old, new; inQuoteNode=false) +function substitute(expr::Expr, old, new; inQuoteNode=false, inString=false) + old_str = string(old) + new_str = string(new) return postwalk(expr) do x if x == old return new elseif inQuoteNode && isa(x, QuoteNode) && x.value == old return QuoteNode(new) + elseif inString && isa(x, String) && occursin(old_str, x) + return replace(x, old_str => new_str) else return x; end end end -function substitute(expr::Expr, rules::NamedTuple; inQuoteNode=false) +function substitute(expr::Union{Symbol,Expr}, rules::NamedTuple; inQuoteNode=false) return postwalk(expr) do x if isa(x, Symbol) && haskey(rules, x) return rules[x] @@ -397,9 +415,30 @@ function substitute(expr::Expr, rules::NamedTuple; inQuoteNode=false) return x end end -end +end + +substitute(expr, old, new; inQuoteNode=false, inString=false) = (old == expr) ? new : expr + +function increment_arg(expr::Union{Symbol,Expr}, f::Union{Symbol,Expr}; increment::Integer=1) + return postwalk(expr) do x + if @capture(x, $f(arg_)) && isa(arg, Integer) + return :($f($(arg + increment))) + else + return x + end + # if isa(x, Expr) && (x.head == :call) && length(x.args==2) && (x.args[1] == f) && isa(x.args[2], Integer) + # return :($f($(x.args[2] + increment))) + # else + # return x + # end + end +end -substitute(expr, old, new) = (old == expr) ? new : expr +function promote_to_parent(expr::Union{Symbol,Expr}) + if !@capture(expr, ex_.parent) return :($(expr).parent) + else return expr + end +end function cast(expr::Expr, f::Symbol, type::DataType) return postwalk(expr) do ex @@ -521,7 +560,7 @@ end function interpolate(sym::Symbol, vals::NTuple, block::Expr) return quote - $((substitute(block, :(_$($sym)), val) for val in vals)...) + $((substitute(block, sym, val; inQuoteNode=true, inString=true) for val in vals)...) end end diff --git a/src/init_parallel_stencil.jl b/src/init_parallel_stencil.jl index 9f09cd4e..cfa923fd 100644 --- a/src/init_parallel_stencil.jl +++ b/src/init_parallel_stencil.jl @@ -44,13 +44,14 @@ macro init_parallel_stencil(args...) if (length(posargs) == 3) package, numbertype_val, ndims_val = extract_posargs_init(__module__, posargs...) else package, numbertype_val, ndims_val = extract_kwargs_init(__module__, kwargs) end - inbounds_val, padding_val, memopt_val = extract_kwargs_nopos(__module__, kwargs) + inbounds_val, padding_val, memopt_val, nonconst_metadata_val = extract_kwargs_nopos(__module__, kwargs) if (package == PKG_NONE) @ArgumentError("the package argument cannot be ommited.") end #TODO: this error message will disappear, once the package can be defined at runtime. - check_already_initialized(__module__, package, numbertype_val, ndims_val, inbounds_val, padding_val, memopt_val) - esc(init_parallel_stencil(__module__, package, numbertype_val, ndims_val, inbounds_val, padding_val, memopt_val)) + if (package == PKG_POLYESTER && padding_val) @ArgumentError("padding is not yet supported for Polyester.") end + check_already_initialized(__module__, package, numbertype_val, ndims_val, inbounds_val, padding_val, memopt_val, nonconst_metadata_val) + esc(init_parallel_stencil(__module__, package, numbertype_val, ndims_val, inbounds_val, padding_val, memopt_val, nonconst_metadata_val)) end -function init_parallel_stencil(caller::Module, package::Symbol, numbertype::DataType, ndims::Integer, inbounds::Bool, padding::Bool, memopt::Bool) +function init_parallel_stencil(caller::Module, package::Symbol, numbertype::DataType, ndims::Integer, inbounds::Bool, padding::Bool, memopt::Bool, nonconst_metadata::Bool) if (numbertype == NUMBERTYPE_NONE) datadoc_call = :(@doc replace(ParallelStencil.ParallelKernel.DATA_DOC_NUMBERTYPE_NONE, "ParallelKernel" => "ParallelStencil", "@init_parallel_kernel" => "@init_parallel_stencil") Data) else datadoc_call = :(@doc replace(ParallelStencil.ParallelKernel.DATA_DOC, "ParallelKernel" => "ParallelStencil", "@init_parallel_kernel" => "@init_parallel_stencil") Data) end @@ -61,6 +62,7 @@ function init_parallel_stencil(caller::Module, package::Symbol, numbertype::Data set_inbounds(caller, inbounds) set_padding(caller, padding) set_memopt(caller, memopt) + set_nonconst_metadata(caller, nonconst_metadata) set_initialized(caller, true) return return_expr end @@ -73,34 +75,38 @@ macro get_ndims() get_ndims(__module__) end macro get_inbounds() get_inbounds(__module__) end macro get_padding() get_padding(__module__) end macro get_memopt() get_memopt(__module__) end +macro get_nonconst_metadata() get_nonconst_metadata(__module__) end let - global is_initialized, set_initialized, set_package, get_package, set_numbertype, get_numbertype, set_ndims, get_ndims, set_inbounds, get_inbounds, set_padding, get_padding, set_memopt, get_memopt, check_initialized, check_already_initialized - _is_initialized::Dict{Module, Bool} = Dict{Module, Bool}() - package::Dict{Module, Symbol} = Dict{Module, Symbol}() - numbertype::Dict{Module, DataType} = Dict{Module, DataType}() - ndims::Dict{Module, Integer} = Dict{Module, Integer}() - inbounds::Dict{Module, Bool} = Dict{Module, Bool}() - padding::Dict{Module, Bool} = Dict{Module, Bool}() - memopt::Dict{Module, Bool} = Dict{Module, Bool}() - set_initialized(caller::Module, flag::Bool) = (_is_initialized[caller] = flag) - is_initialized(caller::Module) = haskey(_is_initialized, caller) && _is_initialized[caller] - set_package(caller::Module, pkg::Symbol) = (package[caller] = pkg) - get_package(caller::Module) = package[caller] - set_numbertype(caller::Module, T::DataType) = (numbertype[caller] = T) - get_numbertype(caller::Module) = numbertype[caller] - set_ndims(caller::Module, n::Integer) = (ndims[caller] = n) - get_ndims(caller::Module) = ndims[caller] - set_inbounds(caller::Module, flag::Bool) = (inbounds[caller] = flag) - get_inbounds(caller::Module) = inbounds[caller] - set_padding(caller::Module, flag::Bool) = (padding[caller] = flag) - get_padding(caller::Module) = padding[caller] - set_memopt(caller::Module, flag::Bool) = (memopt[caller] = flag) - get_memopt(caller::Module) = memopt[caller] - check_initialized(caller::Module) = if !is_initialized(caller) @NotInitializedError("no ParallelStencil macro or function can be called before @init_parallel_stencil in each module (missing call in $caller).") end + global is_initialized, set_initialized, set_package, get_package, set_numbertype, get_numbertype, set_ndims, get_ndims, set_inbounds, get_inbounds, set_padding, get_padding, set_memopt, get_memopt, set_nonconst_metadata, get_nonconst_metadata, check_initialized, check_already_initialized + _is_initialized::Dict{Module, Bool} = Dict{Module, Bool}() + package::Dict{Module, Symbol} = Dict{Module, Symbol}() + numbertype::Dict{Module, DataType} = Dict{Module, DataType}() + ndims::Dict{Module, Integer} = Dict{Module, Integer}() + inbounds::Dict{Module, Bool} = Dict{Module, Bool}() + padding::Dict{Module, Bool} = Dict{Module, Bool}() + memopt::Dict{Module, Bool} = Dict{Module, Bool}() + nonconst_metadata::Dict{Module, Bool} = Dict{Module, Bool}() + set_initialized(caller::Module, flag::Bool) = (_is_initialized[caller] = flag) + is_initialized(caller::Module) = haskey(_is_initialized, caller) && _is_initialized[caller] + set_package(caller::Module, pkg::Symbol) = (package[caller] = pkg) + get_package(caller::Module) = package[caller] + set_numbertype(caller::Module, T::DataType) = (numbertype[caller] = T) + get_numbertype(caller::Module) = numbertype[caller] + set_ndims(caller::Module, n::Integer) = (ndims[caller] = n) + get_ndims(caller::Module) = ndims[caller] + set_inbounds(caller::Module, flag::Bool) = (inbounds[caller] = flag) + get_inbounds(caller::Module) = inbounds[caller] + set_padding(caller::Module, flag::Bool) = (padding[caller] = flag) + get_padding(caller::Module) = padding[caller] + set_memopt(caller::Module, flag::Bool) = (memopt[caller] = flag) + get_memopt(caller::Module) = memopt[caller] + set_nonconst_metadata(caller::Module, flag::Bool) = (nonconst_metadata[caller] = flag) + get_nonconst_metadata(caller::Module) = nonconst_metadata[caller] + check_initialized(caller::Module) = if !is_initialized(caller) @NotInitializedError("no ParallelStencil macro or function can be called before @init_parallel_stencil in each module (missing call in $caller).") end - function check_already_initialized(caller::Module, package::Symbol, numbertype::DataType, ndims::Integer, inbounds::Bool, padding::Bool, memopt::Bool) + function check_already_initialized(caller::Module, package::Symbol, numbertype::DataType, ndims::Integer, inbounds::Bool, padding::Bool, memopt::Bool, nonconst_metadata::Bool) if is_initialized(caller) - if package==get_package(caller) && numbertype==get_numbertype(caller) && ndims==get_ndims(caller) && inbounds==get_inbounds(caller) && padding==get_padding(caller) && memopt==get_memopt(caller) + if package==get_package(caller) && numbertype==get_numbertype(caller) && ndims==get_ndims(caller) && inbounds==get_inbounds(caller) && padding==get_padding(caller) && memopt==get_memopt(caller) && nonconst_metadata==get_nonconst_metadata(caller) if !isinteractive() @warn "ParallelStencil has already been initialized for the module $caller, with the same arguments. You are likely using ParallelStencil in an inconsistent way: @init_parallel_stencil should only be called once at the beginning of each module, right after 'using ParallelStencil'. Note: this warning is only shown in non-interactive mode." end else @IncoherentCallError("ParallelStencil has already been initialized for the module $caller, with different arguments. If you are using ParallelStencil interactively in the REPL and want to avoid restarting Julia, then you can call ParallelStencil.@reset_parallel_stencil() and rerun all parts of your code (in module $caller) that use ParallelStencil features (including kernel definitions and array allocations). If you are using ParallelStencil non-interactively, then you are using ParallelStencil in an invalid way: @init_parallel_stencil should only be called once at the beginning of each module, right after 'using ParallelStencil'.") @@ -129,5 +135,8 @@ function extract_kwargs_nopos(caller::Module, kwargs::Dict) if (:memopt in keys(kwargs)) memopt_val = eval_arg(caller, kwargs[:memopt]); check_memopt(memopt_val) else memopt_val = false end - return inbounds_val, padding_val, memopt_val + if (:nonconst_metadata in keys(kwargs)) nonconst_metadata_val = eval_arg(caller, kwargs[:nonconst_metadata]); check_nonconst_metadata(nonconst_metadata_val) + else nonconst_metadata_val = false + end + return inbounds_val, padding_val, memopt_val, nonconst_metadata_val end \ No newline at end of file diff --git a/src/kernel_language.jl b/src/kernel_language.jl index 234c6ac1..de3d2e00 100644 --- a/src/kernel_language.jl +++ b/src/kernel_language.jl @@ -460,7 +460,7 @@ $(( # NOTE: the if statement is not needed here as we only deal with registers else @ArgumentError("memopt: only loopdim=3 is currently supported.") end - store_metadata(metadata_module, is_parallel_kernel, offset_mins, offset_maxs, offsets, optvars, loopdim, loopsize, optranges, use_shmemhalos) + store_metadata(metadata_module, is_parallel_kernel, caller, offset_mins, offset_maxs, offsets, optvars, loopdim, loopsize, optranges, use_shmemhalos) # @show QuoteNode(ParallelKernel.simplify_varnames!(ParallelKernel.remove_linenumbernodes!(deepcopy(body)))) return body end @@ -1009,17 +1009,36 @@ function wrap_loop(index::Symbol, range::UnitRange, block::Expr; unroll=false) end end -function store_metadata(metadata_module::Module, is_parallel_kernel::Bool, offset_mins::Dict{Symbol, <:NTuple{3,Integer}}, offset_maxs::Dict{Symbol, <:NTuple{3,Integer}}, offsets::Dict{Symbol, Dict{Any, Any}}, optvars::NTuple{N,Symbol} where N, loopdim::Integer, loopsize::Integer, optranges::Dict{Any, Any}, use_shmemhalos) - storeexpr = quote - const is_parallel_kernel = $is_parallel_kernel - const memopt = true - const stencilranges = $(NamedTuple(A => (offset_mins[A][1]:offset_maxs[A][1], offset_mins[A][2]:offset_maxs[A][2], offset_mins[A][3]:offset_maxs[A][3]) for A in optvars)) - const offsets = $offsets - const optvars = $optvars - const loopdim = $loopdim - const loopsize = $loopsize - const optranges = $optranges - const use_shmemhalos = $use_shmemhalos +function store_metadata(metadata_module::Module, is_parallel_kernel::Bool, caller::Module, offset_mins::Dict{Symbol, <:NTuple{3,Integer}}, offset_maxs::Dict{Symbol, <:NTuple{3,Integer}}, offsets::Dict{Symbol, Dict{Any, Any}}, optvars::NTuple{N,Symbol} where N, loopdim::Integer, loopsize::Integer, optranges::Dict{Any, Any}, use_shmemhalos) + memopt = true + nonconst_metadata = get_nonconst_metadata(caller) + stencilranges = NamedTuple(A => (offset_mins[A][1]:offset_maxs[A][1], offset_mins[A][2]:offset_maxs[A][2], offset_mins[A][3]:offset_maxs[A][3]) for A in optvars) + if nonconst_metadata + storeexpr = quote + is_parallel_kernel = $is_parallel_kernel + memopt = $memopt + nonconst_metadata = $nonconst_metadata + stencilranges = $stencilranges + offsets = $offsets + optvars = $optvars + loopdim = $loopdim + loopsize = $loopsize + optranges = $optranges + use_shmemhalos = $use_shmemhalos + end + else + storeexpr = quote + const is_parallel_kernel = $is_parallel_kernel + const memopt = $memopt + const nonconst_metadata = $nonconst_metadata + const stencilranges = $stencilranges + const offsets = $offsets + const optvars = $optvars + const loopdim = $loopdim + const loopsize = $loopsize + const optranges = $optranges + const use_shmemhalos = $use_shmemhalos + end end @eval(metadata_module, $storeexpr) end diff --git a/src/parallel.jl b/src/parallel.jl index 85f9fe92..ab45670c 100644 --- a/src/parallel.jl +++ b/src/parallel.jl @@ -132,7 +132,7 @@ parallel_async(source::LineNumberNode, caller::Module, args::Union{Symbol,Expr}. function parallel(source::LineNumberNode, caller::Module, args::Union{Symbol,Expr}...; package::Symbol=get_package(caller), async::Bool=false) if is_kernel(args[end]) posargs, kwargs_expr, kernelarg = split_parallel_args(args, is_call=false) - kwargs = extract_kwargs(caller, kwargs_expr, (:ndims, :N, :inbounds, :memopt, :optvars, :loopdim, :loopsize, :optranges, :useshmemhalos, :optimize_halo_read, :metadata_module, :metadata_function), "@parallel "; eval_args=(:ndims, :inbounds, :memopt, :loopdim, :optranges, :useshmemhalos, :optimize_halo_read, :metadata_module)) + kwargs = extract_kwargs(caller, kwargs_expr, (:ndims, :N, :inbounds, :padding, :memopt, :optvars, :loopdim, :loopsize, :optranges, :useshmemhalos, :optimize_halo_read, :metadata_module, :metadata_function), "@parallel "; eval_args=(:ndims, :inbounds, :padding, :memopt, :loopdim, :optranges, :useshmemhalos, :optimize_halo_read, :metadata_module)) ndims = haskey(kwargs, :ndims) ? kwargs.ndims : get_ndims(caller) is_parallel_kernel = true if typeof(ndims) <: Tuple @@ -175,7 +175,7 @@ function parallel_indices(source::LineNumberNode, caller::Module, args::Union{Sy is_parallel_kernel = false numbertype = get_numbertype(caller) posargs, kwargs_expr, kernelarg = split_parallel_args(args, is_call=false) - kwargs = extract_kwargs(caller, kwargs_expr, (:ndims, :N, :inbounds, :memopt, :optvars, :loopdim, :loopsize, :optranges, :useshmemhalos, :optimize_halo_read, :metadata_module, :metadata_function), "@parallel_indices"; eval_args=(:ndims, :inbounds, :memopt, :loopdim, :optranges, :useshmemhalos, :optimize_halo_read, :metadata_module)) + kwargs = extract_kwargs(caller, kwargs_expr, (:ndims, :N, :inbounds, :padding, :memopt, :optvars, :loopdim, :loopsize, :optranges, :useshmemhalos, :optimize_halo_read, :metadata_module, :metadata_function), "@parallel_indices"; eval_args=(:ndims, :inbounds, :padding, :memopt, :loopdim, :optranges, :useshmemhalos, :optimize_halo_read, :metadata_module)) indices_expr = posargs[1] ndims = haskey(kwargs, :ndims) ? kwargs.ndims : get_ndims(caller) if typeof(ndims) <: Tuple @@ -193,6 +193,7 @@ function parallel_indices(source::LineNumberNode, caller::Module, args::Union{Sy metadata_module, metadata_function = kwargs.metadata_module, kwargs.metadata_function end inbounds = haskey(kwargs, :inbounds) ? kwargs.inbounds : get_inbounds(caller) + padding = haskey(kwargs, :padding) ? kwargs.padding : get_padding(caller) memopt = haskey(kwargs, :memopt) ? kwargs.memopt : get_memopt(caller) if memopt quote @@ -200,8 +201,8 @@ function parallel_indices(source::LineNumberNode, caller::Module, args::Union{Sy $metadata_function end else - kwargs_expr = :(inbounds=$inbounds) - ParallelKernel.parallel_indices(caller, posargs..., kwargs_expr, kernelarg; package=package) + kwargs_expr = (:(inbounds=$inbounds), :(padding=$padding)) + ParallelKernel.parallel_indices(caller, posargs..., kwargs_expr..., kernelarg; package=package) end end end @@ -247,7 +248,7 @@ function parallel_indices_splatarg(caller::Module, package::Symbol, ndims::Integ return :(@parallel_indices $indices_expr $(kwargs_expr...) $kernel) #TODO: the package and numbertype will have to be passed here further once supported as kwargs (currently removed from signature: package::Symbol, numbertype::DataType, ) end -function parallel_indices_memopt(metadata_module::Module, metadata_function::Expr, is_parallel_kernel::Bool, caller::Module, package::Symbol, indices::Union{Symbol,Expr}, kernel::Expr; ndims::Integer=get_ndims(caller), inbounds::Bool=get_inbounds(caller), memopt::Bool=get_memopt(caller), optvars::Union{Expr,Symbol}=Symbol(""), loopdim::Integer=determine_loopdim(indices), loopsize::Integer=compute_loopsize(), optranges::Union{Nothing, NamedTuple{t, <:NTuple{N,NTuple{3,UnitRange}} where N} where t}=nothing, useshmemhalos::Union{Nothing, NamedTuple{t, <:NTuple{N,Bool} where N} where t}=nothing, optimize_halo_read::Bool=true) +function parallel_indices_memopt(metadata_module::Module, metadata_function::Expr, is_parallel_kernel::Bool, caller::Module, package::Symbol, indices::Union{Symbol,Expr}, kernel::Expr; ndims::Integer=get_ndims(caller), inbounds::Bool=get_inbounds(caller), padding::Bool=get_padding(caller), memopt::Bool=get_memopt(caller), optvars::Union{Expr,Symbol}=Symbol(""), loopdim::Integer=determine_loopdim(indices), loopsize::Integer=compute_loopsize(), optranges::Union{Nothing, NamedTuple{t, <:NTuple{N,NTuple{3,UnitRange}} where N} where t}=nothing, useshmemhalos::Union{Nothing, NamedTuple{t, <:NTuple{N,Bool} where N} where t}=nothing, optimize_halo_read::Bool=true) if (!memopt) @ModuleInternalError("parallel_indices_memopt: called with `memopt=false` which should never happen.") end if (!isa(indices,Symbol) && !isa(indices.head,Symbol)) @ArgumentError("@parallel_indices: argument 'indices' must be a tuple of indices, a single index or a variable followed by the splat operator representing a tuple of indices (e.g. (ix, iy, iz) or (ix, iy) or ix or I...).") end if (!isa(optvars,Symbol) && !isa(optvars.head,Symbol)) @KeywordArgumentError("@parallel_indices: keyword argument 'optvars' must be a tuple of optvars or a single optvar (e.g. (A, B, C) or A ).") end @@ -257,34 +258,38 @@ function parallel_indices_memopt(metadata_module::Module, metadata_function::Exp body = add_return(body) set_body!(kernel, body) indices = extract_tuple(indices) - return :(@parallel_indices $(Expr(:tuple, indices[1:end-1]...)) ndims=$ndims inbounds=$inbounds memopt=false metadata_module=$metadata_module metadata_function=$metadata_function $kernel) #TODO: the package and numbertype will have to be passed here further once supported as kwargs (currently removed from signature: package::Symbol, numbertype::DataType, ) + return :(@parallel_indices $(Expr(:tuple, indices[1:end-1]...)) ndims=$ndims inbounds=$inbounds padding=$padding memopt=false metadata_module=$metadata_module metadata_function=$metadata_function $kernel) #TODO: the package and numbertype will have to be passed here further once supported as kwargs (currently removed from signature: package::Symbol, numbertype::DataType, ) end function parallel_kernel(metadata_module::Module, metadata_function::Expr, caller::Module, package::Symbol, ndims::Integer, numbertype::DataType, kernel::Expr; kwargs::NamedTuple) is_parallel_kernel = true if (ndims < 1 || ndims > 3) @KeywordArgumentError("@parallel: keyword argument 'ndims' is invalid or missing (valid values are 1, 2 or 3; 'ndims' an be set globally in @init_parallel_stencil and overwritten per kernel if needed).") end inbounds = haskey(kwargs, :inbounds) ? kwargs.inbounds : get_inbounds(caller) - padding = get_padding(caller) # TODO: padding can later be made configurable per kernel (to enable working with arrays as before). + padding = haskey(kwargs, :padding) ? kwargs.padding : get_padding(caller) memopt = haskey(kwargs, :memopt) ? kwargs.memopt : get_memopt(caller) indices = get_indices_expr(ndims).args + indices_dir = get_indices_dir_expr(ndims).args body = get_body(kernel) body = remove_return(body) validate_body(body) kernelargs = splitarg.(extract_kernel_args(kernel)[1]) argvars = (arg[1] for arg in kernelargs) - onthefly_vars, onthefly_exprs, write_vars, body = extract_onthefly_arrays!(body, argvars) check_mask_macro(caller) + onthefly_vars, onthefly_exprs, write_vars, body = extract_onthefly_arrays!(body, argvars) + has_onthefly = !isempty(onthefly_vars) body = apply_masks(body, indices) body = macroexpand(caller, body) - body = handle_padding(body, padding) - if length(onthefly_vars) > 0 + body = handle_padding(caller, body, padding, indices; handle_view_accesses=false, delay_dir_handling=has_onthefly && padding) # NOTE: delay_dir_handling is mandatory in case of on-the-fly with padding, because the macros (missing dir_handling) created will only be available in the next world age. + if has_onthefly onthefly_syms = gensym_world.(onthefly_vars, (@__MODULE__,)) onthefly_exprs = macroexpand.((caller,), onthefly_exprs) - onthefly_exprs = handle_padding.(onthefly_exprs, (padding,)) - body = insert_onthefly!(body, onthefly_vars, onthefly_syms, indices) - onthefly_exprs = insert_onthefly!.(onthefly_exprs, (onthefly_vars,), (onthefly_syms,), (indices,)) - create_onthefly_macro.((caller,), onthefly_syms, onthefly_exprs, onthefly_vars, (indices,)) + onthefly_exprs = handle_padding.((caller,), onthefly_exprs, (padding,), (indices,); handle_view_accesses=false, dir_handling=!padding) # NOTE: dir_handling is done after macro expansion with the delayed handling. + onthefly_exprs = insert_onthefly!.(onthefly_exprs, (onthefly_vars,), (onthefly_syms,), (indices,), (indices_dir,)) + onthefly_exprs = handle_padding.((caller,), onthefly_exprs, (padding,), (indices,); handle_indexing=false) + body = insert_onthefly!(body, onthefly_vars, onthefly_syms, indices, indices_dir) + create_onthefly_macro.((caller,), onthefly_syms, onthefly_exprs, onthefly_vars, (indices,), (indices_dir,)) end + body = handle_padding(caller, body, padding, indices; handle_indexing=false) if isgpu(package) kernel = insert_device_types(caller, kernel) end if !memopt kernel = adjust_signatures(kernel, package) @@ -440,6 +445,18 @@ function get_indices_expr(ndims::Integer) end end +function get_indices_dir_expr(ndims::Integer) + if ndims == 1 + return :($(INDICES_DIR[1]),) + elseif ndims == 2 + return :($(INDICES_DIR[1]), $(INDICES_DIR[2])) + elseif ndims == 3 + return :($(INDICES_DIR[1]), $(INDICES_DIR[2]), $(INDICES_DIR[3])) + else + @ModuleInternalError("argument 'ndims' must be 1, 2 or 3.") + end +end + ## FUNCTIONS TO CREATE METADATA STORAGE @@ -514,23 +531,40 @@ function extract_onthefly_arrays!(body, argvars) return onthefly_vars, onthefly_exprs, write_vars, body end -function insert_onthefly!(expr, onthefly_vars, onthefly_syms, indices::Array) +function insert_onthefly!(expr, onthefly_vars, onthefly_syms, indices::Array, indices_dir::Array) indices = (indices...,) + indices_dir = (indices_dir...,) for (A, m) in zip(onthefly_vars, onthefly_syms) - expr = substitute(expr, A, m, indices) + expr = substitute(expr, A, m, indices, indices_dir) end return expr end -function create_onthefly_macro(caller, m, expr, var, indices) - ndims = length(indices) - ix, iy, iz = gensym_world.(("ix","iy","iz"), (@__MODULE__,)) - local_indices = (ndims==3) ? (ix, iy, iz) : (ndims==2) ? (ix, iy) : (ix,) +function determine_local_index_dir(local_index, dim) + id_l = local_index + id_l = increment_arg(id_l, INDICES_DIR_FUNCTIONS_SYMS[dim]) + id_l = substitute(id_l, INDICES_DIR[dim], :($(INDICES_DIR_FUNCTIONS_SYMS[dim])(2))) + id_l = substitute(id_l, INDICES[dim], INDICES_DIR[dim]) + return id_l +end + +function create_onthefly_macro(caller, m, expr, var, indices, indices_dir) + ndims = length(indices) + ix, iy, iz = gensym_world.(("ix","iy","iz"), (@__MODULE__,)) + ixd, iyd, izd = gensym_world.(("ixd","iyd","izd"), (@__MODULE__,)) + local_indices = (ndims==3) ? (ix, iy, iz) : (ndims==2) ? (ix, iy) : (ix,) + local_indices_dir = (ndims==3) ? (ixd, iyd, izd) : (ndims==2) ? (ixd, iyd) : (ixd,) for (index, local_index) in zip(indices, local_indices) expr = substitute(expr, index, Expr(:$, local_index)) end - quote_expr = :($(Expr(:quote, expr))) - m_function = :($m($(local_indices...)) = $quote_expr) + for (index, local_index) in zip(indices_dir, local_indices_dir) + expr = substitute(expr, index, Expr(:$, local_index)) + end + local_assign = quote + $((:($(local_indices_dir[i]) = ParallelStencil.determine_local_index_dir($(local_indices[i]), $i)) for i=1:ndims)...) + end + expr_quoted = :($(Expr(:quote, expr))) + m_function = :($m($(local_indices...)) = ($local_assign; $expr_quoted)) m_macro = :(macro $m(args...) if (length(args)!=$ndims) ParallelStencil.@ArgumentError("unsupported kernel statements in @parallel kernel definition: wrong number of indices in $var (expected $ndims indices).") end; esc($m(args...)) end) @eval(caller, $m_function) @eval(caller, $m_macro) diff --git a/src/shared.jl b/src/shared.jl index 0b7d7ca8..1617c310 100644 --- a/src/shared.jl +++ b/src/shared.jl @@ -1,6 +1,6 @@ import MacroTools: @capture, postwalk, splitdef, splitarg # NOTE: inexpr_walk used instead of MacroTools.inexpr -import .ParallelKernel: eval_arg, split_args, split_kwargs, extract_posargs_init, extract_kernel_args, insert_device_types, is_kernel, is_call, gensym_world, isgpu, iscpu, @isgpu, @iscpu, substitute, substitute_in_kernel, in_signature, inexpr_walk, adjust_signatures, handle_indices_and_literals, add_inbounds, cast, @ranges, @rangelengths, @return_value, @return_nothing, @firstindex, @lastindex, is_access, find_vars, handle_padding, handle_inverses -import .ParallelKernel: PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_THREADS, PKG_POLYESTER, PKG_NONE, NUMBERTYPE_NONE, SUPPORTED_NUMBERTYPES, SUPPORTED_PACKAGES, ERRMSG_UNSUPPORTED_PACKAGE, INT_CUDA, INT_AMDGPU, INT_METAL, INT_POLYESTER, INT_THREADS, INDICES, INDICES_INN, PKNumber, RANGES_VARNAME, RANGES_TYPE, RANGELENGTH_XYZ_TYPE, RANGELENGTHS_VARNAMES, THREADIDS_VARNAMES, GENSYM_SEPARATOR, AD_SUPPORTED_ANNOTATIONS +import .ParallelKernel: eval_arg, split_args, split_kwargs, extract_posargs_init, extract_kernel_args, insert_device_types, is_kernel, is_call, gensym_world, isgpu, iscpu, @isgpu, @iscpu, substitute, substitute_in_kernel, in_signature, inexpr_walk, adjust_signatures, handle_indices_and_literals, add_inbounds, cast, @ranges, @rangelengths, @return_value, @return_nothing, @firstindex, @lastindex, is_access, find_vars, handle_padding, handle_inverses, increment_arg +import .ParallelKernel: PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_THREADS, PKG_POLYESTER, PKG_NONE, NUMBERTYPE_NONE, SUPPORTED_NUMBERTYPES, SUPPORTED_PACKAGES, ERRMSG_UNSUPPORTED_PACKAGE, INT_CUDA, INT_AMDGPU, INT_METAL, INT_POLYESTER, INT_THREADS, INDICES, INDICES_INN, INDICES_DIR, INDICES_DIR_FUNCTIONS_SYMS, PKNumber, RANGES_VARNAME, RANGES_TYPE, RANGELENGTH_XYZ_TYPE, RANGELENGTHS_VARNAMES, THREADIDS_VARNAMES, GENSYM_SEPARATOR, AD_SUPPORTED_ANNOTATIONS, ARRAYTYPES, FIELDTYPES, SCALARTYPES import .ParallelKernel: @require, @symbols, symbols, longnameof, @prettyexpand, @prettystring, prettystring, @gorgeousexpand, @gorgeousstring, gorgeousstring, interpolate @@ -15,24 +15,25 @@ Return an expression that evaluates to `true` if the indices generated by @paral This macro is not intended for explicit manual usage. Calls to it are automatically added by @parallel where required. """ -const SUPPORTED_NDIMS = [1, 2, 3] -const NDIMS_NONE = 0 -const ERRMSG_KERNEL_UNSUPPORTED = "unsupported kernel statements in @parallel kernel definition: @parallel is only applicable to kernels that contain exclusively array assignments using macros from FiniteDifferences{1|2|3}D or from another compatible computation submodule. @parallel_indices supports any kind of statements in the kernels." -const ERRMSG_CHECK_NDIMS = "ndims must be evaluatable at parse time (e.g. literal or constant) and has to be one of the following Integers: $(join(SUPPORTED_NDIMS,", "))" -const ERRMSG_CHECK_MEMOPT = "memopt must be evaluatable at parse time (e.g. literal or constant) and has to be of type Bool." -const PSNumber = PKNumber -const LOOPSIZE = 16 -const LOOPDIM_NONE = 0 -const NTHREADS_MAX_MEMOPT_CUDA = 128 -const NTHREADS_MAX_MEMOPT_AMDGPU = 256 -const NTHREADS_MAX_MEMOPT_METAL = 256 -const USE_SHMEMHALO_DEFAULT = true -const USE_SHMEMHALO_1D_DEFAULT = true -const USE_FULLRANGE_DEFAULT = (false, false, true) -const FULLRANGE_THRESHOLD = 1 -const NOEXPR = :(begin end) -const MOD_METADATA = :__metadata__ # gensym_world("__metadata__", @__MODULE__) # # TODO: name mangling should be used here later, or if there is any sense to leave it like that then at check whether it's available must be done before creating it -const META_FUNCTION_PREFIX = string(gensym_world("META", @__MODULE__)) +const SUPPORTED_NDIMS = [1, 2, 3] +const NDIMS_NONE = 0 +const ERRMSG_KERNEL_UNSUPPORTED = "unsupported kernel statements in @parallel kernel definition: @parallel is only applicable to kernels that contain exclusively array assignments using macros from FiniteDifferences{1|2|3}D or from another compatible computation submodule. @parallel_indices supports any kind of statements in the kernels." +const ERRMSG_CHECK_NDIMS = "ndims must be evaluatable at parse time (e.g. literal or constant) and has to be one of the following Integers: $(join(SUPPORTED_NDIMS,", "))" +const ERRMSG_CHECK_MEMOPT = "memopt must be evaluatable at parse time (e.g. literal or constant) and has to be of type Bool." +const ERRMSG_CHECK_NONCONST_METADATA = "nonconst_metadata must be evaluatable at parse time (e.g. literal or constant) and has to be of type Bool." +const PSNumber = PKNumber +const LOOPSIZE = 16 +const LOOPDIM_NONE = 0 +const NTHREADS_MAX_MEMOPT_CUDA = 128 +const NTHREADS_MAX_MEMOPT_AMDGPU = 256 +const NTHREADS_MAX_MEMOPT_METAL = 256 +const USE_SHMEMHALO_DEFAULT = true +const USE_SHMEMHALO_1D_DEFAULT = true +const USE_FULLRANGE_DEFAULT = (false, false, true) +const FULLRANGE_THRESHOLD = 1 +const NOEXPR = :(begin end) +const MOD_METADATA = :__metadata__ # gensym_world("__metadata__", @__MODULE__) # # TODO: name mangling should be used here later, or if there is any sense to leave it like that then at check whether it's available must be done before creating it +const META_FUNCTION_PREFIX = string(gensym_world("META", @__MODULE__)) ## FUNCTIONS TO DEAL WITH KERNEL DEFINITIONS @@ -41,6 +42,10 @@ get_statements(body::Expr) = (body.head == :block) ? body.args : [body] is_array_assignment(statement) = isa(statement, Expr) && (statement.head == :(=)) && isa(statement.args[1], Expr) && (statement.args[1].head == :macrocall) is_stencil_access(ex, indices...) = is_access(ex, indices...) +function is_stencil_access(ex::Expr, indices::NTuple{N,<:Union{Symbol,Expr}}, indices_dir::NTuple{N,<:Union{Symbol,Expr}}) where N + is_access(ex, indices, indices_dir) +end + function validate_body(body::Expr) statements = get_statements(body) for statement in statements @@ -49,9 +54,9 @@ function validate_body(body::Expr) end end -function substitute(expr::Expr, A, m, indices::NTuple{N,<:Union{Symbol,Expr}} where N) +function substitute(expr::Union{Symbol,Expr}, A, m, indices::NTuple{N,<:Union{Symbol,Expr}} where N, indices_dir::NTuple{N,<:Union{Symbol,Expr}} where N) return postwalk(expr) do ex - if is_stencil_access(ex, indices...) + if is_stencil_access(ex, indices, indices_dir) @capture(ex, B_[indices_expr__]) || @ModuleInternalError("a stencil access could not be pattern matched.") if B == A m_call = :(@f($(indices_expr...))) # NOTE: interpolating the macro symbol m directly does not work @@ -82,5 +87,6 @@ end ## FUNCTIONS FOR ERROR HANDLING -check_ndims(ndims) = ( if !isa(ndims, Integer) || !(ndims in SUPPORTED_NDIMS) @ArgumentError("$ERRMSG_CHECK_NDIMS (obtained: $ndims)." ) end ) -check_memopt(memopt) = ( if !isa(memopt, Bool) @ArgumentError("$ERRMSG_CHECK_MEMOPT (obtained: $memopt)." ) end ) \ No newline at end of file +check_ndims(ndims) = ( if !isa(ndims, Integer) || !(ndims in SUPPORTED_NDIMS) @ArgumentError("$ERRMSG_CHECK_NDIMS (obtained: $ndims)." ) end ) +check_memopt(memopt) = ( if !isa(memopt, Bool) @ArgumentError("$ERRMSG_CHECK_MEMOPT (obtained: $memopt)." ) end ) +check_nonconst_metadata(nonconst_metadata) = ( if !isa(nonconst_metadata, Bool) @ArgumentError("$ERRMSG_CHECK_NONCONST_METADATA (obtained: $nonconst_metadata)." ) end ) \ No newline at end of file diff --git a/test/ParallelKernel/test_allocators.jl b/test/ParallelKernel/test_allocators.jl index 6f4f1247..50face09 100644 --- a/test/ParallelKernel/test_allocators.jl +++ b/test/ParallelKernel/test_allocators.jl @@ -566,10 +566,10 @@ eval(:( end @reset_parallel_kernel() end; - $(interpolate(:padding, (false, true), :( - @testset "6. Fields (padding=$(_$padding))" begin + $(interpolate(:__padding__, (false, true), :( + @testset "6. Fields (padding=$__padding__)" begin @require !@is_initialized() - @init_parallel_kernel($package, Float16, padding=_$padding) + @init_parallel_kernel($package, Float16, padding=__padding__) @require @is_initialized() (nx, ny, nz) = (3, 4, 5) @testset "mapping to array allocators" begin @@ -654,7 +654,7 @@ eval(:( @test size.(Tuple(@BVectorField((nx,)))) == (size(@BXField((nx,))),) @test size.(Tuple( @TensorField((nx,)))) == (size(@XXField((nx,))),) end; - @static if _$padding + @static if __padding__ @testset "array size (3D)" begin @test size( @Field((nx, ny, nz)).parent) == (nx, ny, nz ) @test size( @XField((nx, ny, nz)).parent) == (nx+1, ny, nz ) diff --git a/test/ParallelKernel/test_kernel_language.jl b/test/ParallelKernel/test_kernel_language.jl index 761620b4..87bc6473 100644 --- a/test/ParallelKernel/test_kernel_language.jl +++ b/test/ParallelKernel/test_kernel_language.jl @@ -15,7 +15,7 @@ end if !AMDGPU.functional() TEST_PACKAGES = filter!(x->x≠PKG_AMDGPU, TEST_PACKAGES) end end @static if PKG_METAL in TEST_PACKAGES - import Metal + import Metal # Import also on non-Apple systems to test macro expansions if !Metal.functional() TEST_PACKAGES = filter!(x->x≠PKG_METAL, TEST_PACKAGES) end end @static if PKG_POLYESTER in TEST_PACKAGES diff --git a/test/ParallelKernel/test_parallel.jl b/test/ParallelKernel/test_parallel.jl index 7a4dbdde..878cbf8b 100644 --- a/test/ParallelKernel/test_parallel.jl +++ b/test/ParallelKernel/test_parallel.jl @@ -3,8 +3,8 @@ import ParallelStencil using Enzyme using ParallelStencil.ParallelKernel import ParallelStencil.ParallelKernel.AD -import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_THREADS, PKG_POLYESTER, INDICES, ARRAYTYPES, FIELDTYPES -import ParallelStencil.ParallelKernel: @require, @prettystring, @gorgeousstring, @isgpu, @iscpu +import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_THREADS, PKG_POLYESTER, INDICES, ARRAYTYPES, FIELDTYPES, SCALARTYPES +import ParallelStencil.ParallelKernel: @require, @prettystring, @gorgeousstring, @isgpu, @iscpu, interpolate import ParallelStencil.ParallelKernel: checkargs_parallel, checkargs_parallel_indices, parallel_indices, maxsize using ParallelStencil.ParallelKernel.Exceptions TEST_PACKAGES = SUPPORTED_PACKAGES @@ -27,7 +27,6 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t macro compute(A) esc(:($(INDICES[1]) + ($(INDICES[2])-1)*size($A,1))) end macro compute_with_aliases(A) esc(:(ix + (iz -1)*size($A,1))) end -import Enzyme @static for package in TEST_PACKAGES @@ -152,85 +151,23 @@ eval(:( @testset "addition of range arguments" begin expansion = @gorgeousstring(1, @parallel_indices (ix,iy) f(a::T, b::T) where T <: Union{Array{Float32}, Array{Float64}} = (println("a=$a, b=$b)"); return)) @test occursin("f(a::T, b::T, ranges::Tuple{UnitRange, UnitRange, UnitRange}, rangelength_x::Int64, rangelength_y::Int64, rangelength_z::Int64", expansion) - end - @testset "Data.Array to Data.Device.Array" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Array, B::Data.Array, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Device.Array, B::Data.Device.Array,", expansion) - end - end - @testset "Data.Cell to Data.Device.Cell" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Cell, B::Data.Cell, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Device.Cell, B::Data.Device.Cell,", expansion) - end - end - @testset "Data.CellArray to Data.Device.CellArray" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.CellArray, B::Data.CellArray, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Device.CellArray, B::Data.Device.CellArray,", expansion) - end - end - @testset "Data.ArrayTuple to Data.Device.ArrayTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.ArrayTuple, B::Data.ArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.Device.ArrayTuple, B::Data.Device.ArrayTuple,", expansion) - end - end - @testset "Data.CellTuple to Data.Device.CellTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.CellTuple, B::Data.CellTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.Device.CellTuple, B::Data.Device.CellTuple,", expansion) - end - end - @testset "Data.CellArrayTuple to Data.Device.CellArrayTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.CellArrayTuple, B::Data.CellArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.Device.CellArrayTuple, B::Data.Device.CellArrayTuple,", expansion) - end - end - @testset "Data.NamedArrayTuple to Data.Device.NamedArrayTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.NamedArrayTuple, B::Data.NamedArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.Device.NamedArrayTuple, B::Data.Device.NamedArrayTuple,", expansion) - end - end - @testset "Data.NamedCellTuple to Data.Device.NamedCellTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.NamedCellTuple, B::Data.NamedCellTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.Device.NamedCellTuple, B::Data.Device.NamedCellTuple,", expansion) - end - end - @testset "Data.NamedCellArrayTuple to Data.Device.NamedCellArrayTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.NamedCellArrayTuple, B::Data.NamedCellArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.Device.NamedCellArrayTuple, B::Data.Device.NamedCellArrayTuple,", expansion) - end - end - @testset "Data.ArrayCollection to Data.Device.ArrayCollection" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::Data.ArrayCollection, B::Data.ArrayCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::Data.Device.ArrayCollection, B::Data.Device.ArrayCollection,", expansion) - end - end - @testset "Data.CellCollection to Data.Device.CellCollection" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::Data.CellCollection, B::Data.CellCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::Data.Device.CellCollection, B::Data.Device.CellCollection,", expansion) - end end - @testset "Data.CellArrayCollection to Data.Device.CellArrayCollection" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::Data.CellArrayCollection, B::Data.CellArrayCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::Data.Device.CellArrayCollection, B::Data.Device.CellArrayCollection,", expansion) + $(interpolate(:__T__, ARRAYTYPES, :( + @testset "Data.__T__ to Data.Device.__T__" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.__T__, B::Data.__T__, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Device.__T__, B::Data.Device.__T__,", expansion) + end end - end - @testset "Data.Fields.Field to Data.Fields.Device.Field" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.Field, B::Data.Fields.Field, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Fields.Device.Field, B::Data.Fields.Device.Field,", expansion) + ))) + $(interpolate(:__T__, FIELDTYPES, :( + @testset "Data.Fields.__T__ to Data.Fields.Device.__T__" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.__T__, B::Data.Fields.__T__, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Fields.Device.__T__, B::Data.Fields.Device.__T__,", expansion) + end end - end + ))) # NOTE: the following GPU tests fail, because the Fields module cannot be imported. # @testset "Fields.Field to Data.Fields.Device.Field" begin # @static if @isgpu($package) @@ -246,102 +183,22 @@ eval(:( # @test occursin("f(A::Data.Fields.Device.Field, B::Data.Fields.Device.Field,", expansion) # end # end - @testset "Data.Fields.VectorField to Data.Fields.Device.VectorField" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.VectorField, B::Data.Fields.VectorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Fields.Device.VectorField, B::Data.Fields.Device.VectorField,", expansion) - end - end - @testset "Data.Fields.BVectorField to Data.Fields.Device.BVectorField" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.BVectorField, B::Data.Fields.BVectorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Fields.Device.BVectorField, B::Data.Fields.Device.BVectorField,", expansion) - end - end - @testset "Data.Fields.TensorField to Data.Fields.Device.TensorField" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.TensorField, B::Data.Fields.TensorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Fields.Device.TensorField, B::Data.Fields.Device.TensorField,", expansion) - end - end - @testset "TData.Array to TData.Device.Array" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Array, B::TData.Array, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::TData.Device.Array, B::TData.Device.Array,", expansion) - end - end - @testset "TData.Cell to TData.Device.Cell" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Cell, B::TData.Cell, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::TData.Device.Cell, B::TData.Device.Cell,", expansion) - end - end - @testset "TData.CellArray to TData.Device.CellArray" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.CellArray, B::TData.CellArray, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::TData.Device.CellArray, B::TData.Device.CellArray,", expansion) - end - end - @testset "TData.ArrayTuple to TData.Device.ArrayTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.ArrayTuple, B::TData.ArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::TData.Device.ArrayTuple, B::TData.Device.ArrayTuple,", expansion) - end - end - @testset "TData.CellTuple to TData.Device.CellTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.CellTuple, B::TData.CellTuple, c::T) where T <: Integer = return) - @test occursin("f(A::TData.Device.CellTuple, B::TData.Device.CellTuple,", expansion) - end - end - @testset "TData.CellArrayTuple to TData.Device.CellArrayTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.CellArrayTuple, B::TData.CellArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::TData.Device.CellArrayTuple, B::TData.Device.CellArrayTuple,", expansion) - end - end - @testset "TData.NamedArrayTuple to TData.Device.NamedArrayTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.NamedArrayTuple, B::TData.NamedArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::TData.Device.NamedArrayTuple, B::TData.Device.NamedArrayTuple,", expansion) - end - end - @testset "TData.NamedCellTuple to TData.Device.NamedCellTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.NamedCellTuple, B::TData.NamedCellTuple, c::T) where T <: Integer = return) - @test occursin("f(A::TData.Device.NamedCellTuple, B::TData.Device.NamedCellTuple,", expansion) - end - end - @testset "TData.NamedCellArrayTuple to TData.Device.NamedCellArrayTuple" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.NamedCellArrayTuple, B::TData.NamedCellArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::TData.Device.NamedCellArrayTuple, B::TData.Device.NamedCellArrayTuple,", expansion) - end - end - @testset "TData.ArrayCollection to TData.Device.ArrayCollection" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::TData.ArrayCollection, B::TData.ArrayCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::TData.Device.ArrayCollection, B::TData.Device.ArrayCollection,", expansion) - end - end - @testset "TData.CellCollection to TData.Device.CellCollection" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::TData.CellCollection, B::TData.CellCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::TData.Device.CellCollection, B::TData.Device.CellCollection,", expansion) - end - end - @testset "TData.CellArrayCollection to TData.Device.CellArrayCollection" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::TData.CellArrayCollection, B::TData.CellArrayCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::TData.Device.CellArrayCollection, B::TData.Device.CellArrayCollection,", expansion) + $(interpolate(:__T__, ARRAYTYPES, :( + @testset "TData.__T__ to TData.Device.__T__" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.__T__, B::TData.__T__, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::TData.Device.__T__, B::TData.Device.__T__,", expansion) + end end - end - @testset "TData.Fields.Field to TData.Fields.Device.Field" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Fields.Field, B::TData.Fields.Field, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::TData.Fields.Device.Field, B::TData.Fields.Device.Field,", expansion) + ))) + $(interpolate(:__T__, FIELDTYPES, :( + @testset "TData.Fields.__T__ to TData.Fields.Device.__T__" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Fields.__T__, B::TData.Fields.__T__, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::TData.Fields.Device.__T__, B::TData.Fields.Device.__T__,", expansion) + end end - end + ))) # NOTE: the following GPU tests fail, because the Fields module cannot be imported. # @testset "Fields.Field to TData.Fields.Device.Field" begin # @static if @isgpu($package) @@ -352,35 +209,19 @@ eval(:( # end # @testset "Field to TData.Fields.Device.Field" begin # @static if @isgpu($package) - # using .TData.Fields + # using .TData.Fields # expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Field, B::Field, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) # @test occursin("f(A::TData.Fields.Device.Field, B::TData.Fields.Device.Field,", expansion) # end # end - @testset "TData.Fields.VectorField to TData.Fields.Device.VectorField" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Fields.VectorField, B::TData.Fields.VectorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::TData.Fields.Device.VectorField, B::TData.Fields.Device.VectorField,", expansion) - end - end - @testset "TData.Fields.BVectorField to TData.Fields.Device.BVectorField" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Fields.BVectorField, B::TData.Fields.BVectorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::TData.Fields.Device.BVectorField, B::TData.Fields.Device.BVectorField,", expansion) - end - end - @testset "TData.Fields.TensorField to TData.Fields.Device.TensorField" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Fields.TensorField, B::TData.Fields.TensorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::TData.Fields.Device.TensorField, B::TData.Fields.Device.TensorField,", expansion) - end - end - @testset "Nested Data.Array to Data.Device.Array" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::NamedTuple{T1, NTuple{T2,T3}} where {T1,T2} where T3 <: Data.Array, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::((NamedTuple{T1, NTuple{T2, T3}} where {T1, T2}) where T3 <: Data.Device.Array),", expansion) + $(interpolate(:__T__, ARRAYTYPES, :( + @testset "Nested Data.__T__ to Data.Device.__T__" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::NamedTuple{T1, NTuple{T2,T3}} where {T1,T2} where T3 <: Data.__T__, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::((NamedTuple{T1, NTuple{T2, T3}} where {T1, T2}) where T3 <: Data.Device.__T__),", expansion) + end end - end + ))) @testset "@parallel_indices (1D)" begin A = @zeros(4) @parallel_indices (ix) function write_indices!(A) @@ -581,48 +422,22 @@ eval(:( @require !@is_initialized() @init_parallel_kernel(package = $package) @require @is_initialized - @testset "Data.Array{T} to Data.Device.Array{T}" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Array{T}, B::Data.Array{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Device.Array{T}, B::Data.Device.Array{T},", expansion) - end - end; - @testset "Data.Cell{T} to Data.Device.Cell{T}" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Cell{T}, B::Data.Cell{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Device.Cell{T}, B::Data.Device.Cell{T},", expansion) - end - end; - @testset "Data.CellArray{T} to Data.Device.CellArray{T}" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.CellArray{T}, B::Data.CellArray{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Device.CellArray{T}, B::Data.Device.CellArray{T},", expansion) - end - end; - @testset "Data.Fields.Field{T} to Data.Fields.Device.Field{T}" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.Field{T}, B::Data.Fields.Field{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Fields.Device.Field{T}, B::Data.Fields.Device.Field{T},", expansion) - end - end; - @testset "Data.Fields.VectorField{T} to Data.Fields.Device.VectorField{T}" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.VectorField{T}, B::Data.Fields.VectorField{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Fields.Device.VectorField{T}, B::Data.Fields.Device.VectorField{T},", expansion) - end - end; - @testset "Data.Fields.BVectorField{T} to Data.Fields.Device.BVectorField{T}" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.BVectorField{T}, B::Data.Fields.BVectorField{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Fields.Device.BVectorField{T}, B::Data.Fields.Device.BVectorField{T},", expansion) - end - end; - @testset "Data.Fields.TensorField{T} to Data.Fields.Device.TensorField{T}" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.TensorField{T}, B::Data.Fields.TensorField{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.Fields.Device.TensorField{T}, B::Data.Fields.Device.TensorField{T},", expansion) - end - end; + $(interpolate(:__T__, ARRAYTYPES, :( + @testset "Data.__T__{T2} to Data.Device.__T__{T2}" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.__T__{T2}, B::Data.__T__{T2}, c<:Integer) where T2 <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Device.__T__{T2}, B::Data.Device.__T__{T2},", expansion) + end + end; + ))) + $(interpolate(:__T__, FIELDTYPES, :( + @testset "Data.Fields.__T__{T2} to Data.Fields.Device.__T__{T2}" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.__T__{T2}, B::Data.Fields.__T__{T2}, c<:Integer) where T2 <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Fields.Device.__T__{T2}, B::Data.Fields.Device.__T__{T2},", expansion) + end + end; + ))) @reset_parallel_kernel() end; @testset "5. Exceptions" begin diff --git a/test/test_FiniteDifferences1D.jl b/test/test_FiniteDifferences1D.jl index cb3e0065..d9179ce7 100644 --- a/test/test_FiniteDifferences1D.jl +++ b/test/test_FiniteDifferences1D.jl @@ -1,8 +1,10 @@ using Test using ParallelStencil import ParallelStencil: @reset_parallel_stencil, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_POLYESTER -import ParallelStencil: @require +import ParallelStencil: @require, interpolate using ParallelStencil.FiniteDifferences1D +using ParallelStencil.FieldAllocators +import ParallelStencil.FieldAllocators: @IField TEST_PACKAGES = SUPPORTED_PACKAGES @static if PKG_CUDA in TEST_PACKAGES import CUDA @@ -31,60 +33,64 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin - @require !@is_initialized() - @init_parallel_stencil($package, $FloatDefault, 1) - @require @is_initialized() - nx = 7 - A = @rand(nx ); - Ax = @rand(nx+1); - Axx = @rand(nx+2); - R = @zeros(nx ); - Rxx = @zeros(nx+2); - @testset "1. compute macros" begin - @testset "differences" begin - @parallel d!(R, Ax) = (@all(R) = @d(Ax); return) - @parallel d2!(R, Axx) = (@all(R) = @d2(Axx); return) - R.=0; @parallel d!(R, Ax); @test all(Array(R .== Ax[2:end].-Ax[1:end-1])) # INFO: AMDGPU arrays need to be compared on CPU - R.=0; @parallel d2!(R, Axx); @test all(Array(R .== (Axx[3:end].-Axx[2:end-1]).-(Axx[2:end-1].-Axx[1:end-2]))) + $(interpolate(:__padding__, (false, package!=PKG_POLYESTER), :( #TODO: this needs to be restored to (false, true) when Polyester supports padding. + @testset "(padding=$__padding__)" begin + @require !@is_initialized() + @init_parallel_stencil($package, $FloatDefault, 1, padding=__padding__) + @require @is_initialized() + nx = (9,) + A = @IField(nx, @rand); + Ax = @XField(nx, @rand); + Axx = @Field(nx, @rand); + R = @IField(nx, @zeros); + Rxx = @Field(nx, @zeros); + @testset "1. compute macros" begin + @testset "differences" begin + @parallel d!(R, Ax) = (@all(R) = @d(Ax); return) + @parallel d2!(R, Axx) = (@all(R) = @d2(Axx); return) + R.=0; @parallel d!(R, Ax); @test all(Array(R .== Ax[2:end].-Ax[1:end-1])) # INFO: AMDGPU arrays need to be compared on CPU + R.=0; @parallel d2!(R, Axx); @test all(Array(R .== (Axx[3:end].-Axx[2:end-1]).-(Axx[2:end-1].-Axx[1:end-2]))) + end; + @testset "selection" begin + @parallel all!(R, A) = (@all(R) = @all(A); return) + @parallel inn!(R, Axx) = (@all(R) = @inn(Axx); return) + R.=0; @parallel all!(R, A); @test all(Array(R .== A)) + R.=0; @parallel inn!(R, Axx); @test all(Array(R .== Axx[2:end-1])) + end; + @testset "averages" begin + @parallel av!(R, Ax) = (@all(R) = @av(Ax); return) + R.=0; @parallel av!(R, Ax); @test all(Array(R .== (Ax[1:end-1].+Ax[2:end]).*$FloatDefault(0.5))) + end; + @testset "harmonic averages" begin + @parallel harm!(R, Ax) = (@all(R) = @harm(Ax); return) + R.=0; @parallel harm!(R, Ax); @test all(Array(R .== 2 ./(1 ./Ax[1:end-1].+1 ./Ax[2:end]))) + end; + @testset "others" begin + @parallel maxloc!(R, Axx) = (@all(R) = @maxloc(Axx); return) + R.=0; @parallel maxloc!(R, Axx); @test all(Array(R .== max.(max.(Axx[3:end],Axx[2:end-1]),Axx[1:end-2]))) + end; + end; + @testset "2. apply masks" begin + @testset "selection" begin + @parallel inn_all!(Rxx, A) = (@inn(Rxx) = @all(A); return) + @parallel inn_inn!(Rxx, Axx) = (@inn(Rxx) = @inn(Axx); return) + Rxx.=0; @parallel inn_all!(Rxx, A); @test all(Array(Rxx[2:end-1] .== A)) + Rxx[2:end-1].=0; @test all(Array(Rxx .== 0)) # Test that boundary values remained zero. + Rxx.=0; @parallel inn_inn!(Rxx, Axx); @test all(Array(Rxx[2:end-1] .== Axx[2:end-1])) + Rxx[2:end-1].=0; @test all(Array(Rxx .== 0)) # Test that boundary values remained zero. + end; + @testset "differences" begin + @parallel inn_d!(Rxx, Ax) = (@inn(Rxx) = @d(Ax); return) + @parallel inn_d2!(Rxx, Axx) = (@inn(Rxx) = @d2(Axx); return) + Rxx.=0; @parallel inn_d!(Rxx, Ax); @test all(Array(Rxx[2:end-1] .== Ax[2:end].-Ax[1:end-1])) + Rxx[2:end-1].=0; @test all(Array(Rxx .== 0)) # Test that boundary values remained zero. + Rxx.=0; @parallel inn_d2!(Rxx, Axx); @test all(Array(Rxx[2:end-1] .== (Axx[3:end].-Axx[2:end-1]).-(Axx[2:end-1].-Axx[1:end-2]))) + Rxx[2:end-1].=0; @test all(Array(Rxx .== 0)) # Test that boundary values remained zero. + end; + end; + @reset_parallel_stencil() end; - @testset "selection" begin - @parallel all!(R, A) = (@all(R) = @all(A); return) - @parallel inn!(R, Axx) = (@all(R) = @inn(Axx); return) - R.=0; @parallel all!(R, A); @test all(Array(R .== A)) - R.=0; @parallel inn!(R, Axx); @test all(Array(R .== Axx[2:end-1])) - end; - @testset "averages" begin - @parallel av!(R, Ax) = (@all(R) = @av(Ax); return) - R.=0; @parallel av!(R, Ax); @test all(Array(R .== (Ax[1:end-1].+Ax[2:end]).*$FloatDefault(0.5))) - end; - @testset "harmonic averages" begin - @parallel harm!(R, Ax) = (@all(R) = @harm(Ax); return) - R.=0; @parallel harm!(R, Ax); @test all(Array(R .== 2 ./(1 ./Ax[1:end-1].+1 ./Ax[2:end]))) - end; - @testset "others" begin - @parallel maxloc!(R, Axx) = (@all(R) = @maxloc(Axx); return) - R.=0; @parallel maxloc!(R, Axx); @test all(Array(R .== max.(max.(Axx[3:end],Axx[2:end-1]),Axx[1:end-2]))) - end; - end; - @testset "2. apply masks" begin - @testset "selection" begin - @parallel inn_all!(Rxx, A) = (@inn(Rxx) = @all(A); return) - @parallel inn_inn!(Rxx, Axx) = (@inn(Rxx) = @inn(Axx); return) - Rxx.=0; @parallel inn_all!(Rxx, A); @test all(Array(Rxx[2:end-1] .== A)) - Rxx[2:end-1].=0; @test all(Array(Rxx .== 0)) # Test that boundary values remained zero. - Rxx.=0; @parallel inn_inn!(Rxx, Axx); @test all(Array(Rxx[2:end-1] .== Axx[2:end-1])) - Rxx[2:end-1].=0; @test all(Array(Rxx .== 0)) # Test that boundary values remained zero. - end; - @testset "differences" begin - @parallel inn_d!(Rxx, Ax) = (@inn(Rxx) = @d(Ax); return) - @parallel inn_d2!(Rxx, Axx) = (@inn(Rxx) = @d2(Axx); return) - Rxx.=0; @parallel inn_d!(Rxx, Ax); @test all(Array(Rxx[2:end-1] .== Ax[2:end].-Ax[1:end-1])) - Rxx[2:end-1].=0; @test all(Array(Rxx .== 0)) # Test that boundary values remained zero. - Rxx.=0; @parallel inn_d2!(Rxx, Axx); @test all(Array(Rxx[2:end-1] .== (Axx[3:end].-Axx[2:end-1]).-(Axx[2:end-1].-Axx[1:end-2]))) - Rxx[2:end-1].=0; @test all(Array(Rxx .== 0)) # Test that boundary values remained zero. - end; - end; - @reset_parallel_stencil() + ))) end; )) diff --git a/test/test_FiniteDifferences2D.jl b/test/test_FiniteDifferences2D.jl index 3099662f..b99ab6da 100644 --- a/test/test_FiniteDifferences2D.jl +++ b/test/test_FiniteDifferences2D.jl @@ -1,8 +1,10 @@ using Test using ParallelStencil import ParallelStencil: @reset_parallel_stencil, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_POLYESTER -import ParallelStencil: @require +import ParallelStencil: @require, interpolate using ParallelStencil.FiniteDifferences2D +using ParallelStencil.FieldAllocators +import ParallelStencil.FieldAllocators: @IField, @XXYField, @XYYField TEST_PACKAGES = SUPPORTED_PACKAGES @static if PKG_CUDA in TEST_PACKAGES import CUDA @@ -31,101 +33,105 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin - @require !@is_initialized() - @init_parallel_stencil($package, $FloatDefault, 2) - @require @is_initialized() - nx, ny = 7, 5 - A = @rand(nx, ny ); - Ax = @rand(nx+1, ny ); - Ay = @rand(nx, ny+1); - Axy = @rand(nx+1, ny+1); - Axx = @rand(nx+2, ny ); - Ayy = @rand(nx, ny+2); - Axyy = @rand(nx+1, ny+2); - Axxy = @rand(nx+2, ny+1); - Axxyy = @rand(nx+2, ny+2); - R = @zeros(nx, ny ); - Rxxyy = @zeros(nx+2, ny+2); - @testset "1. compute macros" begin - @testset "differences" begin - @parallel d_xa!(R, Ax) = (@all(R) = @d_xa(Ax); return) - @parallel d_ya!(R, Ay) = (@all(R) = @d_ya(Ay); return) - @parallel d_xi!(R, Axyy) = (@all(R) = @d_xi(Axyy); return) - @parallel d_yi!(R, Axxy) = (@all(R) = @d_yi(Axxy); return) - @parallel d2_xa!(R, Axx) = (@all(R) = @d2_xa(Axx); return) - @parallel d2_ya!(R, Ayy) = (@all(R) = @d2_ya(Ayy); return) - @parallel d2_xi!(R, Axxyy) = (@all(R) = @d2_xi(Axxyy); return) - @parallel d2_yi!(R, Axxyy) = (@all(R) = @d2_yi(Axxyy); return) - R.=0; @parallel d_xa!(R, Ax); @test all(Array(R .== Ax[2:end, :].-Ax[1:end-1, :])) - R.=0; @parallel d_ya!(R, Ay); @test all(Array(R .== Ay[ :,2:end].-Ay[ :,1:end-1])) - R.=0; @parallel d_xi!(R, Axyy); @test all(Array(R .== Axyy[2:end ,2:end-1].-Axyy[1:end-1,2:end-1])) - R.=0; @parallel d_yi!(R, Axxy); @test all(Array(R .== Axxy[2:end-1,2:end ].-Axxy[2:end-1,1:end-1])) - R.=0; @parallel d2_xa!(R, Axx); @test all(Array(R .== (Axx[3:end, :].-Axx[2:end-1, :]).-(Axx[2:end-1, :].-Axx[1:end-2, :]))) - R.=0; @parallel d2_ya!(R, Ayy); @test all(Array(R .== (Ayy[ :,3:end].-Ayy[ :,2:end-1]).-(Ayy[ :,2:end-1].-Ayy[ :,1:end-2]))) - R.=0; @parallel d2_xi!(R, Axxyy); @test all(Array(R .== (Axxyy[3:end,2:end-1].-Axxyy[2:end-1,2:end-1]).-(Axxyy[2:end-1,2:end-1].-Axxyy[1:end-2,2:end-1]))) - R.=0; @parallel d2_yi!(R, Axxyy); @test all(Array(R .== (Axxyy[2:end-1,3:end].-Axxyy[2:end-1,2:end-1]).-(Axxyy[2:end-1,2:end-1].-Axxyy[2:end-1,1:end-2]))) + $(interpolate(:__padding__, (false, package!=PKG_POLYESTER), :( #TODO: this needs to be restored to (false, true) when Polyester supports padding. + @testset "(padding=$__padding__)" begin + @require !@is_initialized() + @init_parallel_stencil($package, $FloatDefault, 2, padding=__padding__) + @require @is_initialized() + nxy = (9, 7) + A = @IField(nxy, @rand); + Ax = @XField(nxy, @rand); + Ay = @YField(nxy, @rand); + Axy = @XYField(nxy, @rand); + Axx = @XXField(nxy, @rand); + Ayy = @YYField(nxy, @rand); + Axyy = @XYYField(nxy, @rand); + Axxy = @XXYField(nxy, @rand); + Axxyy = @Field(nxy, @rand); + R = @IField(nxy, @zeros); + Rxxyy = @Field(nxy, @zeros); + @testset "1. compute macros" begin + @testset "differences" begin + @parallel d_xa!(R, Ax) = (@all(R) = @d_xa(Ax); return) + @parallel d_ya!(R, Ay) = (@all(R) = @d_ya(Ay); return) + @parallel d_xi!(R, Axyy) = (@all(R) = @d_xi(Axyy); return) + @parallel d_yi!(R, Axxy) = (@all(R) = @d_yi(Axxy); return) + @parallel d2_xa!(R, Axx) = (@all(R) = @d2_xa(Axx); return) + @parallel d2_ya!(R, Ayy) = (@all(R) = @d2_ya(Ayy); return) + @parallel d2_xi!(R, Axxyy) = (@all(R) = @d2_xi(Axxyy); return) + @parallel d2_yi!(R, Axxyy) = (@all(R) = @d2_yi(Axxyy); return) + R.=0; @parallel d_xa!(R, Ax); @test all(Array(R .== Ax[2:end, :].-Ax[1:end-1, :])) + R.=0; @parallel d_ya!(R, Ay); @test all(Array(R .== Ay[ :,2:end].-Ay[ :,1:end-1])) + R.=0; @parallel d_xi!(R, Axyy); @test all(Array(R .== Axyy[2:end ,2:end-1].-Axyy[1:end-1,2:end-1])) + R.=0; @parallel d_yi!(R, Axxy); @test all(Array(R .== Axxy[2:end-1,2:end ].-Axxy[2:end-1,1:end-1])) + R.=0; @parallel d2_xa!(R, Axx); @test all(Array(R .== (Axx[3:end, :].-Axx[2:end-1, :]).-(Axx[2:end-1, :].-Axx[1:end-2, :]))) + R.=0; @parallel d2_ya!(R, Ayy); @test all(Array(R .== (Ayy[ :,3:end].-Ayy[ :,2:end-1]).-(Ayy[ :,2:end-1].-Ayy[ :,1:end-2]))) + R.=0; @parallel d2_xi!(R, Axxyy); @test all(Array(R .== (Axxyy[3:end,2:end-1].-Axxyy[2:end-1,2:end-1]).-(Axxyy[2:end-1,2:end-1].-Axxyy[1:end-2,2:end-1]))) + R.=0; @parallel d2_yi!(R, Axxyy); @test all(Array(R .== (Axxyy[2:end-1,3:end].-Axxyy[2:end-1,2:end-1]).-(Axxyy[2:end-1,2:end-1].-Axxyy[2:end-1,1:end-2]))) + end; + @testset "selection" begin + @parallel all!(R, A) = (@all(R) = @all(A); return) + @parallel inn!(R, Axxyy) = (@all(R) = @inn(Axxyy); return) + @parallel inn_x!(R, Axx) = (@all(R) = @inn_x(Axx); return) + @parallel inn_y!(R, Ayy) = (@all(R) = @inn_y(Ayy); return) + R.=0; @parallel all!(R, A); @test all(Array(R .== A)) + R.=0; @parallel inn!(R, Axxyy); @test all(Array(R .== Axxyy[2:end-1,2:end-1])) + R.=0; @parallel inn_x!(R, Axx); @test all(Array(R .== Axx[2:end-1, :])) + R.=0; @parallel inn_y!(R, Ayy); @test all(Array(R .== Ayy[ :,2:end-1])) + end; + @testset "averages" begin + @parallel av!(R, Axy) = (@all(R) = @av(Axy); return) + @parallel av_xa!(R, Ax) = (@all(R) = @av_xa(Ax); return) + @parallel av_ya!(R, Ay) = (@all(R) = @av_ya(Ay); return) + @parallel av_xi!(R, Axyy) = (@all(R) = @av_xi(Axyy); return) + @parallel av_yi!(R, Axxy) = (@all(R) = @av_yi(Axxy); return) + R.=0; @parallel av!(R, Axy); @test all(Array(R .== (Axy[1:end-1,1:end-1].+Axy[2:end,1:end-1].+Axy[1:end-1,2:end].+Axy[2:end,2:end]).*$FloatDefault(0.25))) + R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :].+Ax[1:end-1, :]).*$FloatDefault(0.5))) + R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end].+Ay[ :,1:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_xi!(R, Axyy); @test all(Array(R .== (Axyy[2:end ,2:end-1].+Axyy[1:end-1,2:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_yi!(R, Axxy); @test all(Array(R .== (Axxy[2:end-1,2:end ].+Axxy[2:end-1,1:end-1]).*$FloatDefault(0.5))) + end; + @testset "harmonic averages" begin + @parallel harm!(R, Axy) = (@all(R) = @harm(Axy); return) + @parallel harm_xa!(R, Ax) = (@all(R) = @harm_xa(Ax); return) + @parallel harm_ya!(R, Ay) = (@all(R) = @harm_ya(Ay); return) + @parallel harm_xi!(R, Axyy) = (@all(R) = @harm_xi(Axyy); return) + @parallel harm_yi!(R, Axxy) = (@all(R) = @harm_yi(Axxy); return) + R.=0; @parallel harm!(R, Axy); @test all(Array(R .== 4 ./(1 ./Axy[1:end-1,1:end-1].+1 ./Axy[2:end,1:end-1].+1 ./Axy[1:end-1,2:end].+1 ./Axy[2:end,2:end]))) + R.=0; @parallel harm_xa!(R, Ax); @test all(Array(R .== 2 ./(1 ./Ax[2:end, :].+1 ./Ax[1:end-1, :]))) + R.=0; @parallel harm_ya!(R, Ay); @test all(Array(R .== 2 ./(1 ./Ay[ :,2:end].+1 ./Ay[ :,1:end-1]))) + R.=0; @parallel harm_xi!(R, Axyy); @test all(Array(R .== 2 ./(1 ./Axyy[2:end ,2:end-1].+1 ./Axyy[1:end-1,2:end-1]))) + R.=0; @parallel harm_yi!(R, Axxy); @test all(Array(R .== 2 ./(1 ./Axxy[2:end-1,2:end ].+1 ./Axxy[2:end-1,1:end-1]))) + end; + @testset "others" begin + @parallel maxloc!(R, Axxyy) = (@all(R) = @maxloc(Axxyy); return) + R.=0; @parallel maxloc!(R, Axxyy); @test all(Array(R .== max.(max.(max.(max.(Axxyy[1:end-2,2:end-1],Axxyy[3:end,2:end-1]),Axxyy[2:end-1,2:end-1]),Axxyy[2:end-1,1:end-2]),Axxyy[2:end-1,3:end]))) + end; + end; + @testset "2. apply masks" begin + @testset "selection" begin + @parallel inn_all!(Rxxyy, A) = (@inn(Rxxyy) = @all(A); return) + @parallel inn_inn!(Rxxyy, Axxyy) = (@inn(Rxxyy) = @inn(Axxyy); return) + Rxxyy.=0; @parallel inn_all!(Rxxyy, A); @test all(Array(Rxxyy[2:end-1,2:end-1] .== A)) + Rxxyy[2:end-1,2:end-1].=0; @test all(Array(Rxxyy .== 0)) # Test that boundary values remained zero. + Rxxyy.=0; @parallel inn_inn!(Rxxyy, Axxyy); @test all(Array(Rxxyy[2:end-1,2:end-1] .== Axxyy[2:end-1,2:end-1])) + Rxxyy[2:end-1,2:end-1].=0; @test all(Array(Rxxyy .== 0)) # Test that boundary values remained zero. + end; + @testset "differences" begin + @parallel inn_d_xa!(Rxxyy, Ax) = (@inn(Rxxyy) = @d_xa(Ax); return) + @parallel inn_d_yi!(Rxxyy, Axxy) = (@inn(Rxxyy) = @d_yi(Axxy); return) + @parallel inn_d2_yi!(Rxxyy, Axxyy) = (@inn(Rxxyy) = @d2_yi(Axxyy); return) + Rxxyy.=0; @parallel inn_d_xa!(Rxxyy, Ax); @test all(Array(Rxxyy[2:end-1,2:end-1] .== Ax[2:end, :].-Ax[1:end-1, :])) + Rxxyy[2:end-1,2:end-1].=0; @test all(Array(Rxxyy .== 0)) # Test that boundary values remained zero. + Rxxyy.=0; @parallel inn_d_yi!(Rxxyy, Axxy); @test all(Array(Rxxyy[2:end-1,2:end-1] .== Axxy[2:end-1,2:end ].-Axxy[2:end-1,1:end-1])) + Rxxyy[2:end-1,2:end-1].=0; @test all(Array(Rxxyy .== 0)) # Test that boundary values remained zero. + Rxxyy.=0; @parallel inn_d2_yi!(Rxxyy, Axxyy); @test all(Array(Rxxyy[2:end-1,2:end-1] .== (Axxyy[2:end-1,3:end].-Axxyy[2:end-1,2:end-1]).-(Axxyy[2:end-1,2:end-1].-Axxyy[2:end-1,1:end-2]))) + Rxxyy[2:end-1,2:end-1].=0; @test all(Array(Rxxyy .== 0)) # Test that boundary values remained zero. + end; + end; + @reset_parallel_stencil() end; - @testset "selection" begin - @parallel all!(R, A) = (@all(R) = @all(A); return) - @parallel inn!(R, Axxyy) = (@all(R) = @inn(Axxyy); return) - @parallel inn_x!(R, Axx) = (@all(R) = @inn_x(Axx); return) - @parallel inn_y!(R, Ayy) = (@all(R) = @inn_y(Ayy); return) - R.=0; @parallel all!(R, A); @test all(Array(R .== A)) - R.=0; @parallel inn!(R, Axxyy); @test all(Array(R .== Axxyy[2:end-1,2:end-1])) - R.=0; @parallel inn_x!(R, Axx); @test all(Array(R .== Axx[2:end-1, :])) - R.=0; @parallel inn_y!(R, Ayy); @test all(Array(R .== Ayy[ :,2:end-1])) - end; - @testset "averages" begin - @parallel av!(R, Axy) = (@all(R) = @av(Axy); return) - @parallel av_xa!(R, Ax) = (@all(R) = @av_xa(Ax); return) - @parallel av_ya!(R, Ay) = (@all(R) = @av_ya(Ay); return) - @parallel av_xi!(R, Axyy) = (@all(R) = @av_xi(Axyy); return) - @parallel av_yi!(R, Axxy) = (@all(R) = @av_yi(Axxy); return) - R.=0; @parallel av!(R, Axy); @test all(Array(R .== (Axy[1:end-1,1:end-1].+Axy[2:end,1:end-1].+Axy[1:end-1,2:end].+Axy[2:end,2:end]).*$FloatDefault(0.25))) - R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :].+Ax[1:end-1, :]).*$FloatDefault(0.5))) - R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end].+Ay[ :,1:end-1]).*$FloatDefault(0.5))) - R.=0; @parallel av_xi!(R, Axyy); @test all(Array(R .== (Axyy[2:end ,2:end-1].+Axyy[1:end-1,2:end-1]).*$FloatDefault(0.5))) - R.=0; @parallel av_yi!(R, Axxy); @test all(Array(R .== (Axxy[2:end-1,2:end ].+Axxy[2:end-1,1:end-1]).*$FloatDefault(0.5))) - end; - @testset "harmonic averages" begin - @parallel harm!(R, Axy) = (@all(R) = @harm(Axy); return) - @parallel harm_xa!(R, Ax) = (@all(R) = @harm_xa(Ax); return) - @parallel harm_ya!(R, Ay) = (@all(R) = @harm_ya(Ay); return) - @parallel harm_xi!(R, Axyy) = (@all(R) = @harm_xi(Axyy); return) - @parallel harm_yi!(R, Axxy) = (@all(R) = @harm_yi(Axxy); return) - R.=0; @parallel harm!(R, Axy); @test all(Array(R .== 4 ./(1 ./Axy[1:end-1,1:end-1].+1 ./Axy[2:end,1:end-1].+1 ./Axy[1:end-1,2:end].+1 ./Axy[2:end,2:end]))) - R.=0; @parallel harm_xa!(R, Ax); @test all(Array(R .== 2 ./(1 ./Ax[2:end, :].+1 ./Ax[1:end-1, :]))) - R.=0; @parallel harm_ya!(R, Ay); @test all(Array(R .== 2 ./(1 ./Ay[ :,2:end].+1 ./Ay[ :,1:end-1]))) - R.=0; @parallel harm_xi!(R, Axyy); @test all(Array(R .== 2 ./(1 ./Axyy[2:end ,2:end-1].+1 ./Axyy[1:end-1,2:end-1]))) - R.=0; @parallel harm_yi!(R, Axxy); @test all(Array(R .== 2 ./(1 ./Axxy[2:end-1,2:end ].+1 ./Axxy[2:end-1,1:end-1]))) - end; - @testset "others" begin - @parallel maxloc!(R, Axxyy) = (@all(R) = @maxloc(Axxyy); return) - R.=0; @parallel maxloc!(R, Axxyy); @test all(Array(R .== max.(max.(max.(max.(Axxyy[1:end-2,2:end-1],Axxyy[3:end,2:end-1]),Axxyy[2:end-1,2:end-1]),Axxyy[2:end-1,1:end-2]),Axxyy[2:end-1,3:end]))) - end; - end; - @testset "2. apply masks" begin - @testset "selection" begin - @parallel inn_all!(Rxxyy, A) = (@inn(Rxxyy) = @all(A); return) - @parallel inn_inn!(Rxxyy, Axxyy) = (@inn(Rxxyy) = @inn(Axxyy); return) - Rxxyy.=0; @parallel inn_all!(Rxxyy, A); @test all(Array(Rxxyy[2:end-1,2:end-1] .== A)) - Rxxyy[2:end-1,2:end-1].=0; @test all(Array(Rxxyy .== 0)) # Test that boundary values remained zero. - Rxxyy.=0; @parallel inn_inn!(Rxxyy, Axxyy); @test all(Array(Rxxyy[2:end-1,2:end-1] .== Axxyy[2:end-1,2:end-1])) - Rxxyy[2:end-1,2:end-1].=0; @test all(Array(Rxxyy .== 0)) # Test that boundary values remained zero. - end; - @testset "differences" begin - @parallel inn_d_xa!(Rxxyy, Ax) = (@inn(Rxxyy) = @d_xa(Ax); return) - @parallel inn_d_yi!(Rxxyy, Axxy) = (@inn(Rxxyy) = @d_yi(Axxy); return) - @parallel inn_d2_yi!(Rxxyy, Axxyy) = (@inn(Rxxyy) = @d2_yi(Axxyy); return) - Rxxyy.=0; @parallel inn_d_xa!(Rxxyy, Ax); @test all(Array(Rxxyy[2:end-1,2:end-1] .== Ax[2:end, :].-Ax[1:end-1, :])) - Rxxyy[2:end-1,2:end-1].=0; @test all(Array(Rxxyy .== 0)) # Test that boundary values remained zero. - Rxxyy.=0; @parallel inn_d_yi!(Rxxyy, Axxy); @test all(Array(Rxxyy[2:end-1,2:end-1] .== Axxy[2:end-1,2:end ].-Axxy[2:end-1,1:end-1])) - Rxxyy[2:end-1,2:end-1].=0; @test all(Array(Rxxyy .== 0)) # Test that boundary values remained zero. - Rxxyy.=0; @parallel inn_d2_yi!(Rxxyy, Axxyy); @test all(Array(Rxxyy[2:end-1,2:end-1] .== (Axxyy[2:end-1,3:end].-Axxyy[2:end-1,2:end-1]).-(Axxyy[2:end-1,2:end-1].-Axxyy[2:end-1,1:end-2]))) - Rxxyy[2:end-1,2:end-1].=0; @test all(Array(Rxxyy .== 0)) # Test that boundary values remained zero. - end; - end; - @reset_parallel_stencil() + ))) end; )) diff --git a/test/test_FiniteDifferences3D.jl b/test/test_FiniteDifferences3D.jl index e41045e3..481a6168 100644 --- a/test/test_FiniteDifferences3D.jl +++ b/test/test_FiniteDifferences3D.jl @@ -1,8 +1,10 @@ using Test using ParallelStencil import ParallelStencil: @reset_parallel_stencil, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_POLYESTER -import ParallelStencil: @require +import ParallelStencil: @require, interpolate using ParallelStencil.FiniteDifferences3D +using ParallelStencil.FieldAllocators +import ParallelStencil.FieldAllocators: @IField, @XXYField, @XYYField, @XYZField, @XXYZField, @XYYZField, @XYZZField, @XXYYField, @XXZZField, @YYZZField, @XXYYZField, @XYYZZField, @XXYZZField TEST_PACKAGES = SUPPORTED_PACKAGES @static if PKG_CUDA in TEST_PACKAGES import CUDA @@ -31,155 +33,159 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin - @require !@is_initialized() - @init_parallel_stencil($package, $FloatDefault, 3) - @require @is_initialized() - nx, ny, nz = 7, 5, 6 - A = @rand(nx , ny , nz ); - Ax = @rand(nx+1, ny , nz ); - Ay = @rand(nx , ny+1, nz ); - Az = @rand(nx , ny , nz+1); - Axy = @rand(nx+1, ny+1, nz ); - Axz = @rand(nx+1, ny , nz+1); - Ayz = @rand(nx , ny+1, nz+1); - Axyz = @rand(nx+1, ny+1, nz+1); - Axyzz = @rand(nx+1, ny+1, nz+2); - Axyyz = @rand(nx+1, ny+2, nz+1); - Axxyz = @rand(nx+2, ny+1, nz+1); - Axx = @rand(nx+2, ny , nz ); - Ayy = @rand(nx , ny+2, nz ); - Azz = @rand(nx , ny , nz+2); - Axxyy = @rand(nx+2, ny+2, nz ); - Axxzz = @rand(nx+2, ny , nz+2); - Ayyzz = @rand(nx , ny+2, nz+2); - Axyyzz = @rand(nx+1, ny+2, nz+2); - Axxyzz = @rand(nx+2, ny+1, nz+2); - Axxyyz = @rand(nx+2, ny+2, nz+1); - Axxyyzz = @rand(nx+2, ny+2, nz+2); - R = @zeros(nx , ny , nz ); - Rxxyyzz = @zeros(nx+2, ny+2, nz+2); - @testset "1. compute macros" begin - @testset "differences" begin - @parallel d_xa!(R, Ax) = (@all(R) = @d_xa(Ax); return) - @parallel d_ya!(R, Ay) = (@all(R) = @d_ya(Ay); return) - @parallel d_za!(R, Az) = (@all(R) = @d_za(Az); return) - @parallel d_xi!(R, Axyyzz) = (@all(R) = @d_xi(Axyyzz); return) - @parallel d_yi!(R, Axxyzz) = (@all(R) = @d_yi(Axxyzz); return) - @parallel d_zi!(R, Axxyyz) = (@all(R) = @d_zi(Axxyyz); return) - @parallel d2_xi!(R, Axxyyzz) = (@all(R) = @d2_xi(Axxyyzz); return) - @parallel d2_yi!(R, Axxyyzz) = (@all(R) = @d2_yi(Axxyyzz); return) - @parallel d2_zi!(R, Axxyyzz) = (@all(R) = @d2_zi(Axxyyzz); return) - R.=0; @parallel d_xa!(R, Ax); @test all(Array(R .== Ax[2:end, :, :].-Ax[1:end-1, :, :])) - R.=0; @parallel d_ya!(R, Ay); @test all(Array(R .== Ay[ :,2:end, :].-Ay[ :,1:end-1, :])) - R.=0; @parallel d_za!(R, Az); @test all(Array(R .== Az[ :, :,2:end].-Az[ :, :,1:end-1])) - R.=0; @parallel d_xi!(R, Axyyzz); @test all(Array(R .== Axyyzz[2:end ,2:end-1,2:end-1].-Axyyzz[1:end-1,2:end-1,2:end-1])) - R.=0; @parallel d_yi!(R, Axxyzz); @test all(Array(R .== Axxyzz[2:end-1,2:end ,2:end-1].-Axxyzz[2:end-1,1:end-1,2:end-1])) - R.=0; @parallel d_zi!(R, Axxyyz); @test all(Array(R .== Axxyyz[2:end-1,2:end-1,2:end ].-Axxyyz[2:end-1,2:end-1,1:end-1])) - R.=0; @parallel d2_xi!(R, Axxyyzz); @test all(Array(R .== (Axxyyzz[3:end,2:end-1,2:end-1].-Axxyyzz[2:end-1,2:end-1,2:end-1]).-(Axxyyzz[2:end-1,2:end-1,2:end-1].-Axxyyzz[1:end-2,2:end-1,2:end-1]))) - R.=0; @parallel d2_yi!(R, Axxyyzz); @test all(Array(R .== (Axxyyzz[2:end-1,3:end,2:end-1].-Axxyyzz[2:end-1,2:end-1,2:end-1]).-(Axxyyzz[2:end-1,2:end-1,2:end-1].-Axxyyzz[2:end-1,1:end-2,2:end-1]))) - R.=0; @parallel d2_zi!(R, Axxyyzz); @test all(Array(R .== (Axxyyzz[2:end-1,2:end-1,3:end].-Axxyyzz[2:end-1,2:end-1,2:end-1]).-(Axxyyzz[2:end-1,2:end-1,2:end-1].-Axxyyzz[2:end-1,2:end-1,1:end-2]))) + $(interpolate(:__padding__, (false, package!=PKG_POLYESTER), :( #TODO: this needs to be restored to (false, true) when Polyester supports padding. + @testset "(padding=$__padding__)" begin + @require !@is_initialized() + @init_parallel_stencil($package, $FloatDefault, 3, padding=__padding__) + @require @is_initialized() + nxyz = (9, 7, 8) + A = @IField(nxyz, @rand) + Ax = @XField(nxyz, @rand) + Ay = @YField(nxyz, @rand) + Az = @ZField(nxyz, @rand) + Axy = @XYField(nxyz, @rand) + Axz = @XZField(nxyz, @rand) + Ayz = @YZField(nxyz, @rand) + Axyz = @XYZField(nxyz, @rand) + Axyzz = @XYZZField(nxyz, @rand) + Axyyz = @XYYZField(nxyz, @rand) + Axxyz = @XXYZField(nxyz, @rand) + Axx = @XXField(nxyz, @rand) + Ayy = @YYField(nxyz, @rand) + Azz = @ZZField(nxyz, @rand) + Axxyy = @XXYYField(nxyz, @rand) + Axxzz = @XXZZField(nxyz, @rand) + Ayyzz = @YYZZField(nxyz, @rand) + Axyyzz = @XYYZZField(nxyz, @rand) + Axxyzz = @XXYZZField(nxyz, @rand) + Axxyyz = @XXYYZField(nxyz, @rand) + Axxyyzz = @Field(nxyz, @rand) + R = @IField(nxyz, @zeros) + Rxxyyzz = @Field(nxyz, @zeros) + @testset "1. compute macros" begin + @testset "differences" begin + @parallel d_xa!(R, Ax) = (@all(R) = @d_xa(Ax); return) + @parallel d_ya!(R, Ay) = (@all(R) = @d_ya(Ay); return) + @parallel d_za!(R, Az) = (@all(R) = @d_za(Az); return) + @parallel d_xi!(R, Axyyzz) = (@all(R) = @d_xi(Axyyzz); return) + @parallel d_yi!(R, Axxyzz) = (@all(R) = @d_yi(Axxyzz); return) + @parallel d_zi!(R, Axxyyz) = (@all(R) = @d_zi(Axxyyz); return) + @parallel d2_xi!(R, Axxyyzz) = (@all(R) = @d2_xi(Axxyyzz); return) + @parallel d2_yi!(R, Axxyyzz) = (@all(R) = @d2_yi(Axxyyzz); return) + @parallel d2_zi!(R, Axxyyzz) = (@all(R) = @d2_zi(Axxyyzz); return) + R.=0; @parallel d_xa!(R, Ax); @test all(Array(R .== Ax[2:end, :, :].-Ax[1:end-1, :, :])) + R.=0; @parallel d_ya!(R, Ay); @test all(Array(R .== Ay[ :,2:end, :].-Ay[ :,1:end-1, :])) + R.=0; @parallel d_za!(R, Az); @test all(Array(R .== Az[ :, :,2:end].-Az[ :, :,1:end-1])) + R.=0; @parallel d_xi!(R, Axyyzz); @test all(Array(R .== Axyyzz[2:end ,2:end-1,2:end-1].-Axyyzz[1:end-1,2:end-1,2:end-1])) + R.=0; @parallel d_yi!(R, Axxyzz); @test all(Array(R .== Axxyzz[2:end-1,2:end ,2:end-1].-Axxyzz[2:end-1,1:end-1,2:end-1])) + R.=0; @parallel d_zi!(R, Axxyyz); @test all(Array(R .== Axxyyz[2:end-1,2:end-1,2:end ].-Axxyyz[2:end-1,2:end-1,1:end-1])) + R.=0; @parallel d2_xi!(R, Axxyyzz); @test all(Array(R .== (Axxyyzz[3:end,2:end-1,2:end-1].-Axxyyzz[2:end-1,2:end-1,2:end-1]).-(Axxyyzz[2:end-1,2:end-1,2:end-1].-Axxyyzz[1:end-2,2:end-1,2:end-1]))) + R.=0; @parallel d2_yi!(R, Axxyyzz); @test all(Array(R .== (Axxyyzz[2:end-1,3:end,2:end-1].-Axxyyzz[2:end-1,2:end-1,2:end-1]).-(Axxyyzz[2:end-1,2:end-1,2:end-1].-Axxyyzz[2:end-1,1:end-2,2:end-1]))) + R.=0; @parallel d2_zi!(R, Axxyyzz); @test all(Array(R .== (Axxyyzz[2:end-1,2:end-1,3:end].-Axxyyzz[2:end-1,2:end-1,2:end-1]).-(Axxyyzz[2:end-1,2:end-1,2:end-1].-Axxyyzz[2:end-1,2:end-1,1:end-2]))) + end; + @testset "selection" begin + @parallel all!(R, A) = (@all(R) = @all(A); return) + @parallel inn!(R, Axxyyzz) = (@all(R) = @inn(Axxyyzz); return) + @parallel inn_x!(R, Axx) = (@all(R) = @inn_x(Axx); return) + @parallel inn_y!(R, Ayy) = (@all(R) = @inn_y(Ayy); return) + @parallel inn_z!(R, Azz) = (@all(R) = @inn_z(Azz); return) + @parallel inn_xy!(R, Axxyy) = (@all(R) = @inn_xy(Axxyy); return) + @parallel inn_xz!(R, Axxzz) = (@all(R) = @inn_xz(Axxzz); return) + @parallel inn_yz!(R, Ayyzz) = (@all(R) = @inn_yz(Ayyzz); return) + R.=0; @parallel all!(R, A); @test all(Array(R .== A)) + R.=0; @parallel inn!(R, Axxyyzz); @test all(Array(R .== Axxyyzz[2:end-1,2:end-1,2:end-1])) + R.=0; @parallel inn_x!(R, Axx); @test all(Array(R .== Axx[2:end-1, :, :])) + R.=0; @parallel inn_y!(R, Ayy); @test all(Array(R .== Ayy[ :,2:end-1, :])) + R.=0; @parallel inn_z!(R, Azz); @test all(Array(R .== Azz[ :, :,2:end-1])) + R.=0; @parallel inn_xy!(R, Axxyy); @test all(Array(R .== Axxyy[2:end-1,2:end-1, :])) + R.=0; @parallel inn_xz!(R, Axxzz); @test all(Array(R .== Axxzz[2:end-1, :,2:end-1])) + R.=0; @parallel inn_yz!(R, Ayyzz); @test all(Array(R .== Ayyzz[ :,2:end-1,2:end-1])) + end; + @testset "averages" begin + @parallel av!(R, Axyz) = (@all(R) = @av(Axyz); return) + @parallel av_xa!(R, Ax) = (@all(R) = @av_xa(Ax); return) + @parallel av_ya!(R, Ay) = (@all(R) = @av_ya(Ay); return) + @parallel av_za!(R, Az) = (@all(R) = @av_za(Az); return) + @parallel av_xi!(R, Axyyzz) = (@all(R) = @av_xi(Axyyzz); return) + @parallel av_yi!(R, Axxyzz) = (@all(R) = @av_yi(Axxyzz); return) + @parallel av_zi!(R, Axxyyz) = (@all(R) = @av_zi(Axxyyz); return) + @parallel av_xya!(R, Axy) = (@all(R) = @av_xya(Axy); return) + @parallel av_xza!(R, Axz) = (@all(R) = @av_xza(Axz); return) + @parallel av_yza!(R, Ayz) = (@all(R) = @av_yza(Ayz); return) + @parallel av_xyi!(R, Axyzz) = (@all(R) = @av_xyi(Axyzz); return) + @parallel av_xzi!(R, Axyyz) = (@all(R) = @av_xzi(Axyyz); return) + @parallel av_yzi!(R, Axxyz) = (@all(R) = @av_yzi(Axxyz); return) + R.=0; @parallel av!(R, Axyz); @test all(Array(R .== (Axyz[1:end-1,1:end-1,1:end-1].+Axyz[2:end,1:end-1,1:end-1].+Axyz[1:end-1,2:end,1:end-1].+Axyz[2:end,2:end,1:end-1].+Axyz[1:end-1,1:end-1,2:end].+Axyz[2:end,1:end-1,2:end].+Axyz[1:end-1,2:end,2:end].+Axyz[2:end,2:end,2:end])*0.125)) + R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :, :].+Ax[1:end-1, :, :]).*$FloatDefault(0.5))) + R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end, :].+Ay[ :,1:end-1, :]).*$FloatDefault(0.5))) + R.=0; @parallel av_za!(R, Az); @test all(Array(R .== (Az[ :, :,2:end].+Az[ :, :,1:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_xi!(R, Axyyzz); @test all(Array(R .== (Axyyzz[2:end ,2:end-1,2:end-1].+Axyyzz[1:end-1,2:end-1,2:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_yi!(R, Axxyzz); @test all(Array(R .== (Axxyzz[2:end-1,2:end ,2:end-1].+Axxyzz[2:end-1,1:end-1,2:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_zi!(R, Axxyyz); @test all(Array(R .== (Axxyyz[2:end-1,2:end-1,2:end ].+Axxyyz[2:end-1,2:end-1,1:end-1]).*$FloatDefault(0.5))) + R.=0; @parallel av_xya!(R, Axy); @test all(Array(R .== (Axy[1:end-1,1:end-1,:].+Axy[2:end,1:end-1,:].+Axy[1:end-1,2:end,:].+Axy[2:end,2:end,:]).*$FloatDefault(0.25))) + R.=0; @parallel av_xza!(R, Axz); @test all(Array(R .== (Axz[1:end-1,:,1:end-1].+Axz[2:end,:,1:end-1].+Axz[1:end-1,:,2:end].+Axz[2:end,:,2:end]).*$FloatDefault(0.25))) + R.=0; @parallel av_yza!(R, Ayz); @test all(Array(R .== (Ayz[:,1:end-1,1:end-1].+Ayz[:,2:end,1:end-1].+Ayz[:,1:end-1,2:end].+Ayz[:,2:end,2:end]).*$FloatDefault(0.25))) + R.=0; @parallel av_xyi!(R, Axyzz); @test all(Array(R .== (Axyzz[1:end-1,1:end-1,2:end-1].+Axyzz[2:end,1:end-1,2:end-1].+Axyzz[1:end-1,2:end,2:end-1].+Axyzz[2:end,2:end,2:end-1]).*$FloatDefault(0.25))) + R.=0; @parallel av_xzi!(R, Axyyz); @test all(Array(R .== (Axyyz[1:end-1,2:end-1,1:end-1].+Axyyz[2:end,2:end-1,1:end-1].+Axyyz[1:end-1,2:end-1,2:end].+Axyyz[2:end,2:end-1,2:end]).*$FloatDefault(0.25))) + R.=0; @parallel av_yzi!(R, Axxyz); @test all(Array(R .== (Axxyz[2:end-1,1:end-1,1:end-1].+Axxyz[2:end-1,2:end,1:end-1].+Axxyz[2:end-1,1:end-1,2:end].+Axxyz[2:end-1,2:end,2:end]).*$FloatDefault(0.25))) + end; + @testset "harmonic averages" begin + @parallel harm!(R, Axyz) = (@all(R) = @harm(Axyz); return) + @parallel harm_xa!(R, Ax) = (@all(R) = @harm_xa(Ax); return) + @parallel harm_ya!(R, Ay) = (@all(R) = @harm_ya(Ay); return) + @parallel harm_za!(R, Az) = (@all(R) = @harm_za(Az); return) + @parallel harm_xi!(R, Axyyzz) = (@all(R) = @harm_xi(Axyyzz); return) + @parallel harm_yi!(R, Axxyzz) = (@all(R) = @harm_yi(Axxyzz); return) + @parallel harm_zi!(R, Axxyyz) = (@all(R) = @harm_zi(Axxyyz); return) + @parallel harm_xya!(R, Axy) = (@all(R) = @harm_xya(Axy); return) + @parallel harm_xza!(R, Axz) = (@all(R) = @harm_xza(Axz); return) + @parallel harm_yza!(R, Ayz) = (@all(R) = @harm_yza(Ayz); return) + @parallel harm_xyi!(R, Axyzz) = (@all(R) = @harm_xyi(Axyzz); return) + @parallel harm_xzi!(R, Axyyz) = (@all(R) = @harm_xzi(Axyyz); return) + @parallel harm_yzi!(R, Axxyz) = (@all(R) = @harm_yzi(Axxyz); return) + R.=0; @parallel harm!(R, Axyz); @test all(Array(R .== 8 ./(1 ./Axyz[1:end-1,1:end-1,1:end-1].+1 ./Axyz[2:end,1:end-1,1:end-1].+1 ./Axyz[1:end-1,2:end,1:end-1].+1 ./Axyz[2:end,2:end,1:end-1].+1 ./Axyz[1:end-1,1:end-1,2:end].+1 ./Axyz[2:end,1:end-1,2:end].+1 ./Axyz[1:end-1,2:end,2:end].+1 ./Axyz[2:end,2:end,2:end]) )) + R.=0; @parallel harm_xa!(R, Ax); @test all(Array(R .== 2 ./(1 ./Ax[2:end, :, :].+1 ./Ax[1:end-1, :, :]))) + R.=0; @parallel harm_ya!(R, Ay); @test all(Array(R .== 2 ./(1 ./Ay[ :,2:end, :].+1 ./Ay[ :,1:end-1, :]))) + R.=0; @parallel harm_za!(R, Az); @test all(Array(R .== 2 ./(1 ./Az[ :, :,2:end].+1 ./Az[ :, :,1:end-1]))) + R.=0; @parallel harm_xi!(R, Axyyzz); @test all(Array(R .== 2 ./(1 ./Axyyzz[2:end ,2:end-1,2:end-1].+1 ./Axyyzz[1:end-1,2:end-1,2:end-1]))) + R.=0; @parallel harm_yi!(R, Axxyzz); @test all(Array(R .== 2 ./(1 ./Axxyzz[2:end-1,2:end ,2:end-1].+1 ./Axxyzz[2:end-1,1:end-1,2:end-1]))) + R.=0; @parallel harm_zi!(R, Axxyyz); @test all(Array(R .== 2 ./(1 ./Axxyyz[2:end-1,2:end-1,2:end ].+1 ./Axxyyz[2:end-1,2:end-1,1:end-1]))) + R.=0; @parallel harm_xya!(R, Axy); @test all(Array(R .== 4 ./(1 ./Axy[1:end-1,1:end-1,:].+1 ./Axy[2:end,1:end-1,:].+1 ./Axy[1:end-1,2:end,:].+1 ./Axy[2:end,2:end,:]))) + R.=0; @parallel harm_xza!(R, Axz); @test all(Array(R .== 4 ./(1 ./Axz[1:end-1,:,1:end-1].+1 ./Axz[2:end,:,1:end-1].+1 ./Axz[1:end-1,:,2:end].+1 ./Axz[2:end,:,2:end]))) + R.=0; @parallel harm_yza!(R, Ayz); @test all(Array(R .== 4 ./(1 ./Ayz[:,1:end-1,1:end-1].+1 ./Ayz[:,2:end,1:end-1].+1 ./Ayz[:,1:end-1,2:end].+1 ./Ayz[:,2:end,2:end]))) + R.=0; @parallel harm_xyi!(R, Axyzz); @test all(Array(R .== 4 ./(1 ./Axyzz[1:end-1,1:end-1,2:end-1].+1 ./Axyzz[2:end,1:end-1,2:end-1].+1 ./Axyzz[1:end-1,2:end,2:end-1].+1 ./Axyzz[2:end,2:end,2:end-1]))) + R.=0; @parallel harm_xzi!(R, Axyyz); @test all(Array(R .== 4 ./(1 ./Axyyz[1:end-1,2:end-1,1:end-1].+1 ./Axyyz[2:end,2:end-1,1:end-1].+1 ./Axyyz[1:end-1,2:end-1,2:end].+1 ./Axyyz[2:end,2:end-1,2:end]))) + R.=0; @parallel harm_yzi!(R, Axxyz); @test all(Array(R .== 4 ./(1 ./Axxyz[2:end-1,1:end-1,1:end-1].+1 ./Axxyz[2:end-1,2:end,1:end-1].+1 ./Axxyz[2:end-1,1:end-1,2:end].+1 ./Axxyz[2:end-1,2:end,2:end]))) + end; + @testset "others" begin + @parallel maxloc!(R, Axxyyzz) = (@all(R) = @maxloc(Axxyyzz); return) + R.=0; @parallel maxloc!(R, Axxyyzz); @test all(Array(R .== max.(max.(max.(max.(max.(max.(Axxyyzz[1:end-2,2:end-1,2:end-1],Axxyyzz[3:end,2:end-1,2:end-1]),Axxyyzz[2:end-1,2:end-1,2:end-1]),Axxyyzz[2:end-1,1:end-2,2:end-1]),Axxyyzz[2:end-1,3:end,2:end-1]),Axxyyzz[2:end-1,2:end-1,1:end-2]),Axxyyzz[2:end-1,2:end-1,3:end]))) + end; + end; + @testset "2. apply masks" begin + @testset "selection" begin + @parallel inn_all!(Rxxyyzz, A) = (@inn(Rxxyyzz) = @all(A); return) + @parallel inn_inn!(Rxxyyzz, Axxyyzz) = (@inn(Rxxyyzz) = @inn(Axxyyzz); return) + Rxxyyzz.=0; @parallel inn_all!(Rxxyyzz, A); @test all(Array(Rxxyyzz[2:end-1,2:end-1,2:end-1] .== A)) + Rxxyyzz[2:end-1,2:end-1,2:end-1].=0; @test all(Array(Rxxyyzz .== 0)) # Test that boundary values remained zero. + Rxxyyzz.=0; @parallel inn_inn!(Rxxyyzz, Axxyyzz); @test all(Array(Rxxyyzz[2:end-1,2:end-1,2:end-1] .== Axxyyzz[2:end-1,2:end-1,2:end-1])) + Rxxyyzz[2:end-1,2:end-1,2:end-1].=0; @test all(Array(Rxxyyzz .== 0)) # Test that boundary values remained zero. + end; + @testset "differences" begin + @parallel inn_d_xa!(Rxxyyzz, Ax) = (@inn(Rxxyyzz) = @d_xa(Ax); return) + @parallel inn_d_yi!(Rxxyyzz, Axxyzz) = (@inn(Rxxyyzz) = @d_yi(Axxyzz); return) + @parallel inn_d2_yi!(Rxxyyzz, Axxyyzz) = (@inn(Rxxyyzz) = @d2_yi(Axxyyzz); return) + Rxxyyzz.=0; @parallel inn_d_xa!(Rxxyyzz, Ax); @test all(Array(Rxxyyzz[2:end-1,2:end-1,2:end-1] .== Ax[2:end, :, :].-Ax[1:end-1, :, :])) + Rxxyyzz[2:end-1,2:end-1,2:end-1].=0; @test all(Array(Rxxyyzz .== 0)) # Test that boundary values remained zero. + Rxxyyzz.=0; @parallel inn_d_yi!(Rxxyyzz, Axxyzz); @test all(Array(Rxxyyzz[2:end-1,2:end-1,2:end-1] .== Axxyzz[2:end-1,2:end ,2:end-1].-Axxyzz[2:end-1,1:end-1,2:end-1])) + Rxxyyzz[2:end-1,2:end-1,2:end-1].=0; @test all(Array(Rxxyyzz .== 0)) # Test that boundary values remained zero. + Rxxyyzz.=0; @parallel inn_d2_yi!(Rxxyyzz, Axxyyzz); @test all(Array(Rxxyyzz[2:end-1,2:end-1,2:end-1] .== (Axxyyzz[2:end-1,3:end,2:end-1].-Axxyyzz[2:end-1,2:end-1,2:end-1]).-(Axxyyzz[2:end-1,2:end-1,2:end-1].-Axxyyzz[2:end-1,1:end-2,2:end-1]))) + Rxxyyzz[2:end-1,2:end-1,2:end-1].=0; @test all(Array(Rxxyyzz .== 0)) # Test that boundary values remained zero. + end; + end; + @reset_parallel_stencil() end; - @testset "selection" begin - @parallel all!(R, A) = (@all(R) = @all(A); return) - @parallel inn!(R, Axxyyzz) = (@all(R) = @inn(Axxyyzz); return) - @parallel inn_x!(R, Axx) = (@all(R) = @inn_x(Axx); return) - @parallel inn_y!(R, Ayy) = (@all(R) = @inn_y(Ayy); return) - @parallel inn_z!(R, Azz) = (@all(R) = @inn_z(Azz); return) - @parallel inn_xy!(R, Axxyy) = (@all(R) = @inn_xy(Axxyy); return) - @parallel inn_xz!(R, Axxzz) = (@all(R) = @inn_xz(Axxzz); return) - @parallel inn_yz!(R, Ayyzz) = (@all(R) = @inn_yz(Ayyzz); return) - R.=0; @parallel all!(R, A); @test all(Array(R .== A)) - R.=0; @parallel inn!(R, Axxyyzz); @test all(Array(R .== Axxyyzz[2:end-1,2:end-1,2:end-1])) - R.=0; @parallel inn_x!(R, Axx); @test all(Array(R .== Axx[2:end-1, :, :])) - R.=0; @parallel inn_y!(R, Ayy); @test all(Array(R .== Ayy[ :,2:end-1, :])) - R.=0; @parallel inn_z!(R, Azz); @test all(Array(R .== Azz[ :, :,2:end-1])) - R.=0; @parallel inn_xy!(R, Axxyy); @test all(Array(R .== Axxyy[2:end-1,2:end-1, :])) - R.=0; @parallel inn_xz!(R, Axxzz); @test all(Array(R .== Axxzz[2:end-1, :,2:end-1])) - R.=0; @parallel inn_yz!(R, Ayyzz); @test all(Array(R .== Ayyzz[ :,2:end-1,2:end-1])) - end; - @testset "averages" begin - @parallel av!(R, Axyz) = (@all(R) = @av(Axyz); return) - @parallel av_xa!(R, Ax) = (@all(R) = @av_xa(Ax); return) - @parallel av_ya!(R, Ay) = (@all(R) = @av_ya(Ay); return) - @parallel av_za!(R, Az) = (@all(R) = @av_za(Az); return) - @parallel av_xi!(R, Axyyzz) = (@all(R) = @av_xi(Axyyzz); return) - @parallel av_yi!(R, Axxyzz) = (@all(R) = @av_yi(Axxyzz); return) - @parallel av_zi!(R, Axxyyz) = (@all(R) = @av_zi(Axxyyz); return) - @parallel av_xya!(R, Axy) = (@all(R) = @av_xya(Axy); return) - @parallel av_xza!(R, Axz) = (@all(R) = @av_xza(Axz); return) - @parallel av_yza!(R, Ayz) = (@all(R) = @av_yza(Ayz); return) - @parallel av_xyi!(R, Axyzz) = (@all(R) = @av_xyi(Axyzz); return) - @parallel av_xzi!(R, Axyyz) = (@all(R) = @av_xzi(Axyyz); return) - @parallel av_yzi!(R, Axxyz) = (@all(R) = @av_yzi(Axxyz); return) - R.=0; @parallel av!(R, Axyz); @test all(Array(R .== (Axyz[1:end-1,1:end-1,1:end-1].+Axyz[2:end,1:end-1,1:end-1].+Axyz[1:end-1,2:end,1:end-1].+Axyz[2:end,2:end,1:end-1].+Axyz[1:end-1,1:end-1,2:end].+Axyz[2:end,1:end-1,2:end].+Axyz[1:end-1,2:end,2:end].+Axyz[2:end,2:end,2:end])*0.125)) - R.=0; @parallel av_xa!(R, Ax); @test all(Array(R .== (Ax[2:end, :, :].+Ax[1:end-1, :, :]).*$FloatDefault(0.5))) - R.=0; @parallel av_ya!(R, Ay); @test all(Array(R .== (Ay[ :,2:end, :].+Ay[ :,1:end-1, :]).*$FloatDefault(0.5))) - R.=0; @parallel av_za!(R, Az); @test all(Array(R .== (Az[ :, :,2:end].+Az[ :, :,1:end-1]).*$FloatDefault(0.5))) - R.=0; @parallel av_xi!(R, Axyyzz); @test all(Array(R .== (Axyyzz[2:end ,2:end-1,2:end-1].+Axyyzz[1:end-1,2:end-1,2:end-1]).*$FloatDefault(0.5))) - R.=0; @parallel av_yi!(R, Axxyzz); @test all(Array(R .== (Axxyzz[2:end-1,2:end ,2:end-1].+Axxyzz[2:end-1,1:end-1,2:end-1]).*$FloatDefault(0.5))) - R.=0; @parallel av_zi!(R, Axxyyz); @test all(Array(R .== (Axxyyz[2:end-1,2:end-1,2:end ].+Axxyyz[2:end-1,2:end-1,1:end-1]).*$FloatDefault(0.5))) - R.=0; @parallel av_xya!(R, Axy); @test all(Array(R .== (Axy[1:end-1,1:end-1,:].+Axy[2:end,1:end-1,:].+Axy[1:end-1,2:end,:].+Axy[2:end,2:end,:]).*$FloatDefault(0.25))) - R.=0; @parallel av_xza!(R, Axz); @test all(Array(R .== (Axz[1:end-1,:,1:end-1].+Axz[2:end,:,1:end-1].+Axz[1:end-1,:,2:end].+Axz[2:end,:,2:end]).*$FloatDefault(0.25))) - R.=0; @parallel av_yza!(R, Ayz); @test all(Array(R .== (Ayz[:,1:end-1,1:end-1].+Ayz[:,2:end,1:end-1].+Ayz[:,1:end-1,2:end].+Ayz[:,2:end,2:end]).*$FloatDefault(0.25))) - R.=0; @parallel av_xyi!(R, Axyzz); @test all(Array(R .== (Axyzz[1:end-1,1:end-1,2:end-1].+Axyzz[2:end,1:end-1,2:end-1].+Axyzz[1:end-1,2:end,2:end-1].+Axyzz[2:end,2:end,2:end-1]).*$FloatDefault(0.25))) - R.=0; @parallel av_xzi!(R, Axyyz); @test all(Array(R .== (Axyyz[1:end-1,2:end-1,1:end-1].+Axyyz[2:end,2:end-1,1:end-1].+Axyyz[1:end-1,2:end-1,2:end].+Axyyz[2:end,2:end-1,2:end]).*$FloatDefault(0.25))) - R.=0; @parallel av_yzi!(R, Axxyz); @test all(Array(R .== (Axxyz[2:end-1,1:end-1,1:end-1].+Axxyz[2:end-1,2:end,1:end-1].+Axxyz[2:end-1,1:end-1,2:end].+Axxyz[2:end-1,2:end,2:end]).*$FloatDefault(0.25))) - end; - @testset "harmonic averages" begin - @parallel harm!(R, Axyz) = (@all(R) = @harm(Axyz); return) - @parallel harm_xa!(R, Ax) = (@all(R) = @harm_xa(Ax); return) - @parallel harm_ya!(R, Ay) = (@all(R) = @harm_ya(Ay); return) - @parallel harm_za!(R, Az) = (@all(R) = @harm_za(Az); return) - @parallel harm_xi!(R, Axyyzz) = (@all(R) = @harm_xi(Axyyzz); return) - @parallel harm_yi!(R, Axxyzz) = (@all(R) = @harm_yi(Axxyzz); return) - @parallel harm_zi!(R, Axxyyz) = (@all(R) = @harm_zi(Axxyyz); return) - @parallel harm_xya!(R, Axy) = (@all(R) = @harm_xya(Axy); return) - @parallel harm_xza!(R, Axz) = (@all(R) = @harm_xza(Axz); return) - @parallel harm_yza!(R, Ayz) = (@all(R) = @harm_yza(Ayz); return) - @parallel harm_xyi!(R, Axyzz) = (@all(R) = @harm_xyi(Axyzz); return) - @parallel harm_xzi!(R, Axyyz) = (@all(R) = @harm_xzi(Axyyz); return) - @parallel harm_yzi!(R, Axxyz) = (@all(R) = @harm_yzi(Axxyz); return) - R.=0; @parallel harm!(R, Axyz); @test all(Array(R .== 8 ./(1 ./Axyz[1:end-1,1:end-1,1:end-1].+1 ./Axyz[2:end,1:end-1,1:end-1].+1 ./Axyz[1:end-1,2:end,1:end-1].+1 ./Axyz[2:end,2:end,1:end-1].+1 ./Axyz[1:end-1,1:end-1,2:end].+1 ./Axyz[2:end,1:end-1,2:end].+1 ./Axyz[1:end-1,2:end,2:end].+1 ./Axyz[2:end,2:end,2:end]) )) - R.=0; @parallel harm_xa!(R, Ax); @test all(Array(R .== 2 ./(1 ./Ax[2:end, :, :].+1 ./Ax[1:end-1, :, :]))) - R.=0; @parallel harm_ya!(R, Ay); @test all(Array(R .== 2 ./(1 ./Ay[ :,2:end, :].+1 ./Ay[ :,1:end-1, :]))) - R.=0; @parallel harm_za!(R, Az); @test all(Array(R .== 2 ./(1 ./Az[ :, :,2:end].+1 ./Az[ :, :,1:end-1]))) - R.=0; @parallel harm_xi!(R, Axyyzz); @test all(Array(R .== 2 ./(1 ./Axyyzz[2:end ,2:end-1,2:end-1].+1 ./Axyyzz[1:end-1,2:end-1,2:end-1]))) - R.=0; @parallel harm_yi!(R, Axxyzz); @test all(Array(R .== 2 ./(1 ./Axxyzz[2:end-1,2:end ,2:end-1].+1 ./Axxyzz[2:end-1,1:end-1,2:end-1]))) - R.=0; @parallel harm_zi!(R, Axxyyz); @test all(Array(R .== 2 ./(1 ./Axxyyz[2:end-1,2:end-1,2:end ].+1 ./Axxyyz[2:end-1,2:end-1,1:end-1]))) - R.=0; @parallel harm_xya!(R, Axy); @test all(Array(R .== 4 ./(1 ./Axy[1:end-1,1:end-1,:].+1 ./Axy[2:end,1:end-1,:].+1 ./Axy[1:end-1,2:end,:].+1 ./Axy[2:end,2:end,:]))) - R.=0; @parallel harm_xza!(R, Axz); @test all(Array(R .== 4 ./(1 ./Axz[1:end-1,:,1:end-1].+1 ./Axz[2:end,:,1:end-1].+1 ./Axz[1:end-1,:,2:end].+1 ./Axz[2:end,:,2:end]))) - R.=0; @parallel harm_yza!(R, Ayz); @test all(Array(R .== 4 ./(1 ./Ayz[:,1:end-1,1:end-1].+1 ./Ayz[:,2:end,1:end-1].+1 ./Ayz[:,1:end-1,2:end].+1 ./Ayz[:,2:end,2:end]))) - R.=0; @parallel harm_xyi!(R, Axyzz); @test all(Array(R .== 4 ./(1 ./Axyzz[1:end-1,1:end-1,2:end-1].+1 ./Axyzz[2:end,1:end-1,2:end-1].+1 ./Axyzz[1:end-1,2:end,2:end-1].+1 ./Axyzz[2:end,2:end,2:end-1]))) - R.=0; @parallel harm_xzi!(R, Axyyz); @test all(Array(R .== 4 ./(1 ./Axyyz[1:end-1,2:end-1,1:end-1].+1 ./Axyyz[2:end,2:end-1,1:end-1].+1 ./Axyyz[1:end-1,2:end-1,2:end].+1 ./Axyyz[2:end,2:end-1,2:end]))) - R.=0; @parallel harm_yzi!(R, Axxyz); @test all(Array(R .== 4 ./(1 ./Axxyz[2:end-1,1:end-1,1:end-1].+1 ./Axxyz[2:end-1,2:end,1:end-1].+1 ./Axxyz[2:end-1,1:end-1,2:end].+1 ./Axxyz[2:end-1,2:end,2:end]))) - end; - @testset "others" begin - @parallel maxloc!(R, Axxyyzz) = (@all(R) = @maxloc(Axxyyzz); return) - R.=0; @parallel maxloc!(R, Axxyyzz); @test all(Array(R .== max.(max.(max.(max.(max.(max.(Axxyyzz[1:end-2,2:end-1,2:end-1],Axxyyzz[3:end,2:end-1,2:end-1]),Axxyyzz[2:end-1,2:end-1,2:end-1]),Axxyyzz[2:end-1,1:end-2,2:end-1]),Axxyyzz[2:end-1,3:end,2:end-1]),Axxyyzz[2:end-1,2:end-1,1:end-2]),Axxyyzz[2:end-1,2:end-1,3:end]))) - end; - end; - @testset "2. apply masks" begin - @testset "selection" begin - @parallel inn_all!(Rxxyyzz, A) = (@inn(Rxxyyzz) = @all(A); return) - @parallel inn_inn!(Rxxyyzz, Axxyyzz) = (@inn(Rxxyyzz) = @inn(Axxyyzz); return) - Rxxyyzz.=0; @parallel inn_all!(Rxxyyzz, A); @test all(Array(Rxxyyzz[2:end-1,2:end-1,2:end-1] .== A)) - Rxxyyzz[2:end-1,2:end-1,2:end-1].=0; @test all(Array(Rxxyyzz .== 0)) # Test that boundary values remained zero. - Rxxyyzz.=0; @parallel inn_inn!(Rxxyyzz, Axxyyzz); @test all(Array(Rxxyyzz[2:end-1,2:end-1,2:end-1] .== Axxyyzz[2:end-1,2:end-1,2:end-1])) - Rxxyyzz[2:end-1,2:end-1,2:end-1].=0; @test all(Array(Rxxyyzz .== 0)) # Test that boundary values remained zero. - end; - @testset "differences" begin - @parallel inn_d_xa!(Rxxyyzz, Ax) = (@inn(Rxxyyzz) = @d_xa(Ax); return) - @parallel inn_d_yi!(Rxxyyzz, Axxyzz) = (@inn(Rxxyyzz) = @d_yi(Axxyzz); return) - @parallel inn_d2_yi!(Rxxyyzz, Axxyyzz) = (@inn(Rxxyyzz) = @d2_yi(Axxyyzz); return) - Rxxyyzz.=0; @parallel inn_d_xa!(Rxxyyzz, Ax); @test all(Array(Rxxyyzz[2:end-1,2:end-1,2:end-1] .== Ax[2:end, :, :].-Ax[1:end-1, :, :])) - Rxxyyzz[2:end-1,2:end-1,2:end-1].=0; @test all(Array(Rxxyyzz .== 0)) # Test that boundary values remained zero. - Rxxyyzz.=0; @parallel inn_d_yi!(Rxxyyzz, Axxyzz); @test all(Array(Rxxyyzz[2:end-1,2:end-1,2:end-1] .== Axxyzz[2:end-1,2:end ,2:end-1].-Axxyzz[2:end-1,1:end-1,2:end-1])) - Rxxyyzz[2:end-1,2:end-1,2:end-1].=0; @test all(Array(Rxxyyzz .== 0)) # Test that boundary values remained zero. - Rxxyyzz.=0; @parallel inn_d2_yi!(Rxxyyzz, Axxyyzz); @test all(Array(Rxxyyzz[2:end-1,2:end-1,2:end-1] .== (Axxyyzz[2:end-1,3:end,2:end-1].-Axxyyzz[2:end-1,2:end-1,2:end-1]).-(Axxyyzz[2:end-1,2:end-1,2:end-1].-Axxyyzz[2:end-1,1:end-2,2:end-1]))) - Rxxyyzz[2:end-1,2:end-1,2:end-1].=0; @test all(Array(Rxxyyzz .== 0)) # Test that boundary values remained zero. - end; - end; - @reset_parallel_stencil() + ))) end; )) diff --git a/test/test_init_parallel_stencil.jl b/test/test_init_parallel_stencil.jl index fad22c6e..8a69d291 100644 --- a/test/test_init_parallel_stencil.jl +++ b/test/test_init_parallel_stencil.jl @@ -1,8 +1,8 @@ using Test using ParallelStencil -import ParallelStencil: @reset_parallel_stencil, @is_initialized, @get_package, @get_numbertype, @get_ndims, @get_inbounds, @get_padding, @get_memopt, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_POLYESTER, PKG_NONE, NUMBERTYPE_NONE, NDIMS_NONE +import ParallelStencil: @reset_parallel_stencil, @is_initialized, @get_package, @get_numbertype, @get_ndims, @get_inbounds, @get_padding, @get_memopt, @get_nonconst_metadata, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_POLYESTER, PKG_NONE, NUMBERTYPE_NONE, NDIMS_NONE import ParallelStencil: @require, @symbols -import ParallelStencil: extract_posargs_init, extract_kwargs_init, check_already_initialized, set_initialized, is_initialized, check_initialized, set_package, set_numbertype, set_ndims, set_inbounds, set_padding, set_memopt +import ParallelStencil: extract_posargs_init, extract_kwargs_init, check_already_initialized, set_initialized, is_initialized, check_initialized, set_package, set_numbertype, set_ndims, set_inbounds, set_padding, set_memopt, set_nonconst_metadata using ParallelStencil.Exceptions TEST_PACKAGES = SUPPORTED_PACKAGES @static if PKG_CUDA in TEST_PACKAGES @@ -38,6 +38,7 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t @test @get_numbertype() == ComplexF32 @test @get_ndims() == 3 @test @get_memopt() == false + @test @get_nonconst_metadata() == false @test @get_inbounds() == false @test @get_padding() == false end; @@ -69,17 +70,18 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t end; @reset_parallel_stencil() end; - @testset "2. initialization of ParallelStencil without numbertype and ndims, with memopt, inbounds and padding" begin + @testset "2. initialization of ParallelStencil without numbertype and ndims, with memopt, inbounds and padding (and nonconst_metadata)" begin @require !@is_initialized() - @init_parallel_stencil(package = $package, inbounds = true, padding = true, memopt = true) + @init_parallel_stencil(package = $package, inbounds = true, padding = false, memopt = true, nonconst_metadata = true) @testset "initialized" begin @test @is_initialized() @test @get_package() == $package @test @get_numbertype() == NUMBERTYPE_NONE @test @get_ndims() == NDIMS_NONE @test @get_memopt() == true + @test @get_nonconst_metadata() == true @test @get_inbounds() == true - @test @get_padding() == true + @test @get_padding() == false #TODO: this needs to be restored to true when Polyester supports padding. end; @testset "Data" begin @test @isdefined(Data) @@ -105,14 +107,15 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t set_memopt(@__MODULE__, false) set_inbounds(@__MODULE__, false) set_padding(@__MODULE__, false) + set_nonconst_metadata(@__MODULE__, false) @require is_initialized(@__MODULE__) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :Threads, Float64, 3, false, false, false) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float32, 3, false, false, false) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 2, false, false, false) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, true, false, false) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, false, true, false) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, false, false, true) - @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :AMDGPU, Float16, 1, true, false, true) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :Threads, Float64, 3, false, false, false, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float32, 3, false, false, false, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 2, false, false, false, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, true, false, false, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, false, true, false, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :CUDA, Float64, 3, false, false, true, false) + @test_throws IncoherentCallError check_already_initialized(@__MODULE__, :AMDGPU, Float16, 1, true, false, true, false) set_initialized(@__MODULE__, false) set_package(@__MODULE__, PKG_NONE) set_numbertype(@__MODULE__, NUMBERTYPE_NONE) diff --git a/test/test_parallel.jl b/test/test_parallel.jl index a6be8377..f7882d22 100644 --- a/test/test_parallel.jl +++ b/test/test_parallel.jl @@ -1,11 +1,16 @@ using Test using ParallelStencil -import ParallelStencil: @reset_parallel_stencil, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_THREADS, PKG_POLYESTER, INDICES -import ParallelStencil: @require, @prettystring, @gorgeousstring, @isgpu, @iscpu +import ParallelStencil: @reset_parallel_stencil, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_THREADS, PKG_POLYESTER, INDICES, INDICES_INN, INDICES_DIR, ARRAYTYPES, FIELDTYPES, SCALARTYPES +import ParallelStencil: @require, @prettystring, @gorgeousstring, @isgpu, @iscpu, interpolate import ParallelStencil: checkargs_parallel, validate_body, parallel using ParallelStencil.Exceptions using ParallelStencil.FiniteDifferences3D +using ParallelStencil.FieldAllocators +import ParallelStencil.FieldAllocators: @XXYYZField, @XYYZZField ix, iy, iz = INDICES[1], INDICES[2], INDICES[3] +ixi, iyi, izi = INDICES_INN[1], INDICES_INN[2], INDICES_INN[3] +ixd, iyd, izd = INDICES_DIR[1], INDICES_DIR[2], INDICES_DIR[3] +ix_s, iy_s, iz_s = "var\"$ix\"", "var\"$iy\"", "var\"$iz\"" TEST_PACKAGES = SUPPORTED_PACKAGES @static if PKG_CUDA in TEST_PACKAGES import CUDA @@ -16,16 +21,18 @@ end if !AMDGPU.functional() TEST_PACKAGES = filter!(x->x≠PKG_AMDGPU, TEST_PACKAGES) end end @static if PKG_METAL in TEST_PACKAGES - import Metal - if !Metal.functional() TEST_PACKAGES = filter!(x->x≠PKG_METAL, TEST_PACKAGES) end + @static if Sys.isapple() + import Metal + if !Metal.functional() TEST_PACKAGES = filter!(x->x≠PKG_METAL, TEST_PACKAGES) end + else + TEST_PACKAGES = filter!(x->x≠PKG_METAL, TEST_PACKAGES) + end end @static if PKG_POLYESTER in TEST_PACKAGES import Polyester end Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. -import ParallelStencil.@gorgeousexpand - @static for package in TEST_PACKAGES FloatDefault = (package == PKG_METAL) ? Float32 : Float64 # Metal does not support Float64 @@ -34,7 +41,7 @@ eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "1. parallel macros" begin @require !@is_initialized() - @init_parallel_stencil($package, $FloatDefault, 3) + @init_parallel_stencil($package, $FloatDefault, 3, nonconst_metadata=true) @require @is_initialized() @testset "@parallel " begin # NOTE: calls must go to ParallelStencil.ParallelKernel.parallel and must therefore give the same result as in ParallelKernel, except for memopt tests (tests copied 1-to-1 from there). @static if $package == $PKG_CUDA @@ -134,30 +141,22 @@ eval(:( expansion = @gorgeousstring(1, @parallel f(A, B, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) @test occursin("f(A, B, c::T, ranges::Tuple{UnitRange, UnitRange, UnitRange}, rangelength_x::Int64, rangelength_y::Int64, rangelength_z::Int64", expansion) end - @testset "Data.Array to Data.Device.Array" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel f(A::Data.Array, B::Data.Array, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.Device.Array, B::Data.Device.Array,", expansion) - end - end - @testset "Data.Cell to Data.Device.Cell" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel f(A::Data.Cell, B::Data.Cell, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.Device.Cell, B::Data.Device.Cell,", expansion) - end - end - @testset "Data.CellArray to Data.Device.CellArray" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel f(A::Data.CellArray, B::Data.CellArray, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.Device.CellArray, B::Data.Device.CellArray,", expansion) + $(interpolate(:__T__, ARRAYTYPES, :( + @testset "Data.__T__ to Data.Device.__T__" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel f(A::Data.__T__, B::Data.__T__, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::Data.Device.__T__, B::Data.Device.__T__,", expansion) + end end - end - @testset "Data.Fields.Field to Data.Fields.Device.Field" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel f(A::Data.Fields.Field, B::Data.Fields.Field, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.Fields.Device.Field, B::Data.Fields.Device.Field,", expansion) + ))) + $(interpolate(:__T__, FIELDTYPES, :( + @testset "Data.Fields.__T__ to Data.Fields.Device.__T__" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel f(A::Data.Fields.__T__, B::Data.Fields.__T__, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::Data.Fields.Device.__T__, B::Data.Fields.Device.__T__,", expansion) + end end - end + ))) # NOTE: the following GPU tests fail, because the Fields module cannot be imported. # @testset "Fields.Field to Data.Fields.Device.Field" begin # @static if @isgpu($package) @@ -173,30 +172,22 @@ eval(:( # @test occursin("f(A::Data.Fields.Device.Field, B::Data.Fields.Device.Field,", expansion) # end # end - @testset "TData.Array to TData.Device.Array" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel f(A::TData.Array, B::TData.Array, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::TData.Device.Array, B::TData.Device.Array,", expansion) - end - end - @testset "TData.Cell to TData.Device.Cell" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel f(A::TData.Cell, B::TData.Cell, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::TData.Device.Cell, B::TData.Device.Cell,", expansion) - end - end - @testset "TData.CellArray to TData.Device.CellArray" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel f(A::TData.CellArray, B::TData.CellArray, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::TData.Device.CellArray, B::TData.Device.CellArray,", expansion) + $(interpolate(:__T__, ARRAYTYPES, :( + @testset "TData.__T__ to TData.Device.__T__" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel f(A::TData.__T__, B::TData.__T__, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::TData.Device.__T__, B::TData.Device.__T__,", expansion) + end end - end - @testset "TData.Fields.Field to TData.Fields.Device.Field" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel f(A::TData.Fields.Field, B::TData.Fields.Field, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::TData.Fields.Device.Field, B::TData.Fields.Device.Field,", expansion) + ))) + $(interpolate(:__T__, FIELDTYPES, :( + @testset "TData.Fields.__T__ to TData.Fields.Device.__T__" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel f(A::TData.Fields.__T__, B::TData.Fields.__T__, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::TData.Fields.Device.__T__, B::TData.Fields.Device.__T__,", expansion) + end end - end + ))) # NOTE: the following GPU tests fail, because the Fields module cannot be imported. # @testset "Fields.Field to TData.Fields.Device.Field" begin # @static if @isgpu($package) @@ -244,651 +235,668 @@ eval(:( + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) ); @test all(Array(T2) .== Array(T2_ref)) - end + end @static if $package in [$PKG_CUDA, $PKG_AMDGPU] # TODO add support for Metal - @testset "@parallel memopt (nx, ny, nz = x .* threads)" begin # NOTE: the following does not work for some reason: (nx, ny, nz = ($nx, $ny, $nz))" for (nx, ny, nz) in ((32, 8, 9), (32, 8, 8), (31, 7, 9), (33, 9, 9), (33, 7, 8)) - nx, ny, nz = 32, 8, 8 - # threads = (8, 4, 1) - # blocks = ceil.(Int, (nx/threads[1], ny/threads[2], nz/LOOPSIZE)) - # shmem = (threads[1]+2)*(threads[2]+2)*sizeof(Float64) - @testset "@parallel_indices (3D, memopt, stencilranges=0:0)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true loopsize=3 optvars=A optranges=(A=(0:0,0:0,0:0),) function copy_memopt!(A2, A) - A2[ix,iy,iz] = A[ix,iy,iz] - return - end - @parallel memopt=true copy_memopt!(A2, A); - @test all(Array(A2) .== Array(A)) - end - @testset "@parallel (3D, memopt, stencilranges=0:0)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel memopt=true loopsize=3 optvars=A optranges=(A=(0:0,0:0,0:0),) function copy_memopt!(A2, A) - @all(A2) = @all(A) - return - end - @parallel memopt=true copy_memopt!(A2, A); - @test all(Array(A2) .== Array(A)) - end - @testset "@parallel_indices (3D, memopt, stencilranges=(0:0, 0:0, -1:1); z-stencil)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function d2_memopt!(A2, A) - if (iz>1 && iz (nx, ny, nz = x .* threads)" begin # NOTE: the following does not work for some reason: (nx, ny, nz = ($nx, $ny, $nz))" for (nx, ny, nz) in ((32, 8, 9), (32, 8, 8), (31, 7, 9), (33, 9, 9), (33, 7, 8)) + nxyz = (32, 8, 8) + # threads = (8, 4, 1) + # blocks = ceil.(Int, (nx/threads[1], ny/threads[2], nz/LOOPSIZE)) + # shmem = (threads[1]+2)*(threads[2]+2)*sizeof(Float64) + @testset "@parallel_indices (3D, memopt, stencilranges=0:0)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true loopsize=3 optvars=A optranges=(A=(0:0,0:0,0:0),) function copy_memopt!(A2, A) + A2[ix,iy,iz] = A[ix,iy,iz] + return + end + @parallel memopt=true copy_memopt!(A2, A); + @test all(Array(A2) .== Array(A)) end - return - end - @parallel memopt=true d2_memopt!(A2, A); - A2_ref[:,:,2:end-1] .= A[:,:,3:end] .- 2*A[:,:,2:end-1] .+ A[:,:,1:end-2]; - @test all(Array(A2) .== Array(A2_ref)) - end - @testset "@parallel_indices (3D, memopt, stencilranges=(0:0, -1:1, 0:0); y-stencil)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true function d2_memopt!(A2, A) - if (iy>1 && iy (3D, memopt, stencilranges=0:0)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel memopt=true loopsize=3 optvars=A optranges=(A=(0:0,0:0,0:0),) function copy_memopt!(A2, A) + @all(A2) = @all(A) + return + end + @parallel memopt=true copy_memopt!(A2, A); + @test all(Array(A2) .== Array(A)) end - return - end - @parallel memopt=true d2_memopt!(A2, A); - A2_ref[:,2:end-1,:] .= A[:,3:end,:] .- 2*A[:,2:end-1,:] .+ A[:,1:end-2,:]; - @test all(Array(A2) .== Array(A2_ref)) - end - @testset "@parallel (3D, memopt, stencilranges=(1:1, 1:1, 0:2); z-stencil)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel memopt=true loopsize=3 function d2_memopt!(A2, A) - @inn(A2) = @d2_zi(A) - return - end - @parallel memopt=true d2_memopt!(A2, A); - A2_ref[2:end-1,2:end-1,2:end-1] .= (A[2:end-1,2:end-1,3:end] .- A[2:end-1,2:end-1,2:end-1]) .- (A[2:end-1,2:end-1,2:end-1] .- A[2:end-1,2:end-1,1:end-2]); - @test all(Array(A2) .== Array(A2_ref)) - end - @testset "@parallel (3D, memopt, stencilranges=(1:1, 0:2, 1:1); y-stencil)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel memopt=true loopsize=3 function d2_memopt!(A2, A) - @inn(A2) = @d2_yi(A) - return - end - @parallel memopt=true d2_memopt!(A2, A); - A2_ref[2:end-1,2:end-1,2:end-1] .= (A[2:end-1,3:end,2:end-1] .- A[2:end-1,2:end-1,2:end-1]) .- (A[2:end-1,2:end-1,2:end-1] .- A[2:end-1,1:end-2,2:end-1]); - @test all(Array(A2) .== Array(A2_ref)) - end - @testset "@parallel_indices (3D, memopt, stencilranges=-1:1)" begin - lam=dt=_dx=_dy=_dz = $FloatDefault(1) - T = @zeros(nx, ny, nz); - T2 = @zeros(nx, ny, nz); - T2_ref = @zeros(nx, ny, nz); - Ci = @ones(nx, ny, nz); - copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz) - if (ix>1 && ix1 && iy1 && iz (3D, memopt, stencilranges=(0:0, 0:0, -1:1); z-stencil)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function d2_memopt!(A2, A) + if (iz>1 && iz (3D, memopt, stencilranges=0:2)" begin - lam=dt=_dx=_dy=_dz = 1 - T = @zeros(nx, ny, nz); - T2 = @zeros(nx, ny, nz); - T2_ref = @zeros(nx, ny, nz); - Ci = @ones(nx, ny, nz); - copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); - @parallel memopt=true loopsize=3 function diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz) - @inn(T2) = @inn(T) + dt*(lam*@inn(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) - return - end - @parallel memopt=true diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz); - T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*Ci[2:end-1,2:end-1,2:end-1].*( - ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 - + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 - + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) - ); - @test all(Array(T2) .== Array(T2_ref)) - end - @testset "@parallel_indices (3D, memopt, stencilranges=(-4:-1, 2:2, -2:3); x-z-stencil, y-shift)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function higher_order_memopt!(A2, A) - if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) - A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + @testset "@parallel_indices (3D, memopt, stencilranges=(0:0, -1:1, 0:0); y-stencil)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true function d2_memopt!(A2, A) + if (iy>1 && iy (3D, memopt, stencilranges=0:2; on-the-fly)" begin - lam=dt=_dx=_dy=_dz = $FloatDefault(1) - T = @zeros(nx, ny, nz); - T2 = @zeros(nx, ny, nz); - T2_ref = @zeros(nx, ny, nz); - Ci = @ones(nx, ny, nz); - copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); - @parallel memopt=true loopsize=3 function diffusion3D_step!(T2, T, Ci, lam::Data.Number, dt::$FloatDefault, _dx, _dy, _dz) - @all(qx) = -lam*@d_xi(T)*_dx # Fourier's law of heat conduction - @all(qy) = -lam*@d_yi(T)*_dy # ... - @all(qz) = -lam*@d_zi(T)*_dz # ... - @all(dTdt) = @inn(Ci)*(-@d_xa(qx)*_dx - @d_ya(qy)*_dy - @d_za(qz)*_dz) # Conservation of energy - @inn(T2) = @inn(T) + dt*@all(dTdt) # Update of temperature - return - end - @parallel memopt=true diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz); - T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*Ci[2:end-1,2:end-1,2:end-1].*( - ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 - + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 - + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) - ); - @test all(Array(T2) .== Array(T2_ref)) - end - @testset "@parallel (3D, memopt, stencilranges=0:0; 2 arrays)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - B = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(B, 2 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); - @parallel memopt=true loopsize=3 optvars=(A, B) optranges=(A=(0:0,0:0,0:0), B=(0:0,0:0,0:0)) function copy_memopt!(A2, A, B) - @all(A2) = @all(A) + @all(B) - return - end - @parallel memopt=true copy_memopt!(A2, A, B); - @test all(Array(A2) .== Array(A) .+ Array(B)) - end - @testset "@parallel_indices (3D, memopt; 2 arrays, z-stencil)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - B = @zeros(nx, ny, nz-1); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(B, 2 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function d2_memopt!(A2, A, B) - if (iz>1 && iz (3D, memopt, stencilranges=(1:1, 1:1, 0:2); z-stencil)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel memopt=true loopsize=3 function d2_memopt!(A2, A) + @inn(A2) = @d2_zi(A) + return + end + @parallel memopt=true d2_memopt!(A2, A); + A2_ref[2:end-1,2:end-1,2:end-1] .= (A[2:end-1,2:end-1,3:end] .- A[2:end-1,2:end-1,2:end-1]) .- (A[2:end-1,2:end-1,2:end-1] .- A[2:end-1,2:end-1,1:end-2]); + @test all(Array(A2) .== Array(A2_ref)) end - return - end - @parallel memopt=true d2_memopt!(A2, A, B); - A2_ref[:,:,2:end-1] .= A[:,:,3:end] .- 2*A[:,:,2:end-1] .+ A[:,:,1:end-2] .+ B[:,:,2:end] .- B[:,:,1:end-1]; - @test all(Array(A2) .== Array(A2_ref)) - end - @testset "@parallel_indices (3D, memopt; 2 arrays, y-stencil)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - B = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(B, 2 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function d2_memopt!(A2, A, B) - if (iy>1 && iy (3D, memopt, stencilranges=(1:1, 0:2, 1:1); y-stencil)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel memopt=true loopsize=3 function d2_memopt!(A2, A) + @inn(A2) = @d2_yi(A) + return + end + @parallel memopt=true d2_memopt!(A2, A); + A2_ref[2:end-1,2:end-1,2:end-1] .= (A[2:end-1,3:end,2:end-1] .- A[2:end-1,2:end-1,2:end-1]) .- (A[2:end-1,2:end-1,2:end-1] .- A[2:end-1,1:end-2,2:end-1]); + @test all(Array(A2) .== Array(A2_ref)) end - return - end - @parallel memopt=true d2_memopt!(A2, A, B); - A2_ref[:,2:end-1,:] .= (((A[:,3:end,:] .- 2*A[:,2:end-1,:]) .+ A[:,1:end-2,:] .+ B[:,3:end,:]) .- 2*B[:,2:end-1,:]) .+ B[:,1:end-2,:]; - @test all(Array(A2) .== Array(A2_ref)) - end - @testset "@parallel_indices (3D, memopt; 2 arrays, x-stencil)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - B = @zeros(nx-1, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(B, 2 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true function d2_memopt!(A2, A, B) - if (ix>1 && ix (3D, memopt, stencilranges=-1:1)" begin + lam=dt=_dx=_dy=_dz = $FloatDefault(1) + T = @Field(nxyz); + T2 = @Field(nxyz); + T2_ref = @Field(nxyz); + Ci = @Field(nxyz, @ones); + copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz) + if (ix>1 && ix1 && iy1 && iz (3D, memopt; 2 arrays, x-y-z- + z-stencil)" begin - lam=dt=_dx=_dy=_dz = $FloatDefault(1) - T = @zeros(nx, ny, nz); - T2 = @zeros(nx, ny, nz); - T2_ref = @zeros(nx, ny, nz); - Ci = @zeros(nx, ny, nz-1); - copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); - copy!(Ci, 2 .* [ix + (iy-1)*size(Ci,1) + (iz-1)*size(Ci,1)*size(Ci,2) for ix=1:size(Ci,1), iy=1:size(Ci,2), iz=1:size(Ci,3)].^3); - @parallel memopt=true loopsize=3 function diffusion3D_step_modified!(T2, T, Ci, lam, dt, _dx, _dy, _dz) - @inn(T2) = @inn(T) + dt*(lam*@d_zi(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) - return - end - @parallel memopt=true diffusion3D_step_modified!(T2, T, Ci, lam, dt, _dx, _dy, _dz); - T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*(Ci[2:end-1,2:end-1,2:end] .- Ci[2:end-1,2:end-1,1:end-1]).*( - ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 - + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 - + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) - ); - @test all(Array(T2) .== Array(T2_ref)) - end - @testset "@parallel (3D, memopt; 2 arrays, x-y-z- + x-stencil)" begin - lam=dt=_dx=_dy=_dz = $FloatDefault(1) - T = @zeros(nx, ny, nz); - T2 = @zeros(nx, ny, nz); - T2_ref = @zeros(nx, ny, nz); - Ci = @zeros(nx-1, ny, nz); - copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); - copy!(Ci, 2 .* [ix + (iy-1)*size(Ci,1) + (iz-1)*size(Ci,1)*size(Ci,2) for ix=1:size(Ci,1), iy=1:size(Ci,2), iz=1:size(Ci,3)].^3); - @parallel memopt=true loopsize=3 function diffusion3D_step_modified!(T2, T, Ci, lam, dt, _dx, _dy, _dz) - @inn(T2) = @inn(T) + dt*(lam*@d_xi(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) - return - end - @parallel memopt=true diffusion3D_step_modified!(T2, T, Ci, lam, dt, _dx, _dy, _dz); - T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*(Ci[2:end,2:end-1,2:end-1] .- Ci[1:end-1,2:end-1,2:end-1]).*( - ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 - + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 - + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) - ); - @test all(Array(T2) .== Array(T2_ref)) - end - @testset "@parallel (3D, memopt; 3 arrays, x-y-z- + y- + x-stencil)" begin - lam=dt=_dx=_dy=_dz = $FloatDefault(1) - T = @zeros(nx, ny, nz); - T2 = @zeros(nx, ny, nz); - T2_ref = @zeros(nx, ny, nz); - Ci = @zeros(nx-1, ny, nz); - B = @zeros(nx, ny, nz); - copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); - copy!(Ci, 2 .* [ix + (iy-1)*size(Ci,1) + (iz-1)*size(Ci,1)*size(Ci,2) for ix=1:size(Ci,1), iy=1:size(Ci,2), iz=1:size(Ci,3)].^3); - copy!(B, 3 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); - @parallel memopt=true loopsize=3 function diffusion3D_step_modified!(T2, T, Ci, B, lam, dt, _dx, _dy, _dz) - @inn(T2) = @inn(T) + dt*(lam*@d_xi(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) + @d2_yi(B) - return - end - @parallel memopt=true diffusion3D_step_modified!(T2, T, Ci, B, lam, dt, _dx, _dy, _dz); - T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*(Ci[2:end,2:end-1,2:end-1] .- Ci[1:end-1,2:end-1,2:end-1]).*( - ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 - + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 - + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) - ) + ((B[2:end-1,3:end ,2:end-1] .- B[2:end-1,2:end-1,2:end-1]) .- (B[2:end-1,2:end-1,2:end-1] .- B[2:end-1,1:end-2,2:end-1])); - @test all(Array(T2) .== Array(T2_ref)) - end - @testset "@parallel_indices (3D, memopt, stencilranges=(-4:-1, 2:2, -2:3); 3 arrays, x-z-stencil, y-shift)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - B = @zeros(nx, ny, nz); - B2 = @zeros(nx, ny, nz); - B2_ref = @zeros(nx, ny, nz); - C = @zeros(nx, ny, nz); - C2 = @zeros(nx, ny, nz); - C2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function higher_order_memopt!(A2, B2, C2, A, B, C) - if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) - A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + @testset "@parallel (3D, memopt, stencilranges=0:2)" begin + lam=dt=_dx=_dy=_dz = 1 + T = @Field(nxyz); + T2 = @Field(nxyz); + T2_ref = @Field(nxyz); + Ci = @Field(nxyz, @ones); + copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); + @parallel memopt=true loopsize=3 function diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz) + @inn(T2) = @inn(T) + dt*(lam*@inn(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) + return + end + @parallel memopt=true diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz); + T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*Ci[2:end-1,2:end-1,2:end-1].*( + ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 + + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 + + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) + ); + @test all(Array(T2) .== Array(T2_ref)) end - if (ix-4>1 && ix-11 && iy+2<=size(B2,2) && iz-2>=1 && iz+3<=size(B2,3)) - B2[ix-1,iy+2,iz] = B[ix-1,iy+2,iz+3] - 2*B[ix-3,iy+2,iz] + B[ix-4,iy+2,iz-2] + @testset "@parallel_indices (3D, memopt, stencilranges=(-4:-1, 2:2, -2:3); x-z-stencil, y-shift)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function higher_order_memopt!(A2, A) + if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) + A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + end + return + end + @parallel memopt=true higher_order_memopt!(A2, A); + A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; + @test all(Array(A2) .== Array(A2_ref)) end - if (ix-4>1 && ix-11 && iy+2<=size(C2,2) && iz-2>=1 && iz+3<=size(C2,3)) - C2[ix-1,iy+2,iz] = C[ix-1,iy+2,iz+3] - 2*C[ix-3,iy+2,iz] + C[ix-4,iy+2,iz-2] + @testset "@parallel (3D, memopt, stencilranges=0:2; on-the-fly)" begin + lam=dt=_dx=_dy=_dz = $FloatDefault(1) + T = @Field(nxyz); + T2 = @Field(nxyz); + T2_ref = @Field(nxyz); + Ci = @Field(nxyz, @ones); + copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); + @parallel memopt=true loopsize=3 function diffusion3D_step!(T2, T, Ci, lam::Data.Number, dt::$FloatDefault, _dx, _dy, _dz) + @all(qx) = -lam*@d_xi(T)*_dx # Fourier's law of heat conduction + @all(qy) = -lam*@d_yi(T)*_dy # ... + @all(qz) = -lam*@d_zi(T)*_dz # ... + @all(dTdt) = @inn(Ci)*(-@d_xa(qx)*_dx - @d_ya(qy)*_dy - @d_za(qz)*_dz) # Conservation of energy + @inn(T2) = @inn(T) + dt*@all(dTdt) # Update of temperature + return + end + @parallel memopt=true diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz); + T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*Ci[2:end-1,2:end-1,2:end-1].*( + ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 + + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 + + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) + ); + @test all(Array(T2) .== Array(T2_ref)) end - return - end - @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); - A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; - B2_ref[5:end-1,3:end,3:end-3] .= B[5:end-1,3:end,6:end] .- 2*B[3:end-3,3:end,3:end-3] .+ B[2:end-4,3:end,1:end-5]; - C2_ref[5:end-1,3:end,3:end-3] .= C[5:end-1,3:end,6:end] .- 2*C[3:end-3,3:end,3:end-3] .+ C[2:end-4,3:end,1:end-5]; - @test all(Array(A2) .== Array(A2_ref)) - @test all(Array(B2) .== Array(B2_ref)) - @test all(Array(C2) .== Array(C2_ref)) - end - @testset "@parallel_indices (3D, memopt, stencilranges=(A=(-4:-1, 2:2, -2:3), B=(-4:-1, 2:2, 1:2), C=(-4:-1, 2:2, -1:0)); 3 arrays, x-z-stencil, y-shift)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - B = @zeros(nx, ny, nz); - B2 = @zeros(nx, ny, nz); - B2_ref = @zeros(nx, ny, nz); - C = @zeros(nx, ny, nz); - C2 = @zeros(nx, ny, nz); - C2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function higher_order_memopt!(A2, B2, C2, A, B, C) - if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) - A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + @testset "@parallel (3D, memopt, stencilranges=0:0; 2 arrays)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + B = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(B, 2 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); + @parallel memopt=true loopsize=3 optvars=(A, B) optranges=(A=(0:0,0:0,0:0), B=(0:0,0:0,0:0)) function copy_memopt!(A2, A, B) + @all(A2) = @all(A) + @all(B) + return + end + @parallel memopt=true copy_memopt!(A2, A, B); + @test all(Array(A2) .== Array(A) .+ Array(B)) + end + @testset "@parallel_indices (3D, memopt; 2 arrays, z-stencil)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + B = @XXYYZField(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(B, 2 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function d2_memopt!(A2, A, B) + if (iz>1 && iz1 && ix-11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) - B2[ix-1,iy+2,iz+1] = B[ix-1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + @testset "@parallel_indices (3D, memopt; 2 arrays, y-stencil)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + B = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(B, 2 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function d2_memopt!(A2, A, B) + if (iy>1 && iy1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) - C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-3,iy+2,iz-1] + C[ix-4,iy+2,iz-1] + @testset "@parallel_indices (3D, memopt; 2 arrays, x-stencil)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + B = @XYYZZField(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(B, 2 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true function d2_memopt!(A2, A, B) + if (ix>1 && ix (3D, memopt, stencilranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)); 3 arrays, x-z-stencil, y-shift)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - B = @zeros(nx, ny, nz); - B2 = @zeros(nx, ny, nz); - B2_ref = @zeros(nx, ny, nz); - C = @zeros(nx, ny, nz); - C2 = @zeros(nx, ny, nz); - C2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function higher_order_memopt!(A2, B2, C2, A, B, C) - if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) - A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] - end - if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) - B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] - end - if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) - C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] - end - return - end - @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); - A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; - B2_ref[7:end-1,3:end,2:end-1] .= B[7:end-1,3:end,3:end] .- 2*B[3:end-5,3:end,2:end-1] .+ B[2:end-6,3:end,2:end-1]; - C2_ref[2:end-1,3:end,1:end-1] .= C[2:end-1,3:end,2:end] .- 2*C[2:end-1,3:end,1:end-1] .+ C[2:end-1,3:end,1:end-1]; - @test all(Array(A2) .== Array(A2_ref)) - @test all(Array(B2) .== Array(B2_ref)) - @test all(Array(C2) .== Array(C2_ref)) - end - @testset "@parallel_indices (3D, memopt, optvars=(A, C), loopdim=3, loopsize=3, optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)); stencilranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)), 3 arrays, x-z-stencil, y-shift)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - B = @zeros(nx, ny, nz); - B2 = @zeros(nx, ny, nz); - B2_ref = @zeros(nx, ny, nz); - C = @zeros(nx, ny, nz); - C2 = @zeros(nx, ny, nz); - C2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - kernel = @gorgeousstring @parallel_indices (ix,iy,iz) memopt=true optvars=(A, C) loopdim=3 loopsize=3 optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)) function higher_order_memopt!(A2, B2, C2, A, B, C) - if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) - A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] - end - if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) - B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] - end - if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) - C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] - end - return - end - @static if $package == $PKG_CUDA - @test occursin("loopoffset = ((CUDA.blockIdx()).z - 1) * 3", kernel) - elseif $package == $PKG_AMDGPU - @test occursin("loopoffset = ((AMDGPU.workgroupIdx()).z - 1) * 3", kernel) - elseif $package == $PKG_METAL - @test occursin("loopoffset = ((Metal.threadgroup_position_in_grid_3d()).z - 1) * 3", kernel) - end - @test occursin("for i = -4:3", kernel) - @test occursin("tz = i + loopoffset", kernel) - @test occursin("A2[ix - 1, iy + 2, iz] = (A_ixm1_iyp2_izp3 - 2A_ixm3_iyp2_iz) + A_ixm4_iyp2_izm2", kernel) - @test occursin("B2[ix + 1, iy + 2, iz + 1] = (B[ix + 1, iy + 2, iz + 2] - 2 * B[ix - 3, iy + 2, iz + 1]) + B[ix - 4, iy + 2, iz + 1]", kernel) - @test occursin("C2[ix - 1, iy + 2, iz - 1] = (C_ixm1_iyp2_iz - 2C_ixm1_iyp2_izm1) + C_ixm1_iyp2_izm1", kernel) - @parallel_indices (ix,iy,iz) memopt=true optvars=(A, C) loopdim=3 loopsize=3 optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)) function higher_order_memopt!(A2, B2, C2, A, B, C) - if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) - A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] - end - if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) - B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] - end - if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) - C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] - end - return - end - @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); - A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; - B2_ref[7:end-1,3:end,2:end-1] .= B[7:end-1,3:end,3:end] .- 2*B[3:end-5,3:end,2:end-1] .+ B[2:end-6,3:end,2:end-1]; - C2_ref[2:end-1,3:end,1:end-1] .= C[2:end-1,3:end,2:end] .- 2*C[2:end-1,3:end,1:end-1] .+ C[2:end-1,3:end,1:end-1]; - @test all(Array(A2) .== Array(A2_ref)) - @test all(Array(B2) .== Array(B2_ref)) - @test all(Array(C2) .== Array(C2_ref)) - end - @testset "@parallel_indices (3D, memopt, optvars=(A, C), loopdim=3, loopsize=3, optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)); stencilranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)), 3 arrays, x-z-stencil, y-shift)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - B = @zeros(nx, ny, nz); - B2 = @zeros(nx, ny, nz); - B2_ref = @zeros(nx, ny, nz); - C = @zeros(nx, ny, nz); - C2 = @zeros(nx, ny, nz); - C2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - kernel = @gorgeousstring @parallel_indices (ix,iy,iz) memopt=true optvars=(A, C) loopdim=3 loopsize=3 optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)) function higher_order_memopt!(A2, B2, C2, A, B, C) - if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) - A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + @testset "@parallel (3D, memopt; 2 arrays, x-y-z- + z-stencil)" begin + lam=dt=_dx=_dy=_dz = $FloatDefault(1) + T = @Field(nxyz); + T2 = @Field(nxyz); + T2_ref = @Field(nxyz); + Ci = @XXYYZField(nxyz); + copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); + copy!(Ci, 2 .* [ix + (iy-1)*size(Ci,1) + (iz-1)*size(Ci,1)*size(Ci,2) for ix=1:size(Ci,1), iy=1:size(Ci,2), iz=1:size(Ci,3)].^3); + @parallel memopt=true loopsize=3 function diffusion3D_step_modified!(T2, T, Ci, lam, dt, _dx, _dy, _dz) + @inn(T2) = @inn(T) + dt*(lam*@d_zi(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) + return + end + @parallel memopt=true diffusion3D_step_modified!(T2, T, Ci, lam, dt, _dx, _dy, _dz); + T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*(Ci[2:end-1,2:end-1,2:end] .- Ci[2:end-1,2:end-1,1:end-1]).*( + ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 + + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 + + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) + ); + @test all(Array(T2) .== Array(T2_ref)) end - if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) - B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + @testset "@parallel (3D, memopt; 2 arrays, x-y-z- + x-stencil)" begin + lam=dt=_dx=_dy=_dz = $FloatDefault(1) + T = @Field(nxyz); + T2 = @Field(nxyz); + T2_ref = @Field(nxyz); + Ci = @XYYZZField(nxyz); + copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); + copy!(Ci, 2 .* [ix + (iy-1)*size(Ci,1) + (iz-1)*size(Ci,1)*size(Ci,2) for ix=1:size(Ci,1), iy=1:size(Ci,2), iz=1:size(Ci,3)].^3); + @parallel memopt=true loopsize=3 function diffusion3D_step_modified!(T2, T, Ci, lam, dt, _dx, _dy, _dz) + @inn(T2) = @inn(T) + dt*(lam*@d_xi(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) + return + end + @parallel memopt=true diffusion3D_step_modified!(T2, T, Ci, lam, dt, _dx, _dy, _dz); + T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*(Ci[2:end,2:end-1,2:end-1] .- Ci[1:end-1,2:end-1,2:end-1]).*( + ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 + + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 + + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) + ); + @test all(Array(T2) .== Array(T2_ref)) end - if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) - C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + @testset "@parallel (3D, memopt; 3 arrays, x-y-z- + y- + x-stencil)" begin + lam=dt=_dx=_dy=_dz = $FloatDefault(1) + T = @Field(nxyz); + T2 = @Field(nxyz); + T2_ref = @Field(nxyz); + Ci = @XYYZZField(nxyz); + B = @Field(nxyz); + copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); + copy!(Ci, 2 .* [ix + (iy-1)*size(Ci,1) + (iz-1)*size(Ci,1)*size(Ci,2) for ix=1:size(Ci,1), iy=1:size(Ci,2), iz=1:size(Ci,3)].^3); + copy!(B, 3 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); + @parallel memopt=true loopsize=3 function diffusion3D_step_modified!(T2, T, Ci, B, lam, dt, _dx, _dy, _dz) + @inn(T2) = @inn(T) + dt*(lam*@d_xi(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) + @d2_yi(B) + return + end + @parallel memopt=true diffusion3D_step_modified!(T2, T, Ci, B, lam, dt, _dx, _dy, _dz); + T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*(Ci[2:end,2:end-1,2:end-1] .- Ci[1:end-1,2:end-1,2:end-1]).*( + ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 + + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 + + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) + ) + ((B[2:end-1,3:end ,2:end-1] .- B[2:end-1,2:end-1,2:end-1]) .- (B[2:end-1,2:end-1,2:end-1] .- B[2:end-1,1:end-2,2:end-1])); + @test all(Array(T2) .== Array(T2_ref)) end - return - end - @static if $package == $PKG_CUDA - @test occursin("loopoffset = ((CUDA.blockIdx()).z - 1) * 3", kernel) - elseif $package == $PKG_AMDGPU - @test occursin("loopoffset = ((AMDGPU.workgroupIdx()).z - 1) * 3", kernel) - elseif $package == $PKG_METAL - @test occursin("loopoffset = ((Metal.threadgroup_position_in_grid_3d()).z - 1) * 3", kernel) - end - @test occursin("for i = -4:3", kernel) - @test occursin("tz = i + loopoffset", kernel) - @test occursin("A2[ix - 1, iy + 2, iz] = (A_ixm1_iyp2_izp3 - 2A_ixm3_iyp2_iz) + A_ixm4_iyp2_izm2", kernel) - @test occursin("B2[ix + 1, iy + 2, iz + 1] = (B[ix + 1, iy + 2, iz + 2] - 2 * B[ix - 3, iy + 2, iz + 1]) + B[ix - 4, iy + 2, iz + 1]", kernel) - @test occursin("C2[ix - 1, iy + 2, iz - 1] = (C_ixm1_iyp2_iz - 2C_ixm1_iyp2_izm1) + C_ixm1_iyp2_izm1", kernel) - @parallel_indices (ix,iy,iz) memopt=true optvars=(A, C) loopdim=3 loopsize=3 optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)) function higher_order_memopt!(A2, B2, C2, A, B, C) - if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) - A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + @testset "@parallel_indices (3D, memopt, stencilranges=(-4:-1, 2:2, -2:3); 3 arrays, x-z-stencil, y-shift)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + B = @Field(nxyz); + B2 = @Field(nxyz); + B2_ref = @Field(nxyz); + C = @Field(nxyz); + C2 = @Field(nxyz); + C2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function higher_order_memopt!(A2, B2, C2, A, B, C) + if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) + A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + end + if (ix-4>1 && ix-11 && iy+2<=size(B2,2) && iz-2>=1 && iz+3<=size(B2,3)) + B2[ix-1,iy+2,iz] = B[ix-1,iy+2,iz+3] - 2*B[ix-3,iy+2,iz] + B[ix-4,iy+2,iz-2] + end + if (ix-4>1 && ix-11 && iy+2<=size(C2,2) && iz-2>=1 && iz+3<=size(C2,3)) + C2[ix-1,iy+2,iz] = C[ix-1,iy+2,iz+3] - 2*C[ix-3,iy+2,iz] + C[ix-4,iy+2,iz-2] + end + return + end + @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); + A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; + B2_ref[5:end-1,3:end,3:end-3] .= B[5:end-1,3:end,6:end] .- 2*B[3:end-3,3:end,3:end-3] .+ B[2:end-4,3:end,1:end-5]; + C2_ref[5:end-1,3:end,3:end-3] .= C[5:end-1,3:end,6:end] .- 2*C[3:end-3,3:end,3:end-3] .+ C[2:end-4,3:end,1:end-5]; + @test all(Array(A2) .== Array(A2_ref)) + @test all(Array(B2) .== Array(B2_ref)) + @test all(Array(C2) .== Array(C2_ref)) end - if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) - B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + @testset "@parallel_indices (3D, memopt, stencilranges=(A=(-4:-1, 2:2, -2:3), B=(-4:-1, 2:2, 1:2), C=(-4:-1, 2:2, -1:0)); 3 arrays, x-z-stencil, y-shift)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + B = @Field(nxyz); + B2 = @Field(nxyz); + B2_ref = @Field(nxyz); + C = @Field(nxyz); + C2 = @Field(nxyz); + C2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function higher_order_memopt!(A2, B2, C2, A, B, C) + if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) + A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + end + if (ix-4>1 && ix-11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) + B2[ix-1,iy+2,iz+1] = B[ix-1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + end + if (ix-4>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) + C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-3,iy+2,iz-1] + C[ix-4,iy+2,iz-1] + end + return + end + @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); + A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; + B2_ref[5:end-1,3:end,2:end-1] .= B[5:end-1,3:end,3:end] .- 2*B[3:end-3,3:end,2:end-1] .+ B[2:end-4,3:end,2:end-1]; + C2_ref[5:end-1,3:end,1:end-1] .= C[5:end-1,3:end,2:end] .- 2*C[3:end-3,3:end,1:end-1] .+ C[2:end-4,3:end,1:end-1]; + @test all(Array(A2) .== Array(A2_ref)) + @test all(Array(B2) .== Array(B2_ref)) + @test all(Array(C2) .== Array(C2_ref)) end - if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) - C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + @testset "@parallel_indices (3D, memopt, stencilranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)); 3 arrays, x-z-stencil, y-shift)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + B = @Field(nxyz); + B2 = @Field(nxyz); + B2_ref = @Field(nxyz); + C = @Field(nxyz); + C2 = @Field(nxyz); + C2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true loopsize=3 function higher_order_memopt!(A2, B2, C2, A, B, C) + if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) + A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + end + if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) + B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + end + if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) + C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + end + return + end + @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); + A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; + B2_ref[7:end-1,3:end,2:end-1] .= B[7:end-1,3:end,3:end] .- 2*B[3:end-5,3:end,2:end-1] .+ B[2:end-6,3:end,2:end-1]; + C2_ref[2:end-1,3:end,1:end-1] .= C[2:end-1,3:end,2:end] .- 2*C[2:end-1,3:end,1:end-1] .+ C[2:end-1,3:end,1:end-1]; + @test all(Array(A2) .== Array(A2_ref)) + @test all(Array(B2) .== Array(B2_ref)) + @test all(Array(C2) .== Array(C2_ref)) end - return - end - @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); - A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; - B2_ref[7:end-1,3:end,2:end-1] .= B[7:end-1,3:end,3:end] .- 2*B[3:end-5,3:end,2:end-1] .+ B[2:end-6,3:end,2:end-1]; - C2_ref[2:end-1,3:end,1:end-1] .= C[2:end-1,3:end,2:end] .- 2*C[2:end-1,3:end,1:end-1] .+ C[2:end-1,3:end,1:end-1]; - @test all(Array(A2) .== Array(A2_ref)) - @test all(Array(B2) .== Array(B2_ref)) - @test all(Array(C2) .== Array(C2_ref)) - end - @testset "@parallel_indices (3D, memopt, optvars=(A, B), loopdim=3, loopsize=3, optranges=(A=(-1:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:1)); stencilranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)), 3 arrays, x-z-stencil, y-shift)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - A2_ref = @zeros(nx, ny, nz); - B = @zeros(nx, ny, nz); - B2 = @zeros(nx, ny, nz); - B2_ref = @zeros(nx, ny, nz); - C = @zeros(nx, ny, nz); - C2 = @zeros(nx, ny, nz); - C2_ref = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - kernel = @gorgeousstring @parallel_indices (ix,iy,iz) memopt=true optvars=(A, B) loopdim=3 loopsize=3 optranges=(A=(-1:-1, 2:2, -2:3), B=(-4:-3, 2:2, 1:1)) function higher_order_memopt!(A2, B2, C2, A, B, C) - if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) - A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + @testset "@parallel_indices (3D, memopt, optvars=(A, C), loopdim=3, loopsize=3, optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)); stencilranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)), 3 arrays, x-z-stencil, y-shift)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + B = @Field(nxyz); + B2 = @Field(nxyz); + B2_ref = @Field(nxyz); + C = @Field(nxyz); + C2 = @Field(nxyz); + C2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + kernel = @gorgeousstring @parallel_indices (ix,iy,iz) memopt=true optvars=(A, C) loopdim=3 loopsize=3 optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)) function higher_order_memopt!(A2, B2, C2, A, B, C) + if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) + A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + end + if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) + B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + end + if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) + C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + end + return + end + @static if $package == $PKG_CUDA + @test occursin("loopoffset = ((CUDA.blockIdx()).z - 1) * 3", kernel) + elseif $package == $PKG_AMDGPU + @test occursin("loopoffset = ((AMDGPU.workgroupIdx()).z - 1) * 3", kernel) + elseif $package == $PKG_METAL + @test occursin("loopoffset = ((Metal.threadgroup_position_in_grid_3d()).z - 1) * 3", kernel) + end + @test occursin("for i = -4:3", kernel) + @test occursin("tz = i + loopoffset", kernel) + @test occursin("A2[ix - 1, iy + 2, iz] = (A_ixm1_iyp2_izp3 - 2A_ixm3_iyp2_iz) + A_ixm4_iyp2_izm2", kernel) + @test occursin("B2[ix + 1, iy + 2, iz + 1] = (B[ix + 1, iy + 2, iz + 2] - 2 * B[ix - 3, iy + 2, iz + 1]) + B[ix - 4, iy + 2, iz + 1]", kernel) + @test occursin("C2[ix - 1, iy + 2, iz - 1] = (C_ixm1_iyp2_iz - 2C_ixm1_iyp2_izm1) + C_ixm1_iyp2_izm1", kernel) + @parallel_indices (ix,iy,iz) memopt=true optvars=(A, C) loopdim=3 loopsize=3 optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)) function higher_order_memopt!(A2, B2, C2, A, B, C) + if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) + A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + end + if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) + B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + end + if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) + C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + end + return + end + @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); + A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; + B2_ref[7:end-1,3:end,2:end-1] .= B[7:end-1,3:end,3:end] .- 2*B[3:end-5,3:end,2:end-1] .+ B[2:end-6,3:end,2:end-1]; + C2_ref[2:end-1,3:end,1:end-1] .= C[2:end-1,3:end,2:end] .- 2*C[2:end-1,3:end,1:end-1] .+ C[2:end-1,3:end,1:end-1]; + @test all(Array(A2) .== Array(A2_ref)) + @test all(Array(B2) .== Array(B2_ref)) + @test all(Array(C2) .== Array(C2_ref)) end - if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) - B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + @testset "@parallel_indices (3D, memopt, optvars=(A, C), loopdim=3, loopsize=3, optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)); stencilranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)), 3 arrays, x-z-stencil, y-shift)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + B = @Field(nxyz); + B2 = @Field(nxyz); + B2_ref = @Field(nxyz); + C = @Field(nxyz); + C2 = @Field(nxyz); + C2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + kernel = @gorgeousstring @parallel_indices (ix,iy,iz) memopt=true optvars=(A, C) loopdim=3 loopsize=3 optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)) function higher_order_memopt!(A2, B2, C2, A, B, C) + if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) + A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + end + if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) + B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + end + if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) + C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + end + return + end + @static if $package == $PKG_CUDA + @test occursin("loopoffset = ((CUDA.blockIdx()).z - 1) * 3", kernel) + elseif $package == $PKG_AMDGPU + @test occursin("loopoffset = ((AMDGPU.workgroupIdx()).z - 1) * 3", kernel) + elseif $package == $PKG_METAL + @test occursin("loopoffset = ((Metal.threadgroup_position_in_grid_3d()).z - 1) * 3", kernel) + end + @test occursin("for i = -4:3", kernel) + @test occursin("tz = i + loopoffset", kernel) + @test occursin("A2[ix - 1, iy + 2, iz] = (A_ixm1_iyp2_izp3 - 2A_ixm3_iyp2_iz) + A_ixm4_iyp2_izm2", kernel) + @test occursin("B2[ix + 1, iy + 2, iz + 1] = (B[ix + 1, iy + 2, iz + 2] - 2 * B[ix - 3, iy + 2, iz + 1]) + B[ix - 4, iy + 2, iz + 1]", kernel) + @test occursin("C2[ix - 1, iy + 2, iz - 1] = (C_ixm1_iyp2_iz - 2C_ixm1_iyp2_izm1) + C_ixm1_iyp2_izm1", kernel) + @parallel_indices (ix,iy,iz) memopt=true optvars=(A, C) loopdim=3 loopsize=3 optranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)) function higher_order_memopt!(A2, B2, C2, A, B, C) + if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) + A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + end + if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) + B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + end + if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) + C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + end + return + end + @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); + A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; + B2_ref[7:end-1,3:end,2:end-1] .= B[7:end-1,3:end,3:end] .- 2*B[3:end-5,3:end,2:end-1] .+ B[2:end-6,3:end,2:end-1]; + C2_ref[2:end-1,3:end,1:end-1] .= C[2:end-1,3:end,2:end] .- 2*C[2:end-1,3:end,1:end-1] .+ C[2:end-1,3:end,1:end-1]; + @test all(Array(A2) .== Array(A2_ref)) + @test all(Array(B2) .== Array(B2_ref)) + @test all(Array(C2) .== Array(C2_ref)) end - if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) - C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + @testset "@parallel_indices (3D, memopt, optvars=(A, B), loopdim=3, loopsize=3, optranges=(A=(-1:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:1)); stencilranges=(A=(-4:-1, 2:2, -2:3), B=(-4:1, 2:2, 1:2), C=(-1:-1, 2:2, -1:0)), 3 arrays, x-z-stencil, y-shift)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + A2_ref = @Field(nxyz); + B = @Field(nxyz); + B2 = @Field(nxyz); + B2_ref = @Field(nxyz); + C = @Field(nxyz); + C2 = @Field(nxyz); + C2_ref = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(B, 2 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + copy!(C, 3 .* [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + kernel = @gorgeousstring @parallel_indices (ix,iy,iz) memopt=true optvars=(A, B) loopdim=3 loopsize=3 optranges=(A=(-1:-1, 2:2, -2:3), B=(-4:-3, 2:2, 1:1)) function higher_order_memopt!(A2, B2, C2, A, B, C) + if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) + A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + end + if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) + B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + end + if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) + C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + end + return + end + @test occursin("A2[ix - 1, iy + 2, iz] = (A_ixm1_iyp2_izp3 - 2 * A[ix - 3, iy + 2, iz]) + A[ix - 4, iy + 2, iz - 2]", kernel) + @test occursin("B2[ix + 1, iy + 2, iz + 1] = (B[ix + 1, iy + 2, iz + 2] - 2B_ixm3_iyp2_izp1) + B_ixm4_iyp2_izp1", kernel) # NOTE: when z is restricted to 1:1 then x cannot include +1, as else the x-y range does not include any z (result: IncoherentArgumentError: incoherent argument in memopt: optranges in z dimension do not include any array access.). + @test occursin("C2[ix - 1, iy + 2, iz - 1] = (C[ix - 1, iy + 2, iz] - 2 * C[ix - 1, iy + 2, iz - 1]) + C[ix - 1, iy + 2, iz - 1]", kernel) + @parallel_indices (ix,iy,iz) memopt=true optvars=(A, B) loopdim=3 loopsize=3 optranges=(A=(-1:-1, 2:2, -2:3), B=(-4:-3, 2:2, 1:1)) function higher_order_memopt!(A2, B2, C2, A, B, C) + if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) + A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + end + if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) + B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + end + if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) + C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + end + return + end + @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); + A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; + B2_ref[7:end-1,3:end,2:end-1] .= B[7:end-1,3:end,3:end] .- 2*B[3:end-5,3:end,2:end-1] .+ B[2:end-6,3:end,2:end-1]; + C2_ref[2:end-1,3:end,1:end-1] .= C[2:end-1,3:end,2:end] .- 2*C[2:end-1,3:end,1:end-1] .+ C[2:end-1,3:end,1:end-1]; + @test all(Array(A2) .== Array(A2_ref)) + @test all(Array(B2) .== Array(B2_ref)) + @test all(Array(C2) .== Array(C2_ref)) end - return end - @test occursin("A2[ix - 1, iy + 2, iz] = (A_ixm1_iyp2_izp3 - 2 * A[ix - 3, iy + 2, iz]) + A[ix - 4, iy + 2, iz - 2]", kernel) - @test occursin("B2[ix + 1, iy + 2, iz + 1] = (B[ix + 1, iy + 2, iz + 2] - 2B_ixm3_iyp2_izp1) + B_ixm4_iyp2_izp1", kernel) # NOTE: when z is restricted to 1:1 then x cannot include +1, as else the x-y range does not include any z (result: IncoherentArgumentError: incoherent argument in memopt: optranges in z dimension do not include any array access.). - @test occursin("C2[ix - 1, iy + 2, iz - 1] = (C[ix - 1, iy + 2, iz] - 2 * C[ix - 1, iy + 2, iz - 1]) + C[ix - 1, iy + 2, iz - 1]", kernel) - @parallel_indices (ix,iy,iz) memopt=true optvars=(A, B) loopdim=3 loopsize=3 optranges=(A=(-1:-1, 2:2, -2:3), B=(-4:-3, 2:2, 1:1)) function higher_order_memopt!(A2, B2, C2, A, B, C) - if (ix-4>1 && ix-11 && iy+2<=size(A2,2) && iz-2>=1 && iz+3<=size(A2,3)) - A2[ix-1,iy+2,iz] = A[ix-1,iy+2,iz+3] - 2*A[ix-3,iy+2,iz] + A[ix-4,iy+2,iz-2] + @testset "@parallel memopt (nx, ny, nz != x .* threads)" begin + nxyz = (33, 7, 8) + @testset "@parallel_indices (3D, memopt, stencilranges=0:0)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel_indices (ix,iy,iz) memopt=true loopsize=3 optvars=A optranges=(A=(0:0,0:0,0:0),) function copy_memopt!(A2, A) + if ix>0 && ix<=size(A2,1) && iy>0 && iy<=size(A2,2) # TODO: needed when ranges is bigger than array + A2[ix,iy,iz] = A[ix,iy,iz] + end + return + end + ranges = (1:64,1:64,1:8) # TODO: must be a multiple of the number of threads + @parallel ranges memopt=true copy_memopt!(A2, A); + @test all(Array(A2) .== Array(A)) end - if (ix-4>1 && ix+11 && iy+2<=size(B2,2) && iz+1>=1 && iz+2<=size(B2,3)) - B2[ix+1,iy+2,iz+1] = B[ix+1,iy+2,iz+2] - 2*B[ix-3,iy+2,iz+1] + B[ix-4,iy+2,iz+1] + @testset "@parallel (3D, memopt, stencilranges=0:0)" begin + A = @Field(nxyz); + A2 = @Field(nxyz); + copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); + @parallel memopt=true loopsize=3 optvars=A optranges=(A=(0:0,0:0,0:0),) function copy_memopt!(A2, A) + @all(A2) = @all(A) + return + end + @parallel memopt=true copy_memopt!(A2, A); + @test all(Array(A2) .== Array(A)) end - if (ix-1>1 && ix-11 && iy+2<=size(C2,2) && iz-1>=1 && iz<=size(C2,3)) - C2[ix-1,iy+2,iz-1] = C[ix-1,iy+2,iz] - 2*C[ix-1,iy+2,iz-1] + C[ix-1,iy+2,iz-1] + @testset "@parallel (3D, memopt, stencilranges=0:2)" begin + lam=dt=_dx=_dy=_dz = $FloatDefault(1) + T = @Field(nxyz); + T2 = @Field(nxyz); + T2_ref = @Field(nxyz); + Ci = @Field(nxyz, @ones); + copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); + @parallel memopt=true loopsize=3 function diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz) + @inn(T2) = @inn(T) + dt*(lam*@inn(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) + return + end + @parallel memopt=true diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz); + T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*Ci[2:end-1,2:end-1,2:end-1].*( + ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 + + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 + + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) + ); + @test all(Array(T2) .== Array(T2_ref)) end - return - end - @parallel memopt=true higher_order_memopt!(A2, B2, C2, A, B, C); - A2_ref[5:end-1,3:end,3:end-3] .= A[5:end-1,3:end,6:end] .- 2*A[3:end-3,3:end,3:end-3] .+ A[2:end-4,3:end,1:end-5]; - B2_ref[7:end-1,3:end,2:end-1] .= B[7:end-1,3:end,3:end] .- 2*B[3:end-5,3:end,2:end-1] .+ B[2:end-6,3:end,2:end-1]; - C2_ref[2:end-1,3:end,1:end-1] .= C[2:end-1,3:end,2:end] .- 2*C[2:end-1,3:end,1:end-1] .+ C[2:end-1,3:end,1:end-1]; - @test all(Array(A2) .== Array(A2_ref)) - @test all(Array(B2) .== Array(B2_ref)) - @test all(Array(C2) .== Array(C2_ref)) - end - end - @testset "@parallel memopt (nx, ny, nz != x .* threads)" begin - nx, ny, nz = 33, 7, 8 - @testset "@parallel_indices (3D, memopt, stencilranges=0:0)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel_indices (ix,iy,iz) memopt=true loopsize=3 optvars=A optranges=(A=(0:0,0:0,0:0),) function copy_memopt!(A2, A) - if ix>0 && ix<=size(A2,1) && iy>0 && iy<=size(A2,2) # TODO: needed when ranges is bigger than array - A2[ix,iy,iz] = A[ix,iy,iz] + @testset "@parallel (3D, memopt; 3 arrays, x-y-z- + y- + x-stencil)" begin + lam=dt=_dx=_dy=_dz = $FloatDefault(1) + T = @Field(nxyz); + T2 = @Field(nxyz); + T2_ref = @Field(nxyz); + Ci = @XYYZZField(nxyz); + B = @Field(nxyz); + copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); + copy!(Ci, 2 .* [ix + (iy-1)*size(Ci,1) + (iz-1)*size(Ci,1)*size(Ci,2) for ix=1:size(Ci,1), iy=1:size(Ci,2), iz=1:size(Ci,3)].^3); + copy!(B, 3 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); + @parallel memopt=true loopsize=3 function diffusion3D_step_modified!(T2, T, Ci, B, lam, dt, _dx, _dy, _dz) + @inn(T2) = @inn(T) + dt*(lam*@d_xi(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) + @d2_yi(B) + return + end + @parallel memopt=true diffusion3D_step_modified!(T2, T, Ci, B, lam, dt, _dx, _dy, _dz); + T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*(Ci[2:end,2:end-1,2:end-1] .- Ci[1:end-1,2:end-1,2:end-1]).*( + ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 + + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 + + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) + ) + ((B[2:end-1,3:end ,2:end-1] .- B[2:end-1,2:end-1,2:end-1]) .- (B[2:end-1,2:end-1,2:end-1] .- B[2:end-1,1:end-2,2:end-1])); + @test all(Array(T2) .== Array(T2_ref)) end - return - end - ranges = (1:64,1:64,1:8) # TODO: must be a multiple of the number of threads - @parallel ranges memopt=true copy_memopt!(A2, A); - @test all(Array(A2) .== Array(A)) - end - @testset "@parallel (3D, memopt, stencilranges=0:0)" begin - A = @zeros(nx, ny, nz); - A2 = @zeros(nx, ny, nz); - copy!(A, [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)].^3); - @parallel memopt=true loopsize=3 optvars=A optranges=(A=(0:0,0:0,0:0),) function copy_memopt!(A2, A) - @all(A2) = @all(A) - return - end - @parallel memopt=true copy_memopt!(A2, A); - @test all(Array(A2) .== Array(A)) - end - @testset "@parallel (3D, memopt, stencilranges=0:2)" begin - lam=dt=_dx=_dy=_dz = $FloatDefault(1) - T = @zeros(nx, ny, nz); - T2 = @zeros(nx, ny, nz); - T2_ref = @zeros(nx, ny, nz); - Ci = @ones(nx, ny, nz); - copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); - @parallel memopt=true loopsize=3 function diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz) - @inn(T2) = @inn(T) + dt*(lam*@inn(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) - return - end - @parallel memopt=true diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz); - T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*Ci[2:end-1,2:end-1,2:end-1].*( - ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 - + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 - + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) - ); - @test all(Array(T2) .== Array(T2_ref)) - end - @testset "@parallel (3D, memopt; 3 arrays, x-y-z- + y- + x-stencil)" begin - lam=dt=_dx=_dy=_dz = $FloatDefault(1) - T = @zeros(nx, ny, nz); - T2 = @zeros(nx, ny, nz); - T2_ref = @zeros(nx, ny, nz); - Ci = @zeros(nx-1, ny, nz); - B = @zeros(nx, ny, nz); - copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); - copy!(Ci, 2 .* [ix + (iy-1)*size(Ci,1) + (iz-1)*size(Ci,1)*size(Ci,2) for ix=1:size(Ci,1), iy=1:size(Ci,2), iz=1:size(Ci,3)].^3); - copy!(B, 3 .* [ix + (iy-1)*size(B,1) + (iz-1)*size(B,1)*size(B,2) for ix=1:size(B,1), iy=1:size(B,2), iz=1:size(B,3)].^3); - @parallel memopt=true loopsize=3 function diffusion3D_step_modified!(T2, T, Ci, B, lam, dt, _dx, _dy, _dz) - @inn(T2) = @inn(T) + dt*(lam*@d_xi(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)) + @d2_yi(B) - return end - @parallel memopt=true diffusion3D_step_modified!(T2, T, Ci, B, lam, dt, _dx, _dy, _dz); - T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*(Ci[2:end,2:end-1,2:end-1] .- Ci[1:end-1,2:end-1,2:end-1]).*( - ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 - + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 - + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) - ) + ((B[2:end-1,3:end ,2:end-1] .- B[2:end-1,2:end-1,2:end-1]) .- (B[2:end-1,2:end-1,2:end-1] .- B[2:end-1,1:end-2,2:end-1])); - @test all(Array(T2) .== Array(T2_ref)) end - end + ))) end end; - @testset "apply masks" begin + @testset "@within" begin + @test @prettystring(@within("@all", A)) == string(:(firstindex(A, 1) <= $ix <= lastindex(A, 1) && (firstindex(A, 2) <= $iy <= lastindex(A, 2) && firstindex(A, 3) <= $iz <= lastindex(A, 3)))) + @test @prettystring(@within("@inn", A)) == string(:(firstindex(A, 1) < $ixi < lastindex(A, 1) && (firstindex(A, 2) < $iyi < lastindex(A, 2) && firstindex(A, 3) < $izi < lastindex(A, 3)))) + end; + @testset "apply masks | handling padding (padding=false (default))" begin expansion = @prettystring(1, @parallel sum!(A, B) = (@all(A) = @all(A) + @all(B); return)) - @test @prettystring(@within("@all", A)) == string(:($ix <= lastindex(A, 1) && ($iy <= lastindex(A, 2) && $iz <= lastindex(A, 3)))) - @test occursin("if $(@prettystring(@within("@all", A)))", expansion) + @test occursin("if $ix_s <= size(A, 1) && ($iy_s <= size(A, 2) && $iz_s <= size(A, 3))", expansion) + expansion = @prettystring(1, @parallel sum!(A, B) = (@inn(A) = @inn(A) + @inn(B); return)) + @test occursin("if $ix_s < size(A, 1) - 1 && ($iy_s < size(A, 2) - 1 && $iz_s < size(A, 3) - 1)", expansion) + @test occursin("A[$ix_s + 1, $iy_s + 1, $iz_s + 1] = A[$ix_s + 1, $iy_s + 1, $iz_s + 1] + B[$ix_s + 1, $iy_s + 1, $iz_s + 1]", expansion) + end; + @testset "apply masks | handling padding (padding=true)" begin + expansion = @prettystring(1, @parallel padding=true sum!(A, B) = (@all(A) = @all(A) + @all(B); return)) + @test occursin("if (A.indices[1])[1] <= $ix_s <= (A.indices[1])[end] && ((A.indices[2])[1] <= $iy_s <= (A.indices[2])[end] && (A.indices[3])[1] <= $iz_s <= (A.indices[3])[end])", expansion) + expansion = @prettystring(1, @parallel padding=true sum!(A, B) = (@inn(A) = @inn(A) + @inn(B); return)) + @test occursin("if (A.indices[1])[1] < $ix_s < (A.indices[1])[end] && ((A.indices[2])[1] < $iy_s < (A.indices[2])[end] && (A.indices[3])[1] < $iz_s < (A.indices[3])[end])", expansion) + @test occursin("A.parent[$ix_s, $iy_s, $iz_s] = A.parent[$ix_s, $iy_s, $iz_s] + B.parent[$ix_s, $iy_s, $iz_s]", expansion) end; @reset_parallel_stencil() end; @testset "2. parallel macros (2D)" begin @require !@is_initialized() - @init_parallel_stencil($package, $FloatDefault, 2) + @init_parallel_stencil($package, $FloatDefault, 2, nonconst_metadata=true) @require @is_initialized() @static if $package in [$PKG_CUDA, $PKG_AMDGPU] # TODO add support for Metal - nx, ny, nz = 32, 8, 1 + nxyz = (32, 8, 1) @testset "@parallel_indices (2D, memopt, stencilranges=(-1:1,-1:1,0:0))" begin lam=dt=_dx=_dy = $FloatDefault(1) - T = @zeros(nx, ny, nz); - T2 = @zeros(nx, ny, nz); - T2_ref = @zeros(nx, ny, nz); - Ci = @ones(nx, ny, nz); + T = @Field(nxyz); + T2 = @Field(nxyz); + T2_ref = @Field(nxyz); + Ci = @Field(nxyz, @ones); copy!(T, [ix + (iy-1)*size(T,1) for ix=1:size(T,1), iy=1:size(T,2), iz=1:1]); @parallel_indices (ix,iy,iz) memopt=true function diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy) if (ix>1 && ix1 && iy (with Fields)" begin + @static if $package != $PKG_POLYESTER # TODO: this needs to be removed once Polyester supports padding + @require !@is_initialized() + @init_parallel_stencil($package, $FloatDefault, 3, padding=true) + @require @is_initialized() + @testset "padding" begin + @testset "@parallel (3D, @all)" begin + A = @Field((4, 5, 6)); + @parallel function write_indices!(A) + @all(A) = $ix + ($iy-1)*size(A,1) + ($iz-1)*size(A,1)*size(A,2); # NOTE: $ix, $iy, $iz come from ParallelStencil.INDICES. + return + end + @parallel write_indices!(A); + @test all(Array(A) .== [ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)]) + end + @testset "@parallel (3D, @inn)" begin + A = @Field((4, 5, 6)); + @parallel function write_indices!(A) + @inn(A) = $ixi + ($iyi-1)*size(A,1) + ($izi-1)*size(A,1)*size(A,2); # NOTE: $ix, $iy, $iz come from ParallelStencil.INDICES. + return + end + @parallel write_indices!(A); + @test all(Array(A)[2:end-1,2:end-1,2:end-1] .== ([ix + (iy-1)*size(A,1) + (iz-1)*size(A,1)*size(A,2) for ix=1:size(A,1), iy=1:size(A,2), iz=1:size(A,3)])[2:end-1,2:end-1,2:end-1]) + end + @testset "@parallel (3D; on-the-fly)" begin + nxyz = (32, 8, 8) + lam=dt=_dx=_dy=_dz = $FloatDefault(1) + T = @Field(nxyz); + T2 = @Field(nxyz); + T2_ref = @Field(nxyz); + Ci = @Field(nxyz, @ones); + copy!(T, [ix + (iy-1)*size(T,1) + (iz-1)*size(T,1)*size(T,2) for ix=1:size(T,1), iy=1:size(T,2), iz=1:size(T,3)].^3); + @parallel function diffusion3D_step!(T2, T, Ci, lam::Data.Number, dt::$FloatDefault, _dx, _dy, _dz) + @all(qx) = -lam*@d_xi(T)*_dx # Fourier's law of heat conduction + @all(qy) = -lam*@d_yi(T)*_dy # ... + @all(qz) = -lam*@d_zi(T)*_dz # ... + @all(dTdt) = @inn(Ci)*(-@d_xa(qx)*_dx - @d_ya(qy)*_dy - @d_za(qz)*_dz) # Conservation of energy + @inn(T2) = @inn(T) + dt*@all(dTdt) # Update of temperature + return + end + @parallel diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz); + T2_ref[2:end-1,2:end-1,2:end-1] .= T[2:end-1,2:end-1,2:end-1] .+ dt.*(lam.*Ci[2:end-1,2:end-1,2:end-1].*( + ((T[3:end ,2:end-1,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[1:end-2,2:end-1,2:end-1])).*_dx^2 + + ((T[2:end-1,3:end ,2:end-1] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,1:end-2,2:end-1])).*_dy^2 + + ((T[2:end-1,2:end-1,3:end ] .- T[2:end-1,2:end-1,2:end-1]) .- (T[2:end-1,2:end-1,2:end-1] .- T[2:end-1,2:end-1,1:end-2])).*_dz^2) + ); + @test all(Array(T2) .== Array(T2_ref)) + end + end; + @reset_parallel_stencil() + end + end; + @testset "4. global defaults" begin @testset "inbounds=true" begin @require !@is_initialized() @init_parallel_stencil($package, $FloatDefault, 1, inbounds=true) @@ -922,6 +982,28 @@ eval(:( @test !occursin("Base.@inbounds begin", expansion) @reset_parallel_stencil() end; + @testset "padding=true" begin + @static if $package != $PKG_POLYESTER # TODO: this needs to be removed once Polyester supports padding + @require !@is_initialized() + @init_parallel_stencil($package, $FloatDefault, 3, padding=true) + @require @is_initialized + @testset "apply masks | handling padding (padding=true (globally))" begin + expansion = @prettystring(1, @parallel sum!(A, B) = (@all(A) = @all(A) + @all(B); return)) + @test occursin("if (A.indices[1])[1] <= $ix_s <= (A.indices[1])[end] && ((A.indices[2])[1] <= $iy_s <= (A.indices[2])[end] && (A.indices[3])[1] <= $iz_s <= (A.indices[3])[end])", expansion) + expansion = @prettystring(1, @parallel sum!(A, B) = (@inn(A) = @inn(A) + @inn(B); return)) + @test occursin("if (A.indices[1])[1] < $ix_s < (A.indices[1])[end] && ((A.indices[2])[1] < $iy_s < (A.indices[2])[end] && (A.indices[3])[1] < $iz_s < (A.indices[3])[end])", expansion) + @test occursin("A.parent[$ix_s, $iy_s, $iz_s] = A.parent[$ix_s, $iy_s, $iz_s] + B.parent[$ix_s, $iy_s, $iz_s]", expansion) + end; + @testset "apply masks | handling padding (padding=false)" begin + expansion = @prettystring(1, @parallel padding=false sum!(A, B) = (@all(A) = @all(A) + @all(B); return)) + @test occursin("if $ix_s <= size(A, 1) && ($iy_s <= size(A, 2) && $iz_s <= size(A, 3))", expansion) + expansion = @prettystring(1, @parallel padding=false sum!(A, B) = (@inn(A) = @inn(A) + @inn(B); return)) + @test occursin("if $ix_s < size(A, 1) - 1 && ($iy_s < size(A, 2) - 1 && $iz_s < size(A, 3) - 1)", expansion) + @test occursin("A[$ix_s + 1, $iy_s + 1, $iz_s + 1] = A[$ix_s + 1, $iy_s + 1, $iz_s + 1] + B[$ix_s + 1, $iy_s + 1, $iz_s + 1]", expansion) + end; + @reset_parallel_stencil() + end + end; @testset "@parallel_indices (I...) (1D)" begin @require !@is_initialized() @init_parallel_stencil($package, $FloatDefault, 1) @@ -965,28 +1047,26 @@ eval(:( @reset_parallel_stencil() end; end; - @testset "4. parallel macros (numbertype and ndims ommited)" begin + @testset "5. parallel macros (numbertype and ndims ommited)" begin @require !@is_initialized() @init_parallel_stencil(package = $package) @require @is_initialized - @testset "Data.Array{T} to Data.Device.Array{T}" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel ndims=3 f(A::Data.Array{T}, B::Data.Array{T}, c::Integer) where T <: PSNumber = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.Device.Array{T}, B::Data.Device.Array{T},", expansion) - end - end; - @testset "Data.Cell{T} to Data.Device.Cell{T}" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel ndims=2 f(A::Data.Cell{T}, B::Data.Cell{T}, c::Integer) where T <: PSNumber = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.Device.Cell{T}, B::Data.Device.Cell{T},", expansion) - end - end; - @testset "Data.CellArray{T} to Data.Device.CellArray{T}" begin - @static if @isgpu($package) - expansion = @prettystring(1, @parallel ndims=1 f(A::Data.CellArray{T}, B::Data.CellArray{T}, c::Integer) where T <: PSNumber = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.Device.CellArray{T}, B::Data.Device.CellArray{T},", expansion) - end - end; + $(interpolate(:__T__, ARRAYTYPES, :( + @testset "Data.__T__{T} to Data.Device.__T__{T}" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel ndims=3 f(A::Data.__T__{T}, B::Data.__T__{T}, c::Integer) where T <: PSNumber = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::Data.Device.__T__{T}, B::Data.Device.__T__{T},", expansion) + end + end; + ))) + $(interpolate(:__T__, FIELDTYPES, :( + @testset "Data.Fields.__T__{T} to Data.Fields.Device.__T__{T}" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel ndims=3 f(A::Data.Fields.__T__{T}, B::Data.Fields.__T__{T}, c::Integer) where T <: PSNumber = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::Data.Fields.Device.__T__{T}, B::Data.Fields.Device.__T__{T},", expansion) + end + end; + ))) @testset "N substitution | ndims tuple expansion" begin @testset "@parallel" begin @testset "N substitution (ndims=2, N=3)" begin @@ -1059,7 +1139,7 @@ eval(:( end; @reset_parallel_stencil() end; - @testset "5. Exceptions" begin + @testset "6. Exceptions" begin @init_parallel_stencil($package, $FloatDefault, 3) @require @is_initialized @testset "arguments @parallel" begin