From cb469134176c303fb81f8decad0f309d6e952bb1 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Wed, 2 Oct 2024 18:26:03 -0400 Subject: [PATCH 1/9] Implement an interface for set! --- src/Fields/set!.jl | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/Fields/set!.jl b/src/Fields/set!.jl index ebd07c8ae5..54bd557997 100644 --- a/src/Fields/set!.jl +++ b/src/Fields/set!.jl @@ -6,6 +6,21 @@ using Oceananigans.Grids: on_architecture, node_names using Oceananigans.Architectures: device, GPU, CPU using Oceananigans.Utils: work_layout +##### +##### Utilities +##### + +function tuple_string(tup::Tuple) + str = prod(string(t, ", ") for t in tup) + return str[1:end-2] # remove trailing ", " +end + +tuple_string(tup::Tuple{}) = "" + +##### +##### set! +##### + function set!(Φ::NamedTuple; kwargs...) for (fldname, value) in kwargs ϕ = getproperty(Φ, fldname) @@ -14,20 +29,21 @@ function set!(Φ::NamedTuple; kwargs...) return nothing end +# This interface helps us do things like set distributed fields +set!(u::Field, f::Function) = set_to_function!(u, f) +set!(u::Field, a::Union{Array, CuArray, OffsetArray}) = set_to_array!(u, a) +set!(u::Field, v::Field) = set_to_field!(u, v) + function set!(u::Field, v) u .= v # fallback return u end -function tuple_string(tup::Tuple) - str = prod(string(t, ", ") for t in tup) - return str[1:end-2] # remove trailing ", " -end - -tuple_string(tup::Tuple{}) = "" - -function set!(u::Field, f::Function) +##### +##### Setting to specific things +##### +function set_to_function!(u, f) # Determine cpu_grid and cpu_u if architecture(u) isa GPU cpu_grid = on_architecture(CPU(), u.grid) @@ -40,7 +56,7 @@ function set!(u::Field, f::Function) # Form a FunctionField from `f` f_field = field(location(u), f, cpu_grid) - # Try to set the FuncitonField to cpu_u + # Try to set the FunctionField to cpu_u try set!(cpu_u, f_field) catch err @@ -69,7 +85,7 @@ function set!(u::Field, f::Function) return u end -function set!(u::Field, f::Union{Array, CuArray, OffsetArray}) +function set_to_array!(u, f) f = on_architecture(architecture(u), f) try @@ -91,7 +107,7 @@ function set!(u::Field, f::Union{Array, CuArray, OffsetArray}) return u end -function set!(u::Field, v::Field) +function set_to_field!(u, v) # We implement some niceities in here that attempt to copy halo data, # and revert to copying just interior points if that fails. From e21d370246651fd1fa2eee06e14a5544396d063c Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Wed, 2 Oct 2024 18:26:23 -0400 Subject: [PATCH 2/9] Cleanup --- src/Oceananigans.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Oceananigans.jl b/src/Oceananigans.jl index 32bc2ce537..2abd97e7a0 100644 --- a/src/Oceananigans.jl +++ b/src/Oceananigans.jl @@ -213,7 +213,6 @@ include("DistributedComputations/DistributedComputations.jl") # TODO: move here #include("ImmersedBoundaries/ImmersedBoundaries.jl") -#include("Distributed/Distributed.jl") #include("MultiRegion/MultiRegion.jl") # Physics, time-stepping, and models @@ -226,8 +225,6 @@ include("Biogeochemistry.jl") # TODO: move above include("ImmersedBoundaries/ImmersedBoundaries.jl") -# include("DistributedComputations/DistributedComputations.jl") - include("Models/Models.jl") # Output and Physics, time-stepping, and models From a5efac0fabb42fde1f67615e45485fead2172e82 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Wed, 2 Oct 2024 23:28:11 -0400 Subject: [PATCH 3/9] Move towards better partition syntax --- src/MultiRegion/multi_region_grid.jl | 4 ++-- src/MultiRegion/x_partitions.jl | 8 ++++---- src/MultiRegion/y_partitions.jl | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/MultiRegion/multi_region_grid.jl b/src/MultiRegion/multi_region_grid.jl index 057975cd7f..0c67d75a49 100644 --- a/src/MultiRegion/multi_region_grid.jl +++ b/src/MultiRegion/multi_region_grid.jl @@ -203,8 +203,8 @@ Adapt an array `a` to be compatible with a `MultiRegionGrid`. function multi_region_object_from_array(a::AbstractArray, mrg::MultiRegionGrid) local_size = construct_regionally(size, mrg) arch = architecture(mrg) - a = on_architecture(CPU(), a) - ma = construct_regionally(partition_global_array, a, mrg.partition, local_size, Iterate(1:length(mrg)), arch) + a = on_architecture(CPU(), a) + ma = construct_regionally(partition, a, mrg.partition, local_size, Iterate(1:length(mrg)), arch) return ma end diff --git a/src/MultiRegion/x_partitions.jl b/src/MultiRegion/x_partitions.jl index 92fb3cf930..328de0d799 100644 --- a/src/MultiRegion/x_partitions.jl +++ b/src/MultiRegion/x_partitions.jl @@ -54,15 +54,15 @@ divide_direction(x::Tuple, p::XPartition) = divide_direction(x::AbstractArray, p::XPartition) = Tuple(x[1+sum(p.div[1:i-1]):1+sum(p.div[1:i])] for i in 1:length(p)) -partition_global_array(a::Function, args...) = a -partition_global_array(a::Field, p::EqualXPartition, args...) = partition_global_array(a.data, p, args...) +partition(a::Function, args...) = a +partition(a::Field, p::EqualXPartition, args...) = partition(a.data, p, args...) -function partition_global_array(a::AbstractArray, ::EqualXPartition, local_size, region, arch) +function partition(a::AbstractArray, ::EqualXPartition, local_size, region, arch) idxs = default_indices(length(size(a))) return on_architecture(arch, a[local_size[1]*(region-1)+1:local_size[1]*region, idxs[2:end]...]) end -function partition_global_array(a::OffsetArray, ::EqualXPartition, local_size, region, arch) +function partition(a::OffsetArray, ::EqualXPartition, local_size, region, arch) idxs = default_indices(length(size(a))) offsets = (a.offsets[1], Tuple(0 for i in 1:length(idxs)-1)...) return on_architecture(arch, OffsetArray(a[local_size[1]*(region-1)+1+offsets[1]:local_size[1]*region-offsets[1], idxs[2:end]...], offsets...)) diff --git a/src/MultiRegion/y_partitions.jl b/src/MultiRegion/y_partitions.jl index 06374a5a58..4780891568 100644 --- a/src/MultiRegion/y_partitions.jl +++ b/src/MultiRegion/y_partitions.jl @@ -50,15 +50,15 @@ function divide_direction(x::AbstractArray, p::EqualYPartition) return Tuple(x[1+(i-1)*nelem:1+i*nelem] for i in 1:length(p)) end -partition_global_array(a::Field, p::EqualYPartition, args...) = partition_global_array(a.data, p, args...) +partition(a::Field, p::EqualYPartition, args...) = partition(a.data, p, args...) -function partition_global_array(a::AbstractArray, ::EqualYPartition, local_size, region, arch) +function partition(a::AbstractArray, ::EqualYPartition, local_size, region, arch) idxs = default_indices(length(size(a))) offsets = (a.offsets[1], Tuple(0 for i in 1:length(idxs)-1)...) return on_architecture(arch, OffsetArray(a[local_size[1]*(region-1)+1+offsets[1]:local_size[1]*region-offsets[1], idxs[2:end]...], offsets...)) end -function partition_global_array(a::OffsetArray, ::EqualYPartition, local_size, region, arch) +function partition(a::OffsetArray, ::EqualYPartition, local_size, region, arch) idxs = default_indices(length(size(a))) offsets = (0, a.offsets[2], Tuple(0 for i in 1:length(idxs)-2)...) return on_architecture(arch, OffsetArray(a[idxs[1], local_size[2]*(region-1)+1+offsets[2]:local_size[2]*region-offsets[2], idxs[3:end]...], offsets...)) From 07be47d38cc7d162f6857daa1b054c6814427286 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 3 Oct 2024 00:21:14 -0400 Subject: [PATCH 4/9] Clean up distributed stuff --- .../distributed_fields.jl | 66 ++++---- .../distributed_grids.jl | 2 +- .../partition_assemble.jl | 148 ++++++++++-------- 3 files changed, 119 insertions(+), 97 deletions(-) diff --git a/src/DistributedComputations/distributed_fields.jl b/src/DistributedComputations/distributed_fields.jl index 026c452e4d..17f5ef930e 100644 --- a/src/DistributedComputations/distributed_fields.jl +++ b/src/DistributedComputations/distributed_fields.jl @@ -1,16 +1,20 @@ -import Oceananigans.Fields: Field, FieldBoundaryBuffers, location, set! -import Oceananigans.BoundaryConditions: fill_halo_regions! - using CUDA: CuArray +using OffsetArrays: OffsetArray using Oceananigans.Grids: topology -using Oceananigans.Fields: validate_field_data, indices, validate_boundary_conditions, validate_indices, recv_from_buffers! +using Oceananigans.Fields: validate_field_data, indices, validate_boundary_conditions +using Oceananigans.Fields: validate_indices, recv_from_buffers!, set_to_array!, set_to_field! + +import Oceananigans.Fields: Field, FieldBoundaryBuffers, location, set! +import Oceananigans.BoundaryConditions: fill_halo_regions! function Field((LX, LY, LZ)::Tuple, grid::DistributedGrid, data, old_bcs, indices::Tuple, op, status) - arch = architecture(grid) indices = validate_indices(indices, (LX, LY, LZ), grid) validate_field_data((LX, LY, LZ), data, grid, indices) validate_boundary_conditions((LX, LY, LZ), grid, old_bcs) - new_bcs = inject_halo_communication_boundary_conditions(old_bcs, arch.local_rank, arch.connectivity, topology(grid)) + + arch = architecture(grid) + rank = arch.local_rank + new_bcs = inject_halo_communication_boundary_conditions(old_bcs, rank, arch.connectivity, topology(grid)) buffers = FieldBoundaryBuffers(grid, data, new_bcs) return Field{LX, LY, LZ}(grid, data, new_bcs, indices, op, status, buffers) @@ -19,42 +23,36 @@ end const DistributedField = Field{<:Any, <:Any, <:Any, <:Any, <:DistributedGrid} const DistributedFieldTuple = NamedTuple{S, <:NTuple{N, DistributedField}} where {S, N} -function set!(u::DistributedField, f::Function) - arch = architecture(u) - if child_architecture(arch) isa GPU - cpu_grid = on_architecture(cpu_architecture(arch), u.grid) - u_cpu = Field(location(u), cpu_grid, eltype(u); indices = indices(u)) - f_field = field(location(u), f, cpu_grid) - set!(u_cpu, f_field) - set!(u, u_cpu) - elseif child_architecture(arch) isa CPU - f_field = field(location(u), f, u.grid) - set!(u, f_field) +global_size(f::DistributedField) = global_size(architecture(f), size(f)) + +# Automatically partition under the hood if sizes are compatible +function set!(u::DistributedField, V::Union{Array, CuArray, OffsetArray}) + NV = size(V) + Nu = global_size(u) + + # Suppress singleton indices + NV′ = filter(n -> n > 1, NV) + Nu′ = filter(n -> n > 1, Nu) + + if NV′ == Nu′ + v = partition(V, u) + else + v = V end - return u + return set_to_array!(u, v) end -# Automatically partition under the hood if sizes are compatible -function set!(u::DistributedField, v::Union{Array, CuArray}) - gsize = global_size(architecture(u), size(u)) - - if size(v) == gsize - f = partition_global_array(architecture(u), v, size(u)) - u .= f - return u +function set!(u::DistributedField, V::Field) + if size(V) == global_size(u) + v = partition(V, u) + return set_to_array!(u, v) else - try - f = on_architecture(architecture(u), v) - u .= f - return u - - catch - throw(ArgumentError("ERROR: DimensionMismatch: array could not be set to match destination field")) - end + return set_to_field!(u, V) end end + """ synchronize_communication!(field) diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index 686f9c9541..2e205ee95d 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -347,4 +347,4 @@ function reconstruct_global_topology(T, R, r, r1, r2, comm) else return Bounded end -end \ No newline at end of file +end diff --git a/src/DistributedComputations/partition_assemble.jl b/src/DistributedComputations/partition_assemble.jl index c56d0b2f22..00904016bc 100644 --- a/src/DistributedComputations/partition_assemble.jl +++ b/src/DistributedComputations/partition_assemble.jl @@ -1,8 +1,8 @@ -import Oceananigans.Architectures: on_architecture +using Oceananigans.Fields: Field -all_reduce(op, val, arch::Distributed) = - MPI.Allreduce(val, op, arch.communicator) +import Oceananigans.Architectures: on_architecture +all_reduce(op, val, arch::Distributed) = MPI.Allreduce(val, op, arch.communicator) all_reduce(op, val, arch) = val # MPI Barrier @@ -10,52 +10,57 @@ barrier!(arch) = nothing barrier!(arch::Distributed) = MPI.Barrier(arch.communicator) """ - concatenate_local_sizes(n, arch::Distributed) + concatenate_local_sizes(local_size, arch::Distributed) -Return a 3-Tuple containing a vector of `size(grid, idx)` for each rank in +Return a 3-Tuple containing a vector of `size(grid, dim)` for each rank in all 3 directions. """ -concatenate_local_sizes(n, arch::Distributed) = - Tuple(concatenate_local_sizes(n, arch, i) for i in 1:length(n)) +concatenate_local_sizes(local_size, arch::Distributed) = + Tuple(concatenate_local_sizes(local_size, arch, d) for d in 1:length(local_size)) + +concatenate_local_sizes(sz, arch, dim) = concatenate_local_sizes(sz[dim], arch, dim) -function concatenate_local_sizes(n, arch::Distributed, idx) - R = arch.ranks[idx] - r = arch.local_index[idx] - n = n isa Number ? n : n[idx] - l = zeros(Int, R) +function concatenate_local_sizes(n::Number, arch::Distributed, dim) + R = arch.ranks[dim] + r = arch.local_index[dim] + N = zeros(Int, R) - r1, r2 = arch.local_index[[1, 2, 3] .!= idx] + r1, r2 = arch.local_index[[1, 2, 3] .!= dim] if r1 == 1 && r2 == 1 - l[r] = n + N[r] = n end - MPI.Allreduce!(l, +, arch.communicator) + MPI.Allreduce!(N, +, arch.communicator) - return l + return N end -# Partitioning (localization of global objects) and assembly (global assembly of local objects) -# Used for grid constructors (cpu_face_constructor_x, cpu_face_constructor_y, cpu_face_constructor_z) -# We need to repeat the value at the right boundary -function partition_coordinate(c::AbstractVector, n, arch, idx) - nl = concatenate_local_sizes(n, arch, idx) - r = arch.local_index[idx] +""" + partition_coordinate(coordinate, n, arch, dim) + +Return the local component of the global `coordinate`, which has +local length `n` and is distributed on `arch`itecture +in the x-, y-, or z- `dim`ension. +""" +function partition_coordinate(c::AbstractVector, n, arch, dim) + nl = concatenate_local_sizes(n, arch, dim) + r = arch.local_index[dim] start_idx = sum(nl[1:r-1]) + 1 # sum of all previous rank's dimension + 1 - end_idx = if r == ranks(arch)[idx] + end_idx = if r == ranks(arch)[dim] length(c) else sum(nl[1:r]) + 1 end - return c[start_idx : end_idx] + return c[start_idx:end_idx] end -function partition_coordinate(c::Tuple, n, arch, idx) - nl = concatenate_local_sizes(n, arch, idx) +function partition_coordinate(c::Tuple, n, arch, dim) + nl = concatenate_local_sizes(n, arch, dim) N = sum(nl) - R = arch.ranks[idx] + R = arch.ranks[dim] Δl = (c[2] - c[1]) / N l = Tuple{Float64, Float64}[(c[1], c[1] + Δl * nl[1])] @@ -64,7 +69,7 @@ function partition_coordinate(c::Tuple, n, arch, idx) push!(l, (lp, lp + Δl * nl[i])) end - return l[arch.local_index[idx]] + return l[arch.local_index[dim]] end """ @@ -75,11 +80,11 @@ a local number of elements `Nc`, number of ranks `Nr`, rank `r`, and `arch`itecture. Since we use a global reduction, only ranks at positions 1 in the other two directions `r1 == 1` and `r2 == 1` fill the 1D array. """ -function assemble_coordinate(c_local::AbstractVector, n, arch, idx) - nl = concatenate_local_sizes(n, arch, idx) - R = arch.ranks[idx] - r = arch.local_index[idx] - r2 = [arch.local_index[i] for i in filter(x -> x != idx, (1, 2, 3))] +function assemble_coordinate(c_local::AbstractVector, n, arch, dim) + nl = concatenate_local_sizes(n, arch, dim) + R = arch.ranks[dim] + r = arch.local_index[dim] + r2 = [arch.local_index[i] for i in filter(x -> x != dim, (1, 2, 3))] c_global = zeros(eltype(c_local), sum(nl)+1) @@ -94,13 +99,13 @@ function assemble_coordinate(c_local::AbstractVector, n, arch, idx) end # Simple case, just take the first and the last core -function assemble_coordinate(c_local::Tuple, n, arch, idx) +function assemble_coordinate(c_local::Tuple, n, arch, dim) c_global = zeros(Float64, 2) rank = arch.local_index - R = arch.ranks[idx] - r = rank[idx] - r2 = [rank[i] for i in filter(x -> x != idx, (1, 2, 3))] + R = arch.ranks[dim] + r = rank[dim] + r2 = [rank[i] for i in filter(x -> x != dim, (1, 2, 3))] if rank[1] == 1 && rank[2] == 1 && rank[3] == 1 c_global[1] = c_local[1] @@ -113,42 +118,61 @@ function assemble_coordinate(c_local::Tuple, n, arch, idx) return tuple(c_global...) end -# TODO: partition_global_array and construct_global_array -# do not currently work for 3D parallelizations -# (They are not used anywhere in the code at the moment exept for immersed boundaries) +# TODO: make partition and construct_global_array work for 3D distribution. + """ - partition_global_array(arch, c_global, (nx, ny, nz)) + partition(A, b) + +Partition the globally-sized `A` into local arrays with the same size as `b`. +""" +partition(A, b::Field) = partition(A, architecture(b), size(b)) +partition(F::Field, b::Field) = partition(interior(F), b) +partition(f::Function, arch, n) = f -Partition a global array in local arrays of size `(nx, ny)` if 2D or `(nx, ny, nz)` is 3D. -Usefull for boundary arrays, forcings and initial conditions. """ -partition_global_array(arch, c_global::AbstractArray, n) = c_global -partition_global_array(arch, c_global::Function, n) = c_global + partition(A, arch, local_size) -# Here we assume that we cannot partition in z (we should remove support for that) -function partition_global_array(arch::Distributed, c_global::AbstractArray, n) - c_global = on_architecture(CPU(), c_global) +Partition the globally-sized `A` into local arrays with `local_size` on `arch`itecture. +""" +function partition(A::AbstractArray, arch::Distributed, local_size) + A = on_architecture(CPU(), A) ri, rj, rk = arch.local_index + dims = length(size(A)) - dims = length(size(c_global)) - nx, ny, nz = concatenate_local_sizes(n, arch) + # Vectors with the local size for every rank + nxs, nys, nzs = concatenate_local_sizes(local_size, arch) - nz = nz[1] + # The local size + nx = nxs[ri] + ny = nys[rj] + nz = nzs[1] + # @assert (nx, ny, nz) == local_size - if dims == 2 - c_local = zeros(eltype(c_global), nx[ri], ny[rj]) + up_to = nxs[1:ri-1] + including = nxs[1:ri] + i₁ = sum(up_to) + 1 + i₂ = sum(including) - c_local .= c_global[1 + sum(nx[1:ri-1]) : sum(nx[1:ri]), - 1 + sum(ny[1:rj-1]) : sum(ny[1:rj])] - else - c_local = zeros(eltype(c_global), nx[ri], ny[rj], nz) + up_to = nys[1:rj-1] + including = nys[1:rj] + j₁ = sum(up_to) + 1 + j₂ = sum(including) - c_local .= c_global[1 + sum(nx[1:ri-1]) : sum(nx[1:ri]), - 1 + sum(ny[1:rj-1]) : sum(ny[1:rj]), - 1:nz] + ii = UnitRange(i₁, i₂) + jj = UnitRange(j₁, j₂) + kk = 1:nz # no partitioning in z + + # TODO: undo this toxic assumption that all 2D arrays span x, y. + if dims == 2 + a = zeros(eltype(A), nx, ny) + a .= A[ii, jj] + else + a = zeros(eltype(A), nx, ny, nz) + a .= A[ii, jj, 1:nz] end - return on_architecture(child_architecture(arch), c_local) + + return on_architecture(child_architecture(arch), a) end """ @@ -160,7 +184,7 @@ Usefull for boundary arrays, forcings and initial conditions. construct_global_array(arch, c_local::AbstractArray, n) = c_local construct_global_array(arch, c_local::Function, N) = c_local -# TODO: This does not work for 3D parallelizations!!! +# TODO: This does not work for 3D parallelizations function construct_global_array(arch::Distributed, c_local::AbstractArray, n) c_local = on_architecture(CPU(), c_local) From 0a3f395490f455fd3047c003d395e8b2650e6764 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 3 Oct 2024 00:21:23 -0400 Subject: [PATCH 5/9] Generalize set! to encompass distributed architectures --- src/Fields/set!.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Fields/set!.jl b/src/Fields/set!.jl index 54bd557997..330a96b793 100644 --- a/src/Fields/set!.jl +++ b/src/Fields/set!.jl @@ -45,10 +45,10 @@ end function set_to_function!(u, f) # Determine cpu_grid and cpu_u - if architecture(u) isa GPU + if child_architecture(u) isa GPU cpu_grid = on_architecture(CPU(), u.grid) cpu_u = Field(location(u), cpu_grid; indices = indices(u)) - elseif architecture(u) isa CPU + elseif child_architecture(u) isa CPU cpu_grid = u.grid cpu_u = u end @@ -78,7 +78,7 @@ function set_to_function!(u, f) end # Transfer data to GPU if u is on the GPU - if architecture(u) isa GPU + if child_architecture(u) isa GPU set!(u, cpu_u) end @@ -111,7 +111,7 @@ function set_to_field!(u, v) # We implement some niceities in here that attempt to copy halo data, # and revert to copying just interior points if that fails. - if architecture(u) === architecture(v) + if child_architecture(u) === child_architecture(v) # Note: we could try to copy first halo point even when halo # regions are a different size. That's a bit more complicated than # the below so we leave it for the future. @@ -123,7 +123,7 @@ function set_to_field!(u, v) interior(u) .= interior(v) end else - v_data = on_architecture(architecture(u), v.data) + v_data = on_architecture(child_architecture(u), v.data) # As above, we permit ourselves a little ambition and try to copy halo data: try From 44893f67d3d5027a10669c8f57b0d80f782c49a0 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 3 Oct 2024 08:27:13 -0400 Subject: [PATCH 6/9] Define child_architecture for AbstractField --- src/Architectures.jl | 2 +- src/Fields/abstract_field.jl | 3 ++- src/Fields/set!.jl | 9 ++++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index d45bbe4765..49743244f4 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -59,7 +59,7 @@ architecture(a::OffsetArray) = architecture(parent(a)) Return `arch`itecture of child processes. On single-process, non-distributed systems, return `arch`. """ -child_architecture(arch) = arch +child_architecture(arch::AbstractSerialArchitecture) = arch array_type(::CPU) = Array array_type(::GPU) = CuArray diff --git a/src/Fields/abstract_field.jl b/src/Fields/abstract_field.jl index 502e4c7097..c8c9b7b5f1 100644 --- a/src/Fields/abstract_field.jl +++ b/src/Fields/abstract_field.jl @@ -10,7 +10,7 @@ using Oceananigans.Grids: interior_indices, interior_parent_indices import Base: minimum, maximum, extrema import Oceananigans: location, instantiated_location -import Oceananigans.Architectures: architecture +import Oceananigans.Architectures: architecture, child_architecture import Oceananigans.Grids: interior_x_indices, interior_y_indices, interior_z_indices import Oceananigans.Grids: total_size, topology, nodes, xnodes, ynodes, znodes, node, xnode, ynode, znode import Oceananigans.Utils: datatuple @@ -43,6 +43,7 @@ Base.eltype(::AbstractField{<:Any, <:Any, <:Any, <:Any, T}) where T = T "Returns the architecture of on which `f` is defined." architecture(f::AbstractField) = architecture(f.grid) +child_architecture(f::AbstractField) = child_architecture(architecture(f)) "Returns the topology of a fields' `grid`." @inline topology(f::AbstractField, args...) = topology(f.grid, args...) diff --git a/src/Fields/set!.jl b/src/Fields/set!.jl index 330a96b793..e4b8a29cb6 100644 --- a/src/Fields/set!.jl +++ b/src/Fields/set!.jl @@ -3,7 +3,7 @@ using KernelAbstractions: @kernel, @index using Adapt: adapt_structure using Oceananigans.Grids: on_architecture, node_names -using Oceananigans.Architectures: device, GPU, CPU +using Oceananigans.Architectures: child_architecture, device, GPU, CPU using Oceananigans.Utils: work_layout ##### @@ -44,11 +44,14 @@ end ##### function set_to_function!(u, f) + # Supports serial and distributed + arch = child_architecture(u) + # Determine cpu_grid and cpu_u - if child_architecture(u) isa GPU + if arch isa GPU cpu_grid = on_architecture(CPU(), u.grid) cpu_u = Field(location(u), cpu_grid; indices = indices(u)) - elseif child_architecture(u) isa CPU + elseif arch isa CPU cpu_grid = u.grid cpu_u = u end From ecbbb78036d53888b64c92aff0eac26687c46438 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 3 Oct 2024 11:47:08 -0400 Subject: [PATCH 7/9] Clean up scatter_local_grids (but more is needed) --- .../DistributedComputations.jl | 2 +- src/DistributedComputations/distributed_grids.jl | 14 +++++++------- .../distributed_immersed_boundaries.jl | 10 +++++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/DistributedComputations/DistributedComputations.jl b/src/DistributedComputations/DistributedComputations.jl index f3c1880d61..205997ce24 100644 --- a/src/DistributedComputations/DistributedComputations.jl +++ b/src/DistributedComputations/DistributedComputations.jl @@ -2,7 +2,7 @@ module DistributedComputations export Distributed, Partition, Equal, Fractional, - child_architecture, reconstruct_global_grid, + child_architecture, reconstruct_global_grid, partition, inject_halo_communication_boundary_conditions, DistributedFFTBasedPoissonSolver diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index 2e205ee95d..1b52477149 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -176,7 +176,7 @@ function reconstruct_global_grid(grid::DistributedRectilinearGrid) nx, ny, nz = n = size(grid) Hx, Hy, Hz = H = halo_size(grid) - Nx, Ny, Nz = map(sum, concatenate_local_sizes(n, arch)) + Nx, Ny, Nz = global_size(arch, n) TX, TY, TZ = topology(grid) @@ -219,7 +219,7 @@ function reconstruct_global_grid(grid::DistributedLatitudeLongitudeGrid) nλ, nφ, nz = n = size(grid) Hλ, Hφ, Hz = H = halo_size(grid) - Nλ, Nφ, Nz = map(sum, concatenate_local_sizes(n, arch)) + Nλ, Nφ, Nz = global_size(arch, n) TX, TY, TZ = topology(grid) @@ -266,12 +266,12 @@ end # take precedence on `DistributedGrid` function with_halo(new_halo, grid::DistributedRectilinearGrid) new_grid = with_halo(new_halo, reconstruct_global_grid(grid)) - return scatter_local_grids(architecture(grid), new_grid, size(grid)) + return scatter_local_grids(new_grid, architecture(grid), size(grid)) end function with_halo(new_halo, grid::DistributedLatitudeLongitudeGrid) new_grid = with_halo(new_halo, reconstruct_global_grid(grid)) - return scatter_local_grids(architecture(grid), new_grid, size(grid)) + return scatter_local_grids(new_grid, architecture(grid), size(grid)) end # Extending child_architecture for grids @@ -295,13 +295,13 @@ function scatter_grid_properties(global_grid) return x, y, z, topo, halo end -function scatter_local_grids(arch::Distributed, global_grid::RectilinearGrid, local_size) +function scatter_local_grids(global_grid::RectilinearGrid, arch::Distributed, local_size) x, y, z, topo, halo = scatter_grid_properties(global_grid) global_sz = global_size(arch, local_size) return RectilinearGrid(arch, eltype(global_grid); size=global_sz, x=x, y=y, z=z, halo=halo, topology=topo) end -function scatter_local_grids(arch::Distributed, global_grid::LatitudeLongitudeGrid, local_size) +function scatter_local_grids(global_grid::LatitudeLongitudeGrid, arch::Distributed, local_size) x, y, z, topo, halo = scatter_grid_properties(global_grid) global_sz = global_size(arch, local_size) return LatitudeLongitudeGrid(arch, eltype(global_grid); size=global_sz, longitude=x, @@ -347,4 +347,4 @@ function reconstruct_global_topology(T, R, r, r1, r2, comm) else return Bounded end -end +end \ No newline at end of file diff --git a/src/ImmersedBoundaries/distributed_immersed_boundaries.jl b/src/ImmersedBoundaries/distributed_immersed_boundaries.jl index 726edda8be..4b19202b92 100644 --- a/src/ImmersedBoundaries/distributed_immersed_boundaries.jl +++ b/src/ImmersedBoundaries/distributed_immersed_boundaries.jl @@ -26,12 +26,16 @@ function with_halo(new_halo, grid::DistributedImmersedBoundaryGrid) return ImmersedBoundaryGrid(new_underlying_grid, new_immersed_boundary) end -function scatter_local_grids(arch::Distributed, global_grid::ImmersedBoundaryGrid, local_size) +function scatter_local_grids(global_grid::ImmersedBoundaryGrid, arch::Distributed, local_size) ib = global_grid.immersed_boundary ug = global_grid.underlying_grid - local_ug = scatter_local_grids(arch, ug, local_size) - local_ib = getnamewrapper(ib)(partition_global_array(arch, ib.bottom_height, local_size)) + local_ug = scatter_local_grids(ug, arch, local_size) + + # Kinda hacky + local_bottom_height = partition(ib.bottom_height, arch, local_size) + ImmersedBoundaryConstructor = getnamewrapper(ib) + local_ib = ImmersedBoundaryConstructor(local_bottom_height) return ImmersedBoundaryGrid(local_ug, local_ib) end From 7c8b8808c269d72464b8a68b1c3c890de85d0b5b Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 3 Oct 2024 11:52:10 -0400 Subject: [PATCH 8/9] Update tests --- ...ostatic_free_turbulence_regression_test.jl | 10 ++-- ...n_large_eddy_simulation_regression_test.jl | 42 +++++++-------- .../rayleigh_benard_regression_test.jl | 54 +++++++++---------- .../thermal_bubble_regression_test.jl | 35 ++++++------ test/test_distributed_hydrostatic_model.jl | 12 ++--- 5 files changed, 76 insertions(+), 77 deletions(-) diff --git a/test/regression_tests/hydrostatic_free_turbulence_regression_test.jl b/test/regression_tests/hydrostatic_free_turbulence_regression_test.jl index 7b52aaadfc..8e52a910d5 100644 --- a/test/regression_tests/hydrostatic_free_turbulence_regression_test.jl +++ b/test/regression_tests/hydrostatic_free_turbulence_regression_test.jl @@ -6,7 +6,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels: HydrostaticFreeSurfaceMo using Oceananigans.TurbulenceClosures: HorizontalScalarDiffusivity using Oceananigans.DistributedComputations: Distributed, DistributedGrid, DistributedComputations, all_reduce -using Oceananigans.DistributedComputations: reconstruct_global_topology, partition_global_array, cpu_architecture +using Oceananigans.DistributedComputations: reconstruct_global_topology, partition, cpu_architecture using JLD2 @@ -121,10 +121,10 @@ function run_hydrostatic_free_turbulence_regression_test(grid, free_surface; reg # Data was saved with 2 halos per direction (see issue #3260) H = 2 truth_fields = ( - u = partition_global_array(cpu_arch, file["timeseries/u/$stop_iteration"][H+1:end-H, H+1:end-H, H+1:end-H], size(u)), - v = partition_global_array(cpu_arch, file["timeseries/v/$stop_iteration"][H+1:end-H, H+1:end-H, H+1:end-H], size(v)), - w = partition_global_array(cpu_arch, file["timeseries/w/$stop_iteration"][H+1:end-H, H+1:end-H, H+1:end-H], size(w)), - η = partition_global_array(cpu_arch, file["timeseries/η/$stop_iteration"][H+1:end-H, H+1:end-H, :], size(η)) + u = partition(file["timeseries/u/$stop_iteration"][H+1:end-H, H+1:end-H, H+1:end-H], cpu_arch, size(u)), + v = partition(file["timeseries/v/$stop_iteration"][H+1:end-H, H+1:end-H, H+1:end-H], cpu_arch, size(v)), + w = partition(file["timeseries/w/$stop_iteration"][H+1:end-H, H+1:end-H, H+1:end-H], cpu_arch, size(w)), + η = partition(file["timeseries/η/$stop_iteration"][H+1:end-H, H+1:end-H, :], cpu_arch, size(η)) ) close(file) diff --git a/test/regression_tests/ocean_large_eddy_simulation_regression_test.jl b/test/regression_tests/ocean_large_eddy_simulation_regression_test.jl index 5dfa63bae5..8a9ac391e9 100644 --- a/test/regression_tests/ocean_large_eddy_simulation_regression_test.jl +++ b/test/regression_tests/ocean_large_eddy_simulation_regression_test.jl @@ -1,6 +1,6 @@ using Oceananigans.TurbulenceClosures: AnisotropicMinimumDissipation using Oceananigans.TimeSteppers: update_state! -using Oceananigans.DistributedComputations: cpu_architecture, partition_global_array +using Oceananigans.DistributedComputations: cpu_architecture, partition function run_ocean_large_eddy_simulation_regression_test(arch, grid_type, closure) name = "ocean_large_eddy_simulation_" * string(typeof(first(closure)).name.wrapper) @@ -85,23 +85,23 @@ function run_ocean_large_eddy_simulation_regression_test(arch, grid_type, closur cpu_arch = cpu_architecture(architecture(grid)) - u₀ = partition_global_array(cpu_arch, ArrayType(solution₀.u[2:end-1, 2:end-1, 2:end-1]), size(u)) - v₀ = partition_global_array(cpu_arch, ArrayType(solution₀.v[2:end-1, 2:end-1, 2:end-1]), size(v)) - w₀ = partition_global_array(cpu_arch, ArrayType(solution₀.w[2:end-1, 2:end-1, 2:end-1]), size(w)) - T₀ = partition_global_array(cpu_arch, ArrayType(solution₀.T[2:end-1, 2:end-1, 2:end-1]), size(T)) - S₀ = partition_global_array(cpu_arch, ArrayType(solution₀.S[2:end-1, 2:end-1, 2:end-1]), size(S)) + u₀ = partition(ArrayType(solution₀.u[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(u)) + v₀ = partition(ArrayType(solution₀.v[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(v)) + w₀ = partition(ArrayType(solution₀.w[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(w)) + T₀ = partition(ArrayType(solution₀.T[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(T)) + S₀ = partition(ArrayType(solution₀.S[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(S)) - Gⁿu₀ = partition_global_array(cpu_arch, ArrayType(Gⁿ₀.u)[2:end-1, 2:end-1, 2:end-1], size(u)) - Gⁿv₀ = partition_global_array(cpu_arch, ArrayType(Gⁿ₀.v)[2:end-1, 2:end-1, 2:end-1], size(v)) - Gⁿw₀ = partition_global_array(cpu_arch, ArrayType(Gⁿ₀.w)[2:end-1, 2:end-1, 2:end-1], size(w)) - GⁿT₀ = partition_global_array(cpu_arch, ArrayType(Gⁿ₀.T)[2:end-1, 2:end-1, 2:end-1], size(T)) - GⁿS₀ = partition_global_array(cpu_arch, ArrayType(Gⁿ₀.S)[2:end-1, 2:end-1, 2:end-1], size(S)) + Gⁿu₀ = partition(ArrayType(Gⁿ₀.u)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(u)) + Gⁿv₀ = partition(ArrayType(Gⁿ₀.v)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(v)) + Gⁿw₀ = partition(ArrayType(Gⁿ₀.w)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(w)) + GⁿT₀ = partition(ArrayType(Gⁿ₀.T)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(T)) + GⁿS₀ = partition(ArrayType(Gⁿ₀.S)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(S)) - G⁻u₀ = partition_global_array(cpu_arch, ArrayType(G⁻₀.u)[2:end-1, 2:end-1, 2:end-1], size(u)) - G⁻v₀ = partition_global_array(cpu_arch, ArrayType(G⁻₀.v)[2:end-1, 2:end-1, 2:end-1], size(v)) - G⁻w₀ = partition_global_array(cpu_arch, ArrayType(G⁻₀.w)[2:end-1, 2:end-1, 2:end-1], size(w)) - G⁻T₀ = partition_global_array(cpu_arch, ArrayType(G⁻₀.T)[2:end-1, 2:end-1, 2:end-1], size(T)) - G⁻S₀ = partition_global_array(cpu_arch, ArrayType(G⁻₀.S)[2:end-1, 2:end-1, 2:end-1], size(S)) + G⁻u₀ = partition(ArrayType(G⁻₀.u)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(u)) + G⁻v₀ = partition(ArrayType(G⁻₀.v)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(v)) + G⁻w₀ = partition(ArrayType(G⁻₀.w)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(w)) + G⁻T₀ = partition(ArrayType(G⁻₀.T)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(T)) + G⁻S₀ = partition(ArrayType(G⁻₀.S)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(S)) interior(model.velocities.u) .= u₀ interior(model.velocities.v) .= v₀ @@ -142,11 +142,11 @@ function run_ocean_large_eddy_simulation_regression_test(arch, grid_type, closur T = Array(interior(model.tracers.T)), S = Array(interior(model.tracers.S))) - u₁ = partition_global_array(cpu_arch, Array(solution₁.u)[2:end-1, 2:end-1, 2:end-1], size(u)) - v₁ = partition_global_array(cpu_arch, Array(solution₁.v)[2:end-1, 2:end-1, 2:end-1], size(v)) - w₁ = partition_global_array(cpu_arch, Array(solution₁.w)[2:end-1, 2:end-1, 2:end-2], size(test_fields.w)) - T₁ = partition_global_array(cpu_arch, Array(solution₁.T)[2:end-1, 2:end-1, 2:end-1], size(T)) - S₁ = partition_global_array(cpu_arch, Array(solution₁.S)[2:end-1, 2:end-1, 2:end-1], size(S)) + u₁ = partition(Array(solution₁.u)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(u)) + v₁ = partition(Array(solution₁.v)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(v)) + w₁ = partition(Array(solution₁.w)[2:end-1, 2:end-1, 2:end-2], cpu_arch, size(test_fields.w)) + T₁ = partition(Array(solution₁.T)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(T)) + S₁ = partition(Array(solution₁.S)[2:end-1, 2:end-1, 2:end-1], cpu_arch, size(S)) @show size(test_fields.w), size(w₁) diff --git a/test/regression_tests/rayleigh_benard_regression_test.jl b/test/regression_tests/rayleigh_benard_regression_test.jl index dfaa78ef1d..30395690ac 100644 --- a/test/regression_tests/rayleigh_benard_regression_test.jl +++ b/test/regression_tests/rayleigh_benard_regression_test.jl @@ -1,6 +1,6 @@ using Oceananigans.Grids: xnode, znode using Oceananigans.TimeSteppers: update_state! -using Oceananigans.DistributedComputations: cpu_architecture, partition_global_array, reconstruct_global_grid +using Oceananigans.DistributedComputations: cpu_architecture, partition, reconstruct_global_grid function run_rayleigh_benard_regression_test(arch, grid_type) @@ -107,23 +107,23 @@ function run_rayleigh_benard_regression_test(arch, grid_type) cpu_arch = cpu_architecture(architecture(grid)) - u₀ = partition_global_array(cpu_arch, ArrayType(solution₀.u[2:end-1, 2:end-1, 2:end-1]), size(u)) - v₀ = partition_global_array(cpu_arch, ArrayType(solution₀.v[2:end-1, 2:end-1, 2:end-1]), size(v)) - w₀ = partition_global_array(cpu_arch, ArrayType(solution₀.w[2:end-1, 2:end-1, 2:end-1]), size(w)) - b₀ = partition_global_array(cpu_arch, ArrayType(solution₀.b[2:end-1, 2:end-1, 2:end-1]), size(b)) - c₀ = partition_global_array(cpu_arch, ArrayType(solution₀.c[2:end-1, 2:end-1, 2:end-1]), size(c)) + u₀ = partition(ArrayType(solution₀.u[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(u)) + v₀ = partition(ArrayType(solution₀.v[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(v)) + w₀ = partition(ArrayType(solution₀.w[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(w)) + b₀ = partition(ArrayType(solution₀.b[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(b)) + c₀ = partition(ArrayType(solution₀.c[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(c)) - Gⁿu₀ = partition_global_array(cpu_arch, ArrayType(Gⁿ₀.u[2:end-1, 2:end-1, 2:end-1]), size(u)) - Gⁿv₀ = partition_global_array(cpu_arch, ArrayType(Gⁿ₀.v[2:end-1, 2:end-1, 2:end-1]), size(v)) - Gⁿw₀ = partition_global_array(cpu_arch, ArrayType(Gⁿ₀.w[2:end-1, 2:end-1, 2:end-1]), size(w)) - Gⁿb₀ = partition_global_array(cpu_arch, ArrayType(Gⁿ₀.b[2:end-1, 2:end-1, 2:end-1]), size(b)) - Gⁿc₀ = partition_global_array(cpu_arch, ArrayType(Gⁿ₀.c[2:end-1, 2:end-1, 2:end-1]), size(c)) + Gⁿu₀ = partition(ArrayType(Gⁿ₀.u[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(u)) + Gⁿv₀ = partition(ArrayType(Gⁿ₀.v[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(v)) + Gⁿw₀ = partition(ArrayType(Gⁿ₀.w[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(w)) + Gⁿb₀ = partition(ArrayType(Gⁿ₀.b[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(b)) + Gⁿc₀ = partition(ArrayType(Gⁿ₀.c[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(c)) - G⁻u₀ = partition_global_array(cpu_arch, ArrayType(G⁻₀.u[2:end-1, 2:end-1, 2:end-1]), size(u)) - G⁻v₀ = partition_global_array(cpu_arch, ArrayType(G⁻₀.v[2:end-1, 2:end-1, 2:end-1]), size(v)) - G⁻w₀ = partition_global_array(cpu_arch, ArrayType(G⁻₀.w[2:end-1, 2:end-1, 2:end-1]), size(w)) - G⁻b₀ = partition_global_array(cpu_arch, ArrayType(G⁻₀.b[2:end-1, 2:end-1, 2:end-1]), size(b)) - G⁻c₀ = partition_global_array(cpu_arch, ArrayType(G⁻₀.c[2:end-1, 2:end-1, 2:end-1]), size(c)) + G⁻u₀ = partition(ArrayType(G⁻₀.u[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(u)) + G⁻v₀ = partition(ArrayType(G⁻₀.v[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(v)) + G⁻w₀ = partition(ArrayType(G⁻₀.w[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(w)) + G⁻b₀ = partition(ArrayType(G⁻₀.b[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(b)) + G⁻c₀ = partition(ArrayType(G⁻₀.c[2:end-1, 2:end-1, 2:end-1]), cpu_arch, size(c)) set!(model, u = u₀, v = v₀, w = w₀, b = b₀, c = c₀) @@ -171,20 +171,20 @@ function run_rayleigh_benard_regression_test(arch, grid_type) b₁ = interior(solution₁.b, global_grid) c₁ = interior(solution₁.c, global_grid) - correct_fields = (u = partition_global_array(cpu_arch, Array(u₁), size(u)), - v = partition_global_array(cpu_arch, Array(v₁), size(v)), - w = partition_global_array(cpu_arch, Array(w₁), size(test_fields.w)), - b = partition_global_array(cpu_arch, Array(b₁), size(b)), - c = partition_global_array(cpu_arch, Array(c₁), size(c))) + reference_fields = (u = partition(Array(u₁), cpu_arch, size(u)), + v = partition(Array(v₁), cpu_arch, size(v)), + w = partition(Array(w₁), cpu_arch, size(test_fields.w)), + b = partition(Array(b₁), cpu_arch, size(b)), + c = partition(Array(c₁), cpu_arch, size(c))) - summarize_regression_test(test_fields, correct_fields) + summarize_regression_test(test_fields, reference_fields) CUDA.allowscalar(true) - @test all(test_fields.u .≈ correct_fields.u) - @test all(test_fields.v .≈ correct_fields.v) - @test all(test_fields.w .≈ correct_fields.w) - @test all(test_fields.b .≈ correct_fields.b) - @test all(test_fields.c .≈ correct_fields.c) + @test all(test_fields.u .≈ reference_fields.u) + @test all(test_fields.v .≈ reference_fields.v) + @test all(test_fields.w .≈ reference_fields.w) + @test all(test_fields.b .≈ reference_fields.b) + @test all(test_fields.c .≈ reference_fields.c) CUDA.allowscalar(false) return nothing diff --git a/test/regression_tests/thermal_bubble_regression_test.jl b/test/regression_tests/thermal_bubble_regression_test.jl index 151346e317..24e6c90dbf 100644 --- a/test/regression_tests/thermal_bubble_regression_test.jl +++ b/test/regression_tests/thermal_bubble_regression_test.jl @@ -1,4 +1,4 @@ -using Oceananigans.DistributedComputations: cpu_architecture, partition_global_array +using Oceananigans.DistributedComputations: cpu_architecture, partition function run_thermal_bubble_regression_test(arch, grid_type) Nx, Ny, Nz = 16, 16, 16 @@ -73,28 +73,27 @@ function run_thermal_bubble_regression_test(arch, grid_type) copyto!(test_fields.T, interior(model.tracers.T)) copyto!(test_fields.S, interior(model.tracers.S)) - correct_fields = (u = ds["u"][:, :, :, end], - v = ds["v"][:, :, :, end], - w = ds["w"][:, :, :, end], - T = ds["T"][:, :, :, end], - S = ds["S"][:, :, :, end]) + reference_fields = (u = ds["u"][:, :, :, end], + v = ds["v"][:, :, :, end], + w = ds["w"][:, :, :, end], + T = ds["T"][:, :, :, end], + S = ds["S"][:, :, :, end]) cpu_arch = cpu_architecture(architecture(grid)) - correct_fields = (u = partition_global_array(cpu_arch, correct_fields.u, size(correct_fields.u)), - v = partition_global_array(cpu_arch, correct_fields.v, size(correct_fields.v)), - w = partition_global_array(cpu_arch, correct_fields.w, size(correct_fields.w)), - T = partition_global_array(cpu_arch, correct_fields.T, size(correct_fields.T)), - S = partition_global_array(cpu_arch, correct_fields.S, size(correct_fields.S)) - ) + reference_fields = (u = partition(reference_fields.u, cpu_arch, size(reference_fields.u)), + v = partition(reference_fields.v, cpu_arch, size(reference_fields.v)), + w = partition(reference_fields.w, cpu_arch, size(reference_fields.w)), + T = partition(reference_fields.T, cpu_arch, size(reference_fields.T)), + S = partition(reference_fields.S, cpu_arch, size(reference_fields.S))) - summarize_regression_test(test_fields, correct_fields) + summarize_regression_test(test_fields, reference_fields) - @test all(test_fields.u .≈ correct_fields.u) - @test all(test_fields.v .≈ correct_fields.v) - @test all(test_fields.w .≈ correct_fields.w) - @test all(test_fields.T .≈ correct_fields.T) - @test all(test_fields.S .≈ correct_fields.S) + @test all(test_fields.u .≈ reference_fields.u) + @test all(test_fields.v .≈ reference_fields.v) + @test all(test_fields.w .≈ reference_fields.w) + @test all(test_fields.T .≈ reference_fields.T) + @test all(test_fields.S .≈ reference_fields.S) return nothing end diff --git a/test/test_distributed_hydrostatic_model.jl b/test/test_distributed_hydrostatic_model.jl index d1cf7fd622..c11d3c5248 100644 --- a/test/test_distributed_hydrostatic_model.jl +++ b/test/test_distributed_hydrostatic_model.jl @@ -26,7 +26,7 @@ MPI.Initialized() || MPI.Init() # to initialize MPI. using Oceananigans.Operators: hack_cosd -using Oceananigans.DistributedComputations: partition_global_array, all_reduce, cpu_architecture, reconstruct_global_grid +using Oceananigans.DistributedComputations: partition, all_reduce, cpu_architecture, reconstruct_global_grid function Δ_min(grid) Δx_min = minimum_xspacing(grid, Center(), Center(), Center()) @@ -116,11 +116,11 @@ for arch in archs c = interior(on_architecture(cpu_arch, c)) η = interior(on_architecture(cpu_arch, η)) - us = partition_global_array(cpu_arch, us, size(u)) - vs = partition_global_array(cpu_arch, vs, size(v)) - ws = partition_global_array(cpu_arch, ws, size(w)) - cs = partition_global_array(cpu_arch, cs, size(c)) - ηs = partition_global_array(cpu_arch, ηs, size(η)) + us = partition(us, cpu_arch, size(u)) + vs = partition(vs, cpu_arch, size(v)) + ws = partition(ws, cpu_arch, size(w)) + cs = partition(cs, cpu_arch, size(c)) + ηs = partition(ηs, cpu_arch, size(η)) atol = eps(eltype(grid)) rtol = sqrt(eps(eltype(grid))) From 71ff2d87f40b923a6408c128c81381f4693896fe Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 3 Oct 2024 13:36:39 -0400 Subject: [PATCH 9/9] Do nothing on serial architecture --- src/DistributedComputations/partition_assemble.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DistributedComputations/partition_assemble.jl b/src/DistributedComputations/partition_assemble.jl index 00904016bc..c9bbe6b8d3 100644 --- a/src/DistributedComputations/partition_assemble.jl +++ b/src/DistributedComputations/partition_assemble.jl @@ -128,6 +128,7 @@ Partition the globally-sized `A` into local arrays with the same size as `b`. partition(A, b::Field) = partition(A, architecture(b), size(b)) partition(F::Field, b::Field) = partition(interior(F), b) partition(f::Function, arch, n) = f +partition(A::AbstractArray, arch::AbstractSerialArchitecture, local_size) = A """ partition(A, arch, local_size)