-
Notifications
You must be signed in to change notification settings - Fork 195
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimizes fill_halo_regions_open.jl #3792
Conversation
I was doing some profiling on a model with no open boundaries and discovered that this function was causing a big slowdown. I guess this is because the compiler isn't managing to work out that it's just a load of do-nothing operations, but this change appears to make the slowdown go away completely.
What was the slowdown? |
Might be good to profile with |
fill_open_boundary_regions!(fields::NTuple, boundary_conditions, indices, loc, grid, args...; kwargs...) = | ||
ntuple(n->fill_open_boundary_regions!(fields[n], boundary_conditions[n], indices, loc[n], grid, args...; kwargs...), Val(length(fields))) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fill_open_boundary_regions!(fields::NTuple, boundary_conditions, indices, loc, grid, args...; kwargs...) = | |
ntuple(n->fill_open_boundary_regions!(fields[n], boundary_conditions[n], indices, loc[n], grid, args...; kwargs...), Val(length(fields))) | |
function fill_open_boundary_regions!(fields::NTuple, bcs, indices, loc, args...; kw...) | |
N = length(fields) | |
return ntuple(n -> fill_open_boundary_regions!(fields[n], bcs[n], indices, loc[n], args...; kw...), Val(N)) | |
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we can't read the code, we've little to show for all our hard work
The profiles should be documented here. I'll tentatively approve, but the documentation is important. |
@@ -17,13 +17,19 @@ function fill_open_boundary_regions!(field, boundary_conditions, indices, loc, g | |||
open_fill, regular_fill = get_open_halo_filling_functions(loc) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a little convoluted. Why do you need a function to determine the size? Why can't we just use `loc`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comes from normal halo filling and I think its main purpose is for windowed fields:
Oceananigans.jl/src/BoundaryConditions/fill_halo_regions.jl
Lines 349 to 433 in 4dbec26
@inline fill_halo_size(::Tuple, ::WEB, args...) = :yz | |
@inline fill_halo_size(::Tuple, ::SNB, args...) = :xz | |
@inline fill_halo_size(::Tuple, ::TBB, args...) = :xy | |
# If indices are colon, and locations are _not_ Nothing, fill the whole boundary plane! | |
# If locations are _Nothing_, then the kwarg `reduced_dimensions` will allow the size `:xz` | |
# to be correctly interpreted inside `launch!`. | |
@inline fill_halo_size(::OffsetArray, ::WEB, ::Tuple{<:Any, <:Colon, <:Colon}, args...) = :yz | |
@inline fill_halo_size(::OffsetArray, ::SNB, ::Tuple{<:Colon, <:Any, <:Colon}, args...) = :xz | |
@inline fill_halo_size(::OffsetArray, ::TBB, ::Tuple{<:Colon, <:Colon, <:Any}, args...) = :xy | |
# If the index is a Colon and the location is _NOT_ a `Nothing` (i.e. not a `ReducedField`), | |
# then fill the whole boundary, otherwise fill the size of the corresponding array | |
@inline whole_halo(idx, loc) = false | |
@inline whole_halo(idx, ::Nothing) = false | |
@inline whole_halo(::Colon, ::Nothing) = false | |
@inline whole_halo(::Colon, loc) = true | |
# Calculate kernel size for windowed fields. This code is only called when | |
# one or more of the elements of `idx` is not Colon in the two directions perpendicular | |
# to the halo region and `bc` is not `PeriodicBoundaryCondition`. | |
@inline function fill_halo_size(c::OffsetArray, ::WEB, idx, bc, loc, grid) | |
@inbounds begin | |
whole_y_halo = whole_halo(idx[2], loc[2]) | |
whole_z_halo = whole_halo(idx[3], loc[3]) | |
end | |
_, Ny, Nz = size(grid) | |
_, Cy, Cz = size(c) | |
Sy = ifelse(whole_y_halo, Ny, Cy) | |
Sz = ifelse(whole_z_halo, Nz, Cz) | |
return (Sy, Sz) | |
end | |
@inline function fill_halo_size(c::OffsetArray, ::SNB, idx, bc, loc, grid) | |
@inbounds begin | |
whole_x_halo = whole_halo(idx[1], loc[1]) | |
whole_z_halo = whole_halo(idx[3], loc[3]) | |
end | |
Nx, _, Nz = size(grid) | |
Cx, _, Cz = size(c) | |
Sx = ifelse(whole_x_halo, Nx, Cx) | |
Sz = ifelse(whole_z_halo, Nz, Cz) | |
return (Sx, Sz) | |
end | |
@inline function fill_halo_size(c::OffsetArray, ::TBB, idx, bc, loc, grid) | |
@inbounds begin | |
whole_x_halo = whole_halo(idx[1], loc[1]) | |
whole_y_halo = whole_halo(idx[2], loc[2]) | |
end | |
Nx, Ny, _ = size(grid) | |
Cx, Cy, _ = size(c) | |
Sx = ifelse(whole_x_halo, Nx, Cx) | |
Sy = ifelse(whole_y_halo, Ny, Cy) | |
return (Sx, Sy) | |
end | |
# Remember that Periodic BCs also fill halo points! | |
@inline fill_halo_size(c::OffsetArray, ::WEB, idx, ::PBC, args...) = tuple(size(c, 2), size(c, 3)) | |
@inline fill_halo_size(c::OffsetArray, ::SNB, idx, ::PBC, args...) = tuple(size(c, 1), size(c, 3)) | |
@inline fill_halo_size(c::OffsetArray, ::TBB, idx, ::PBC, args...) = tuple(size(c, 1), size(c, 2)) | |
@inline function fill_halo_size(c::OffsetArray, ::WEB, ::Tuple{<:Any, <:Colon, <:Colon}, ::PBC, args...) | |
_, Cy, Cz = size(c) | |
return (Cy, Cz) | |
end | |
@inline function fill_halo_size(c::OffsetArray, ::SNB, ::Tuple{<:Colon, <:Any, <:Colon}, ::PBC, args...) | |
Cx, _, Cz = size(c) | |
return (Cx, Cz) | |
end | |
@inline function fill_halo_size(c::OffsetArray, ::TBB, ::Tuple{<:Colon, <:Colon, <:Any}, ::PBC, args...) | |
Cx, Cy, _ = size(c) | |
return (Cx, Cy) | |
end |
@@ -17,13 +17,19 @@ function fill_open_boundary_regions!(field, boundary_conditions, indices, loc, g | |||
open_fill, regular_fill = get_open_halo_filling_functions(loc) | |||
fill_size = fill_halo_size(field, regular_fill, indices, boundary_conditions, loc, grid) | |||
|
|||
launch!(arch, grid, fill_size, open_fill, field, left_bc, right_bc, loc, grid, args) | |||
fill_open_halo_event!(open_fill, field, left_bc, right_bc, fill_size, loc, arch, grid, args) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fill_open_halo_event!(open_fill, field, left_bc, right_bc, fill_size, loc, arch, grid, args) | |
fill_open_halo!(filling_function, field, left_bc, right_bc, fill_size, loc, arch, grid, args) |
I think it's good to go. The only thing that doesn't quite make sense to me is why `fill_size = fill_halo_size(field, regular_fill_function, indices, boundary_conditions, loc, grid)` depends on `fill_function, regular_fill_function = get_open_halo_filling_functions(loc)` and |
I'll streamline those two lines before I merge. |
Is this PR close to merging? |
I was doing some profiling on a model with no open boundaries and discovered that this function was causing a big slowdown. I guess this is because the compiler isn't managing to work out that it's just a load of do-nothing operations, but this change appears to make the slowdown go away completely.