-
Notifications
You must be signed in to change notification settings - Fork 92
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Initial ideas for a QuadraturePointIterator #883
base: master
Are you sure you want to change the base?
Conversation
I have a similar (but still distinct) design in Thunderbolt """
QuadraturePoint{dim, T}
A simple helper to carry quadrature point information.
"""
struct QuadraturePoint{dim, T}
i::Int
ξ::Vec{dim, T}
end
"""
QuadratureIterator(::QuadratureRule)
QuadratureIterator(::FaceQuadratureRule, local_face_idx::Int)
QuadratureIterator(::CellValues)
QuadratureIterator(::FaceValues)
A helper to loop over the quadrature points in some rule or cache with type [`QuadraturePoint`](@ref).
"""
struct QuadratureIterator{QR<:QuadratureRule}
qr::QR
end
QuadratureIterator(fqr::FaceQuadratureRule, local_face_idx::Int) = QuadratureIterator(fqr.face_rules[local_face_idx])
QuadratureIterator(cv::CellValues) = QuadratureIterator(cv.qr)
QuadratureIterator(fv::FaceValues) = QuadratureIterator(fv.fqr.face_rules[fv.current_face[]])
function Base.iterate(iterator::QuadratureIterator, i = 1)
i > getnquadpoints(iterator.qr) && return nothing
return (QuadraturePoint(i,Ferrite.getpoints(iterator.qr)[i]), i + 1)
end
Base.eltype(::Type{<:QuadraturePoint{<:QuadratureRule{<:AbstractRefShape, dim, T}}}) where {dim, T} = QuadraturePoint{dim, T}
Base.length(iterator::QuadratureIterator) = length(Ferrite.getnquadpoints(iterator.qr))
Ferrite.spatial_coordinate(fe_v::Ferrite.AbstractValues, qp::QuadraturePoint, x::AbstractVector{<:Vec}) = Ferrite.spatial_coordinate(fe_v, qp.i, x)
Ferrite.getdetJdV(cv::CellValues, qp::QuadraturePoint) = Ferrite.getdetJdV(cv, qp.i)
Ferrite.shape_value(cv::CellValues, qp::QuadraturePoint, base_fun_idx::Int) = Ferrite.shape_value(cv, qp.i, base_fun_idx)
Ferrite.shape_gradient(cv::CellValues, qp::QuadraturePoint, base_fun_idx::Int) = Ferrite.shape_gradient(cv, qp.i, base_fun_idx)
Ferrite.function_value(cv::CellValues, qp::QuadraturePoint, ue) = Ferrite.function_value(cv, qp.i, ue)
Ferrite.function_gradient(cv::CellValues, qp::QuadraturePoint, ue) = Ferrite.function_gradient(cv, qp.i, ue)
Ferrite.getdetJdV(fv::FaceValues, qp::QuadraturePoint) = Ferrite.getdetJdV(fv, qp.i)
Ferrite.shape_value(fv::FaceValues, qp::QuadraturePoint, base_fun_idx::Int) = Ferrite.shape_value(fv, qp.i, base_fun_idx)
Ferrite.shape_gradient(fv::FaceValues, qp::QuadraturePoint, base_fun_idx::Int) = Ferrite.shape_gradient(fv, qp.i, base_fun_idx)
Ferrite.function_value(fv::FaceValues, qp::QuadraturePoint, ue) = Ferrite.function_value(fv, qp.i, ue)
Ferrite.function_gradient(fv::FaceValues, qp::QuadraturePoint, ue) = Ferrite.function_gradient(fv, qp.i, ue)
Ferrite.getnormal(fv::FaceValues, qp::QuadraturePoint) = Ferrite.getnormal(fv, qp.i) which is used like this reinit!(cellvalues, cell)
for qp in QuadratureIterator(cellvalues)
D_loc = evaluate_coefficient(element_cache.integrator.D, cell, qp, time)
dΩ = getdetJdV(cellvalues, qp)
for i in 1:n_basefuncs
∇Nᵢ = shape_gradient(cellvalues, qp, i)
for j in 1:n_basefuncs
∇Nⱼ = shape_gradient(cellvalues, qp, j)
Kₑ[i,j] -= ((D_loc ⋅ ∇Nᵢ) ⋅ ∇Nⱼ) * dΩ
end
end
end |
Using your branch as foundation here is what I had roughly in mind using Ferrite, SparseArrays, StaticArrays, LIKWID
using ThreadPinning
pinthreads(:cores)
######################### ThreadedCellValues #########################
struct ThreadedCellValues{FV, GM, QR} <: AbstractCellValues
fun_values::FV # FunctionValues
geo_mapping::GM # GeometryMapping
qr::QR # QuadratureRule
end
ThreadedCellValues(qr::QuadratureRule, ip::Interpolation, args...; kwargs...) = ThreadedCellValues(Float64, qr, ip, args...; kwargs...)
function ThreadedCellValues(::Type{T}, qr, ip::Interpolation, ip_geo::ScalarInterpolation=Ferrite.default_geometric_interpolation(ip); kwargs...) where T
return ThreadedCellValues(T, qr, ip, VectorizedInterpolation(ip_geo); kwargs...)
end
function Base.copy(cv::ThreadedCellValues)
return ThreadedCellValues(copy(cv.fun_values), copy(cv.geo_mapping), copy(cv.qr))
end
# Access geometry values
Base.@propagate_inbounds Ferrite.getngeobasefunctions(cv::ThreadedCellValues) = Ferrite.getngeobasefunctions(cv.geo_mapping)
Base.@propagate_inbounds Ferrite.geometric_value(cv::ThreadedCellValues, args...) = Ferrite.geometric_value(cv.geo_mapping, args...)
Ferrite.geometric_interpolation(cv::ThreadedCellValues) = geometric_interpolation(cv.geo_mapping)
Ferrite.getdetJdV(::ThreadedCellValues, ::Int) = throw(ArgumentError("detJdV is not saved in ThreadedCellValues"))
# Accessors for function values
Ferrite.getnbasefunctions(cv::ThreadedCellValues) = getnbasefunctions(cv.fun_values)
Ferrite.function_interpolation(cv::ThreadedCellValues) = function_interpolation(cv.fun_values)
Ferrite.function_difforder(cv::ThreadedCellValues) = function_difforder(cv.fun_values)
Ferrite.shape_value_type(cv::ThreadedCellValues) = shape_value_type(cv.fun_values)
Ferrite.shape_gradient_type(cv::ThreadedCellValues) = shape_gradient_type(cv.fun_values)
Base.@propagate_inbounds Ferrite.shape_value(cv::ThreadedCellValues, q_point::Int, i::Int) = shape_value(cv.fun_values, q_point, i)
Base.@propagate_inbounds Ferrite.shape_gradient(cv::ThreadedCellValues, q_point::Int, i::Int) = shape_gradient(cv.fun_values, q_point, i)
Base.@propagate_inbounds Ferrite.shape_symmetric_gradient(cv::ThreadedCellValues, q_point::Int, i::Int) = shape_symmetric_gradient(cv.fun_values, q_point, i)
# Access quadrature rule values
Ferrite.getnquadpoints(cv::ThreadedCellValues) = getnquadpoints(cv.qr)
function ThreadedCellValues(::Type{T}, qr::QuadratureRule, ip_fun::Interpolation, ip_geo::VectorizedInterpolation;
update_gradients::Union{Bool,Nothing} = nothing, update_detJdV::Union{Bool,Nothing} = nothing) where T
_update_detJdV = update_detJdV === nothing ? true : update_detJdV
FunDiffOrder = update_gradients === nothing ? 1 : convert(Int, update_gradients) # Logic must change when supporting update_hessian kwargs
GeoDiffOrder = max(Ferrite.required_geo_diff_order(Ferrite.mapping_type(ip_fun), FunDiffOrder), _update_detJdV)
geo_mapping = Ferrite.GeometryMapping{GeoDiffOrder}(T, ip_geo.ip, qr)
fun_values = ThinFunctionValues{FunDiffOrder}(T, ip_fun, qr, ip_geo)
return ThreadedCellValues(fun_values, geo_mapping, qr)
end
######################### ThinFunctionValues #########################
struct ThinFunctionValues{DiffOrder, IP, N_t, dNdξ_t}
ip::IP # ::Interpolation
Nξ::N_t # ::AbstractMatrix{Union{<:Tensor,<:Number}}
dNdξ::dNdξ_t # ::AbstractMatrix{Union{<:Tensor,<:StaticArray}} or Nothing
function ThinFunctionValues(ip::Interpolation, Nξ::N_t, ::Nothing) where {N_t<:AbstractMatrix}
return new{0, typeof(ip), N_t, Nothing}(ip, Nξ, nothing)
end
function ThinFunctionValues(ip::Interpolation, Nξ::N_t, dNdξ::AbstractMatrix) where {N_t<:AbstractMatrix}
return new{1, typeof(ip), N_t, typeof(dNdξ)}(ip, Nξ, dNdξ)
end
end
function ThinFunctionValues{DiffOrder}(::Type{T}, ip::Interpolation, qr::QuadratureRule, ip_geo::VectorizedInterpolation) where {DiffOrder, T}
n_shape = Ferrite.getnbasefunctions(ip)
n_qpoints = Ferrite.getnquadpoints(qr)
Nξ = zeros(Ferrite.typeof_N(T, ip, ip_geo), n_shape, n_qpoints)
if DiffOrder == 0
dNdξ = nothing
elseif DiffOrder == 1
dNdξ = zeros(Ferrite.typeof_dNdξ(T, ip, ip_geo), n_shape, n_qpoints)
else
throw(ArgumentError("Currently only values and gradients can be updated in ThinFunctionValues"))
end
fv = ThinFunctionValues(ip, Nξ, dNdξ)
Ferrite.precompute_values!(fv, Ferrite.getpoints(qr)) # Separate function for qr point update in PointValues
return fv
end
function Ferrite.precompute_values!(fv::ThinFunctionValues{0}, qr_points::Vector{<:Vec})
Ferrite.shape_values!(fv.Nξ, fv.ip, qr_points)
end
function Ferrite.precompute_values!(fv::ThinFunctionValues{1}, qr_points::Vector{<:Vec})
Ferrite.shape_gradients_and_values!(fv.dNdξ, fv.Nξ, fv.ip, qr_points)
end
Base.@propagate_inbounds Ferrite.shape_value(fv::ThinFunctionValues, q_point::Int, i::Int) = fv.Nξ[i, q_point]
Base.@propagate_inbounds Ferrite.shape_gradient(fv::ThinFunctionValues{1}, q_point::Int, i::Int) = fv.dNdξ[i, q_point]
Ferrite.mapping_type(fv::ThinFunctionValues) = Ferrite.mapping_type(fv.ip)
@inline function Ferrite.apply_mapping!(dNdx, funvals::ThinFunctionValues, q_point::Int, args...)
return Ferrite.apply_mapping!(dNdx, funvals, Ferrite.mapping_type(funvals), q_point, args...)
end
# Identity mapping
@inline function Ferrite.apply_mapping!(dNdx, ::ThinFunctionValues{0}, ::Ferrite.IdentityMapping, ::Int, mapping_values, args...)
return nothing
end
@inline function Ferrite.apply_mapping!(dNdx, funvals::ThinFunctionValues{1}, ::Ferrite.IdentityMapping, q_point::Int, mapping_values, args...)
Jinv = Ferrite.calculate_Jinv(Ferrite.getjacobian(mapping_values))
@inbounds for j in 1:Ferrite.getnbasefunctions(funvals)
#dNdx[j] = funvals.dNdξ[j, q_point] ⋅ Jinv # TODO via Tensors.jl
dNdx[j] = Ferrite.dothelper(funvals.dNdξ[j, q_point], Jinv)
end
return nothing
end
@inline Ferrite.getnbasefunctions(fv::ThinFunctionValues) = getnbasefunctions(fv.ip)
######################### Modified variant from QuadratureValuesIterator from PR 883 #########################
struct QuadratureValuesIterator{VT<:Ferrite.AbstractValues, XT}
v::VT
cell_coords::XT
end
function Base.iterate(iterator::QuadratureValuesIterator, q_point=1)
checkbounds(Bool, 1:Ferrite.getnquadpoints(iterator.v), q_point) || return nothing
qp_v = @inbounds quadrature_point_values(iterator.v, q_point, iterator.cell_coords)
return (qp_v, q_point+1)
end
Base.IteratorEltype(::Type{<:QuadratureValuesIterator}) = Base.EltypeUnknown()
Base.length(iterator::QuadratureValuesIterator) = getnquadpoints(iterator.v)
struct QuadratureValues{VT<:Ferrite.AbstractValues, dNdxT, detT, CCT}
v::VT
dNdx::dNdxT
detJdV::detT
q_point::Int
cell_coords::CCT
Base.Base.@propagate_inbounds function QuadratureValues(v::ThreadedCellValues{<:ThinFunctionValues{1}}, q_point::Int, cell_coords)
@boundscheck checkbounds(1:Ferrite.getnbasefunctions(v), q_point)
geo_mapping = v.geo_mapping
fun_values = v.fun_values
n_shape = getnbasefunctions(fun_values.ip)
n_geom_basefuncs = Ferrite.getngeobasefunctions(geo_mapping)
mapping = Ferrite.calculate_mapping(geo_mapping, q_point, cell_coords)
detJ = Ferrite.calculate_detJ(Ferrite.getjacobian(mapping))
# TODO eliminate allocation
# TODO recover Float64 programmatically
dNdx = fill(zero(Ferrite.typeof_dNdx(Float64, fun_values.ip, geo_mapping.ip^3)) * Float64(NaN), n_shape)
# TODO grab cell
cell = nothing
Ferrite.apply_mapping!(dNdx, fun_values, q_point, mapping, cell)
w = Ferrite.getweights(v.qr)[q_point]
return new{typeof(v), typeof(dNdx), typeof(detJ), typeof(cell_coords)}(v, dNdx, w*detJ, q_point, cell_coords)
end
end
@inline quadrature_point_values(fe_v::Ferrite.AbstractValues, q_point, cell_coords) = QuadratureValues(fe_v, q_point, cell_coords)
Base.@propagate_inbounds Ferrite.getngeobasefunctions(qv::QuadratureValues) = Ferrite.getngeobasefunctions(qv.v)
Base.@propagate_inbounds Ferrite.geometric_value(qv::QuadratureValues, i) = Ferrite.geometric_value(qv.v, qv.q_point, i)
Ferrite.geometric_interpolation(qv::QuadratureValues) = Ferrite.geometric_interpolation(qv.v)
Ferrite.getdetJdV(qv::QuadratureValues) = qv.detJdV
# Accessors for function values
Ferrite.getnbasefunctions(qv::QuadratureValues) = Ferrite.getnbasefunctions(qv.v)
Ferrite.function_interpolation(qv::QuadratureValues) = Ferrite.function_interpolation(qv.v)
Ferrite.function_difforder(qv::QuadratureValues) = Ferrite.function_difforder(qv.v)
Ferrite.shape_value_type(qv::QuadratureValues) = Ferrite.shape_value_type(qv.v)
Ferrite.shape_gradient_type(qv::QuadratureValues) = Ferrite.shape_gradient_type(qv.v)
Base.@propagate_inbounds Ferrite.shape_value(qv::QuadratureValues, i::Int) = Ferrite.shape_value(qv.v, qv.q_point, i)
Base.@propagate_inbounds Ferrite.shape_gradient(qv::QuadratureValues, i::Int) = qv.dNdx[i]
# Base.@propagate_inbounds Ferrite.shape_symmetric_gradient(qv::QuadratureValues, i::Int) = Ferrite.shape_symmetric_gradient(qv.v, qv.q_point, i)
# function_<something> overloads without q_point input
@inline Ferrite.function_value(qv::QuadratureValues, args...) = Ferrite.function_value(qv.v, qv.q_point, args...)
@inline Ferrite.function_gradient(qv::QuadratureValues, args...) = Ferrite.function_gradient(qv.v, qv.q_point, args...)
@inline Ferrite.function_symmetric_gradient(qv::QuadratureValues, args...) = Ferrite.function_symmetric_gradient(qv.v, qv.q_point, args...)
@inline Ferrite.function_divergence(qv::QuadratureValues, args...) = Ferrite.function_divergence(qv.v, qv.q_point, args...)
@inline Ferrite.function_curl(qv::QuadratureValues, args...) = Ferrite.function_curl(qv.v, qv.q_point, args...)
# TODO: Interface things not included yet
@inline Ferrite.spatial_coordinate(qv::QuadratureValues, x) = Ferrite.spatial_coordinate(qv.v, qv.q_point, x)
##################################################################################################################
function create_colored_cantilever_grid(celltype, n)
grid = generate_grid(celltype, (10*n, n, n), Vec{3}((0.0, 0.0, 0.0)), Vec{3}((10.0, 1.0, 1.0)))
colors = create_coloring(grid)
return grid, colors
end;
# #### DofHandler
function create_dofhandler(grid::Grid{dim}) where {dim}
dh = DofHandler(grid)
push!(dh, :u, dim) # Add a displacement field
close!(dh)
end;
# ### Stiffness tensor for linear elasticity
function create_stiffness(::Val{dim}) where {dim}
E = 200e9
ν = 0.3
λ = E*ν / ((1+ν) * (1 - 2ν))
μ = E / (2(1+ν))
δ(i,j) = i == j ? 1.0 : 0.0
g(i,j,k,l) = λ*δ(i,j)*δ(k,l) + μ*(δ(i,k)*δ(j,l) + δ(i,l)*δ(j,k))
C = SymmetricTensor{4, dim}(g);
return C
end;
# ## Threaded data structures
#
# ScratchValues is a thread-local collection of data that each thread needs to own,
# since we need to be able to mutate the data in the threads independently
struct ScratchValues{T, TT <: AbstractTensor, dim, Ti}
Ke::Matrix{T}
fe::Vector{T}
# cellvalues::CV
# facevalues::FV
# global_dofs::Vector{Int}
ɛ::Vector{TT}
coordinates::Vector{Vec{dim, T}}
assembler::Ferrite.AssemblerSparsityPattern{T, Ti}
end;
# Each thread need its own CellValues and FaceValues (although, for this example we don't use
# the FaceValues)
function create_values(refshape, dim, order::Int)
## Interpolations and values
interpolation_space = Lagrange{refshape, 1}()^dim
quadrature_rule = QuadratureRule{refshape}(order)
cellvalues = ThreadedCellValues(quadrature_rule, interpolation_space)
return cellvalues
end;
# Create a `ScratchValues` for each thread with the thread local data
function create_scratchvalues(K, f, dh::DofHandler{dim}) where {dim}
nthreads = Threads.nthreads()
assemblers = [start_assemble(K, f) for i in 1:nthreads]
cellvalues = create_values(RefHexahedron, dim, 2)
n_basefuncs = getnbasefunctions(cellvalues)
# global_dofs = [zeros(Int, ndofs_per_cell(dh)) for i in 1:nthreads]
fes = [zeros(n_basefuncs) for i in 1:nthreads] # Local force vector
Kes = [zeros(n_basefuncs, n_basefuncs) for i in 1:nthreads]
ɛs = [[zero(SymmetricTensor{2, dim}) for i in 1:n_basefuncs] for i in 1:nthreads]
coordinates = [[zero(Vec{dim}) for i in 1:length(dh.grid.cells[1].nodes)] for i in 1:nthreads]
return cellvalues, [ScratchValues(Kes[i], fes[i], ɛs[i], coordinates[i], assemblers[i]) for i in 1:nthreads]
end;
# ## Threaded assemble
# The assembly function loops over each color and does a threaded assembly for that color
function doassemble(K::SparseMatrixCSC, colors, grid::Grid, dh::DofHandler, C::SymmetricTensor{4, dim}, f::Vector{Float64}, cellvalues::ThreadedCellValues, scratches::Vector{SV}, b::Vec{dim}) where {dim, SV}
## Each color is safe to assemble threaded
for color in colors
## We try to equipartition the array to increase load per task.
chunk_size = max(1, 1 + length(color) ÷ Threads.nthreads())
color_partition = [color[i:min(i + chunk_size - 1, end)] for i in 1:chunk_size:length(color)]
## Now we should have a 1:1 correspondence between tasks and elements to assemble.
Threads.@threads :static for i in 1:length(color_partition)
# for i in 1:length(color_partition)
for cellid ∈ color_partition[i]
assemble_cell!(scratches[i], cellvalues, cellid, K, grid, dh, C, b)
end
end
end
return K, f
end
# The cell assembly function is written the same way as if it was a single threaded example.
# The only difference is that we unpack the variables from our `scratch`.
function assemble_cell!(scratch::ScratchValues, cellvalues::ThreadedCellValues, cell::Int, K::SparseMatrixCSC,
grid::Grid, dh::DofHandler, C::SymmetricTensor{4, dim}, b::Vec{dim}) where {dim}
## Unpack our stuff from the scratch
Ke, fe, ɛ, coordinates, assembler =
scratch.Ke, scratch.fe,
scratch.ɛ, scratch.coordinates, scratch.assembler
fill!(Ke, 0)
fill!(fe, 0)
n_basefuncs = getnbasefunctions(cellvalues)
## Fill up the coordinates
nodeids = grid.cells[cell].nodes
for j in 1:length(coordinates)
coordinates[j] = grid.nodes[nodeids[j]].x
end
for q_point in QuadratureValuesIterator(cellvalues, coordinates)
for i in 1:n_basefuncs
ɛ[i] = symmetric(shape_gradient(q_point, i))
end
dΩ = getdetJdV(q_point)
for i in 1:n_basefuncs
δu = shape_value(q_point, i)
fe[i] += (δu ⋅ b) * dΩ
ɛC = ɛ[i] ⊡ C
for j in 1:n_basefuncs
Ke[i, j] += (ɛC ⊡ ɛ[j]) * dΩ
end
end
end
assemble!(assembler, celldofs(dh, cell), fe, Ke)
end;
function run_assemble()
refshape = RefHexahedron
quadrature_order = 3
dim = 3
n = 30
grid, colors = create_colored_cantilever_grid(Hexahedron, n);
dh = create_dofhandler(grid);
K = create_sparsity_pattern(dh);
C = create_stiffness(Val{3}());
f = zeros(ndofs(dh))
cellvalues, scratches = create_scratchvalues(K, f, dh)
b = Vec{3}((0.0, 0.0, 0.0))
## compilation
doassemble(K, colors, grid, dh, C, f, cellvalues, scratches, b);
return @perfmon "FLOPS_DP" doassemble(K, colors, grid, dh, C, f, cellvalues, scratches, b);
end
metrics, events = run_assemble();
clocks = [res["Clock [MHz]"] for res in metrics["FLOPS_DP"]]
println("Clock [MHz] (min, avg, max): ", minimum(clocks), " | ", mean(clocks), " | " , maximum(clocks))
thread_times = [res["Runtime unhalted [s]"] for res in metrics["FLOPS_DP"]]
println("Runtime unhalted [s] (min, avg, max): ", minimum(thread_times), " | ", mean(thread_times), " | " , maximum(thread_times))
println("Total runtime [s] ", first([res["Runtime (RDTSC) [s]"] for res in metrics["FLOPS_DP"]])) 2 problems. First, type stability is not fully guaranteed and second the quadrature scratch is allocating. But I hope that I can get the discussion going for this one. :) |
Codecov ReportAttention: Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## master #883 +/- ##
==========================================
- Coverage 93.30% 91.05% -2.26%
==========================================
Files 36 38 +2
Lines 5257 5387 +130
==========================================
Hits 4905 4905
- Misses 352 482 +130 ☔ View full report in Codecov by Sentry. |
I procrastinated and updated this PR by introducing a Some of the code could also be used by not precalculating the unmapped values in |
Some loose ideas, based on discussions with @termi-official.
Could be beneficial to have the same syntax when using on-demand evaluation of the interpolation as for pre-cached values (e.g. CellValues)