diff --git a/firedrake/matrix_free/operators.py b/firedrake/matrix_free/operators.py index 93d86f00d9..90f4d08611 100644 --- a/firedrake/matrix_free/operators.py +++ b/firedrake/matrix_free/operators.py @@ -339,7 +339,6 @@ def view(self, mat, viewer=None): type(self).__name__) def getInfo(self, mat, info=None): - from mpi4py import MPI memory = self._x.dat.nbytes + self._y.dat.nbytes if hasattr(self, "_xbc"): memory += self._xbc.dat.nbytes diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 826e1f4447..6aba95b301 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1,132 +1,168 @@ -from functools import lru_cache, partial +from functools import partial +from itertools import chain, product from firedrake.petsc import PETSc from firedrake.preconditioners.base import PCBase +from firedrake.preconditioners.patch import bcdofs +from firedrake.preconditioners.pmg import (prolongation_matrix_matfree, + evaluate_dual, + get_permutation_to_line_elements) +from firedrake.preconditioners.facet_split import split_dofs, restricted_dofs +from firedrake.formmanipulation import ExtractSubBlock +from firedrake.function import Function +from firedrake.functionspace import FunctionSpace +from firedrake.ufl_expr import TestFunction, TestFunctions, TrialFunctions +from firedrake.utils import cached_property +from firedrake_citations import Citations +from ufl.algorithms.ad import expand_derivatives +from ufl.algorithms.expand_indices import expand_indices +from tsfc.finatinterface import create_element +from pyop2.compilation import load +from pyop2.sparsity import get_preallocation +from pyop2.utils import get_petsc_dir + import firedrake.dmhooks as dmhooks -import firedrake -import numpy import ufl -from firedrake_citations import Citations +import FIAT +import finat +import numpy +import ctypes +import operator -Citations().add("Brubeck2021", """ -@misc{Brubeck2021, +Citations().add("Brubeck2022a", """ +@article{Brubeck2022a, title={A scalable and robust vertex-star relaxation for high-order {FEM}}, author={Brubeck, Pablo D. and Farrell, Patrick E.}, + journal = {SIAM J. Sci. Comput.}, + volume = {44}, + number = {5}, + pages = {A2991-A3017}, + year = {2022}, + doi = {10.1137/21M1444187} +""") + +Citations().add("Brubeck2022b", """ +@misc{Brubeck2022b, + title={{Multigrid solvers for the de Rham complex with optimal complexity in polynomial degree}}, + author={Brubeck, Pablo D. and Farrell, Patrick E.}, archiveprefix = {arXiv}, - eprint = {2107.14758}, + eprint = {2211.14284}, primaryclass = {math.NA}, - year={2021} -} + year={2022} """) -__all__ = ("FDMPC",) + +__all__ = ("FDMPC", "PoissonFDMPC") class FDMPC(PCBase): """ A preconditioner for tensor-product elements that changes the shape - functions so that the H^1 Riesz map is diagonalized in the interior of a - Cartesian cell, and assembles a global sparse matrix on which other + functions so that the H(d) (d in {grad, curl, div}) Riesz map is sparse on + Cartesian cells, and assembles a global sparse matrix on which other preconditioners, such as `ASMStarPC`, can be applied. Here we assume that the volume integrals in the Jacobian can be expressed as: - inner(grad(v), alpha(grad(u)))*dx + inner(v, beta(u))*dx + inner(d(v), alpha * d(u))*dx + inner(v, beta * u)*dx - where alpha and beta are linear functions (tensor contractions). - The sparse matrix is obtained by approximating alpha and beta by cell-wise - constants and discarding the coefficients in alpha that couple together - mixed derivatives and mixed components. + where alpha and beta are possibly tensor-valued. The sparse matrix is + obtained by approximating (v, alpha * u) and (v, beta * u) as diagonal mass + matrices. - For spaces that are not H^1-conforming, this preconditioner will use - the symmetric interior-penalty DG method. The penalty coefficient can be - provided in the application context, keyed on ``"eta"``. + The PETSc options inspected by this class are: + - 'fdm_mat_type': can be either 'aij' or 'sbaij' + - 'fdm_static_condensation': are we assembling the Schur complement on facets? """ _prefix = "fdm_" + _variant = "fdm" + _citation = "Brubeck2022b" + _cache = {} @PETSc.Log.EventDecorator("FDMInit") def initialize(self, pc): - from firedrake.assemble import allocate_matrix, assemble - from firedrake.preconditioners.pmg import prolongation_matrix_matfree - from firedrake.preconditioners.patch import bcdofs - Citations().register("Brubeck2021") - + Citations().register(self._citation) self.comm = pc.comm - + Amat, Pmat = pc.getOperators() prefix = pc.getOptionsPrefix() options_prefix = prefix + self._prefix + options = PETSc.Options(options_prefix) + + use_amat = options.getBool("pc_use_amat", True) + use_static_condensation = options.getBool("static_condensation", False) + pmat_type = options.getString("mat_type", PETSc.Mat.Type.AIJ) appctx = self.get_appctx(pc) - fcp = appctx.get("form_compiler_parameters") + fcp = appctx.get("form_compiler_parameters") or {} + self.appctx = appctx # Get original Jacobian form and bcs - octx = dmhooks.get_appctx(pc.getDM()) - mat_type = octx.mat_type - oproblem = octx._problem - J = oproblem.J - bcs = tuple(oproblem.bcs) + if Pmat.getType() == "python": + ctx = Pmat.getPythonContext() + J = ctx.a + bcs = tuple(ctx.bcs) + mat_type = "matfree" + else: + ctx = dmhooks.get_appctx(pc.getDM()) + J = ctx.Jp or ctx.J + bcs = tuple(ctx._problem.bcs) + mat_type = ctx.mat_type + + # TODO assemble Schur complements specified by a SLATE Tensor + # we might extract the form on the interface-interface block like this: + # + # if isinstance(J, firedrake.slate.TensorBase) and use_static_condensation: + # J = J.children[0].form + if not isinstance(J, ufl.Form): + raise ValueError("Expecting a ufl.Form, not a %r" % type(J)) # Transform the problem into the space with FDM shape functions - V = J.arguments()[0].function_space() + V = J.arguments()[-1].function_space() element = V.ufl_element() - e_fdm = element.reconstruct(variant="fdm") - - def interp_nullspace(I, nsp): - if not nsp: - return nsp - vectors = [] - for x in nsp.getVecs(): - y = I.createVecLeft() - I.mult(x, y) - vectors.append(y) - if nsp.hasConstant(): - y = I.createVecLeft() - x = I.createVecRight() - x.set(1.0E0) - I.mult(x, y) - vectors.append(y) - x.destroy() - return PETSc.NullSpace().create(constant=False, vectors=vectors, comm=nsp.getComm()) - - # Matrix-free assembly of the transformed Jacobian + e_fdm = element.reconstruct(variant=self._variant) + if element == e_fdm: V_fdm, J_fdm, bcs_fdm = (V, J, bcs) - Amat, _ = pc.getOperators() - self._ctx_ref = octx else: - V_fdm = firedrake.FunctionSpace(V.mesh(), e_fdm) - J_fdm = ufl.replace(J, {t: t.reconstruct(function_space=V_fdm) for t in J.arguments()}) - bcs_fdm = tuple(bc.reconstruct(V=V_fdm) for bc in bcs) - self.fdm_interp = prolongation_matrix_matfree(V, V_fdm, [], bcs_fdm) - self.A = allocate_matrix(J_fdm, bcs=bcs_fdm, form_compiler_parameters=fcp, mat_type=mat_type, - options_prefix=options_prefix) - self._assemble_A = partial(assemble, J_fdm, tensor=self.A, bcs=bcs_fdm, - form_compiler_parameters=fcp, mat_type=mat_type) - self._assemble_A() - Amat = self.A.petscmat - - omat, _ = pc.getOperators() - inject = prolongation_matrix_matfree(V_fdm, V, [], []) - Amat.setNullSpace(interp_nullspace(inject, omat.getNullSpace())) - Amat.setTransposeNullSpace(interp_nullspace(inject, omat.getTransposeNullSpace())) - Amat.setNearNullSpace(interp_nullspace(inject, omat.getNearNullSpace())) - self.work_vec_x = Amat.createVecLeft() - self.work_vec_y = Amat.createVecRight() - + # Reconstruct Jacobian and bcs with variant element + V_fdm = FunctionSpace(V.mesh(), e_fdm) + J_fdm = J(*(t.reconstruct(function_space=V_fdm) for t in J.arguments()), coefficients={}) + bcs_fdm = [] + for bc in bcs: + W = V_fdm + for index in bc._indices: + W = W.sub(index) + bcs_fdm.append(bc.reconstruct(V=W, g=0)) + + # Create a new _SNESContext in the variant space self._ctx_ref = self.new_snes_ctx(pc, J_fdm, bcs_fdm, mat_type, fcp=fcp, options_prefix=options_prefix) - if len(bcs) > 0: - self.bc_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) - else: - self.bc_nodes = numpy.empty(0, dtype=PETSc.IntType) + # Construct interpolation from variant to original spaces + self.fdm_interp = prolongation_matrix_matfree(V_fdm, V, bcs_fdm, []) + self.work_vec_x = Amat.createVecLeft() + self.work_vec_y = Amat.createVecRight() + if use_amat: + from firedrake.assemble import allocate_matrix, TwoFormAssembler + self.A = allocate_matrix(J_fdm, bcs=bcs_fdm, form_compiler_parameters=fcp, + mat_type=mat_type, options_prefix=options_prefix) + self._assemble_A = TwoFormAssembler(J_fdm, tensor=self.A, bcs=bcs_fdm, + form_compiler_parameters=fcp, + mat_type=mat_type).assemble + self._assemble_A() + Amat = self.A.petscmat + + if len(bcs) > 0: + self.bc_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) + else: + self.bc_nodes = numpy.empty(0, dtype=PETSc.IntType) # Assemble the FDM preconditioner with sparse local matrices - Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, appctx) - self._assemble_P() + Pmat, self.assembly_callables = self.allocate_matrix(V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation) Pmat.setNullSpace(Amat.getNullSpace()) Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) Pmat.setNearNullSpace(Amat.getNearNullSpace()) + self._assemble_P() # Internally, we just set up a PC object that the user can configure # however from the PETSc command line. Since PC allows the user to specify @@ -136,18 +172,136 @@ def interp_nullspace(I, nsp): # We set a DM and an appropriate SNESContext on the constructed PC so one # can do e.g. multigrid or patch solves. - fdm_dm = V_fdm.dm - self._dm = fdm_dm - - fdmpc.setDM(fdm_dm) + self._dm = V_fdm.dm + fdmpc.setDM(self._dm) fdmpc.setOptionsPrefix(options_prefix) fdmpc.setOperators(A=Amat, P=Pmat) - fdmpc.setUseAmat(True) + fdmpc.setUseAmat(use_amat) self.pc = fdmpc - - with dmhooks.add_hooks(fdm_dm, self, appctx=self._ctx_ref, save=False): + if hasattr(self, "_ctx_ref"): + with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref, save=False): + fdmpc.setFromOptions() + else: fdmpc.setFromOptions() + @PETSc.Log.EventDecorator("FDMPrealloc") + def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): + """ + Allocate the FDM sparse preconditioner. + + :arg V: the :class:`.FunctionSpace` of the form arguments + :arg J: the Jacobian bilinear form + :arg bcs: an iterable of boundary conditions on V + :arg fcp: form compiler parameters to assemble coefficients + :arg pmat_type: the `PETSc.Mat.Type` for the blocks in the diagonal + :arg use_static_condensation: are we assembling the statically-condensed Schur complement on facets? + + :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and a list of assembly callables + """ + symmetric = pmat_type.endswith("sbaij") + ifacet = [i for i, Vsub in enumerate(V) if is_restricted(Vsub.finat_element)[1]] + if len(ifacet) == 0: + Vfacet = None + Vbig = V + ebig = V.ufl_element() + _, fdofs = split_dofs(V.finat_element) + elif len(ifacet) == 1: + Vfacet = V[ifacet[0]] + ebig, = set(unrestrict_element(Vsub.ufl_element()) for Vsub in V) + Vbig = FunctionSpace(V.mesh(), ebig) + if len(V) > 1: + dims = [Vsub.finat_element.space_dimension() for Vsub in V] + assert sum(dims) == Vbig.finat_element.space_dimension() + fdofs = restricted_dofs(Vfacet.finat_element, Vbig.finat_element) + else: + raise ValueError("Expecting at most one FunctionSpace restricted onto facets.") + self.embedding_element = ebig + + if Vbig.value_size == 1: + self.fises = PETSc.IS().createGeneral(fdofs, comm=PETSc.COMM_SELF) + else: + self.fises = PETSc.IS().createBlock(Vbig.value_size, fdofs, comm=PETSc.COMM_SELF) + + # Dictionary with kernel to compute the Schur complement + self.schur_kernel = {} + if V == Vbig and Vbig.finat_element.formdegree == 0: + # If we are in H(grad), we just pad with zeros on the statically-condensed pattern + self.schur_kernel[V] = SchurComplementPattern + elif Vfacet and use_static_condensation: + # If we are in a facet space, we build the Schur complement on its diagonal block + if Vfacet.finat_element.formdegree == 0 and Vfacet.value_size == 1: + self.schur_kernel[Vfacet] = SchurComplementDiagonal + elif symmetric: + self.schur_kernel[Vfacet] = SchurComplementBlockCholesky + else: + self.schur_kernel[Vfacet] = SchurComplementBlockQR + + # Create data structures needed for assembly + self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} + self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) + self.assemblers = {} + + Pmats = {} + addv = PETSc.InsertMode.ADD_VALUES + # Store only off-diagonal blocks with more columns than rows to save memory + Vsort = sorted(V, key=lambda Vsub: Vsub.dim()) + # Loop over all pairs of subspaces + for Vrow, Vcol in product(Vsort, Vsort): + if symmetric and (Vcol, Vrow) in Pmats: + P = PETSc.Mat().createTranspose(Pmats[Vcol, Vrow]) + else: + on_diag = Vrow == Vcol + triu = on_diag and symmetric + ptype = pmat_type if on_diag else PETSc.Mat.Type.AIJ + sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) + + preallocator = PETSc.Mat().create(comm=self.comm) + preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) + preallocator.setSizes(sizes) + preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) + preallocator.setUp() + self.set_values(preallocator, Vrow, Vcol, addv, triu=triu) + + preallocator.assemble() + d_nnz, o_nnz = get_preallocation(preallocator, sizes[0][0]) + preallocator.destroy() + if on_diag: + numpy.maximum(d_nnz, 1, out=d_nnz) + + P = PETSc.Mat().create(comm=self.comm) + P.setType(ptype) + P.setSizes(sizes) + P.setPreallocationNNZ((d_nnz, o_nnz)) + P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) + if on_diag: + P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, True) + if ptype.endswith("sbaij"): + P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) + P.setUp() + # append callables to zero entries, insert element matrices, and apply BCs + assembly_callables.append(P.zeroEntries) + assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv)) + if on_diag: + own = Vrow.dof_dset.layout_vec.getLocalSize() + bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] + Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) + if len(bdofs) > 0: + vals = numpy.ones(bdofs.shape, dtype=PETSc.RealType) + assembly_callables.append(partial(P.setValuesRCV, bdofs, bdofs, vals, addv)) + Pmats[Vrow, Vcol] = P + + if len(V) == 1: + Pmat = Pmats[V, V] + else: + Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) + assembly_callables.append(Pmat.assemble) + return Pmat, assembly_callables + + @PETSc.Log.EventDecorator("FDMAssemble") + def _assemble_P(self): + for _assemble in self.assembly_callables: + _assemble() + @PETSc.Log.EventDecorator("FDMUpdate") def update(self, pc): if hasattr(self, "A"): @@ -155,26 +309,24 @@ def update(self, pc): self._assemble_P() def apply(self, pc, x, y): - dm = self._dm - with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): - if hasattr(self, "fdm_interp"): - self.fdm_interp.multTranspose(x, self.work_vec_x) + if hasattr(self, "fdm_interp"): + self.fdm_interp.multTranspose(x, self.work_vec_x) + with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref): self.pc.apply(self.work_vec_x, self.work_vec_y) - self.fdm_interp.mult(self.work_vec_y, y) - y.array_w[self.bc_nodes] = x.array_r[self.bc_nodes] - else: - self.pc.apply(x, y) + self.fdm_interp.mult(self.work_vec_y, y) + y.array_w[self.bc_nodes] = x.array_r[self.bc_nodes] + else: + self.pc.apply(x, y) def applyTranspose(self, pc, x, y): - dm = self._dm - with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): - if hasattr(self, "fdm_interp"): - self.fdm_interp.multTranspose(x, self.work_vec_y) + if hasattr(self, "fdm_interp"): + self.fdm_interp.multTranspose(x, self.work_vec_y) + with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref): self.pc.applyTranspose(self.work_vec_y, self.work_vec_x) - self.fdm_interp.mult(self.work_vec_x, y) - y.array_w[self.bc_nodes] = x.array_r[self.bc_nodes] - else: - self.pc.applyTranspose(x, y) + self.fdm_interp.mult(self.work_vec_x, y) + y.array_w[self.bc_nodes] = x.array_r[self.bc_nodes] + else: + self.pc.applyTranspose(x, y) def view(self, pc, viewer=None): super(FDMPC, self).view(pc, viewer) @@ -182,119 +334,1090 @@ def view(self, pc, viewer=None): viewer.printfASCII("PC to apply inverse\n") self.pc.view(viewer) - def assemble_fdm_op(self, V, J, bcs, appctx): + def destroy(self, pc): + if hasattr(self, "A"): + self.A.petscmat.destroy() + if hasattr(self, "pc"): + self.pc.getOperators()[-1].destroy() + self.pc.destroy() + + @PETSc.Log.EventDecorator("FDMCoefficients") + def assemble_coefficients(self, J, fcp, block_diagonal=True): """ - Assemble the sparse preconditioner with cell-wise constant coefficients. + Obtain coefficients for the auxiliary operator as the diagonal of a + weighted mass matrix in broken(V^k) * broken(V^{k+1}). + See Section 3.2 of Brubeck2022b. - :arg V: the :class:`~.FunctionSpace` of the form arguments - :arg J: the Jacobian bilinear form - :arg bcs: an iterable of boundary conditions on V - :arg appctx: the application context + :arg J: the Jacobian bilinear :class:`ufl.Form`, + :arg fcp: form compiler parameters to assemble the diagonal of the mass matrices. + :arg block_diagonal: are we assembling the block diagonal of the mass matrices? + + :returns: a 2-tuple of a `dict` with the zero-th order and second + order coefficients keyed on ``"beta"`` and ``"alpha"``, + and a list of assembly callables. + """ + coefficients = {} + assembly_callables = [] + # Basic idea: take the original bilinear form and + # replace the exterior derivatives with arguments in broken(V^{k+1}). + # Then, replace the original arguments with arguments in broken(V^k). + # Where the broken spaces have L2-orthogonal FDM basis functions. + index = len(J.arguments()[-1].function_space())-1 + if index: + splitter = ExtractSubBlock() + J = splitter.split(J, argument_indices=(index, index)) + + args_J = J.arguments() + e = args_J[0].ufl_element() + mesh = args_J[0].function_space().mesh() + tdim = mesh.topological_dimension() + if isinstance(e, (ufl.VectorElement, ufl.TensorElement)): + e = e._sub_element + e = unrestrict_element(e) + sobolev = e.sobolev_space() + + # Replacement rule for the exterior derivative = grad(arg) * eps + map_grad = None + if sobolev == ufl.H1: + map_grad = lambda p: p + elif sobolev in [ufl.HCurl, ufl.HDiv]: + u = ufl.Coefficient(ufl.FunctionSpace(mesh, e)) + du = ufl.variable(ufl.grad(u)) + dku = ufl.div(u) if sobolev == ufl.HDiv else ufl.curl(u) + eps = expand_derivatives(ufl.diff(ufl.replace(expand_derivatives(dku), {ufl.grad(u): du}), du)) + if sobolev == ufl.HDiv: + map_grad = lambda p: ufl.outer(p, eps/tdim) + elif len(eps.ufl_shape) == 3: + map_grad = lambda p: ufl.dot(p, eps/2) + else: + map_grad = lambda p: p*(eps/2) + + # Construct Z = broken(V^k) * broken(V^{k+1}) + V = args_J[0].function_space() + fe = V.finat_element + formdegree = fe.formdegree + degree = fe.degree + if type(degree) != int: + degree, = set(degree) + qdeg = degree + if formdegree == tdim: + qfam = "DG" if tdim == 1 else "DQ" + qdeg = 0 + elif formdegree == 0: + qfam = "DG" if tdim == 1 else "RTCE" if tdim == 2 else "NCE" + elif formdegree == 1 and tdim == 3: + qfam = "NCF" + else: + qfam = "DQ L2" + qdeg = degree - 1 + + qvariant = "fdm_quadrature" + elements = [e.reconstruct(variant=qvariant), + ufl.FiniteElement(qfam, cell=mesh.ufl_cell(), degree=qdeg, variant=qvariant)] + elements = list(map(ufl.BrokenElement, elements)) + if V.shape: + elements = [ufl.TensorElement(ele, shape=V.shape) for ele in elements] + Z = FunctionSpace(mesh, ufl.MixedElement(elements)) + + # Transform the exterior derivative and the original arguments of J to arguments in Z + args = (TestFunctions(Z), TrialFunctions(Z)) + repargs = {t: v[0] for t, v in zip(args_J, args)} + repgrad = {ufl.grad(t): map_grad(v[1]) for t, v in zip(args_J, args)} if map_grad else {} + Jcell = expand_indices(expand_derivatives(ufl.Form(J.integrals_by_type("cell")))) + mixed_form = ufl.replace(ufl.replace(Jcell, repgrad), repargs) + + # Return coefficients and assembly callables + if block_diagonal and V.shape: + from firedrake.assemble import assemble + M = assemble(mixed_form, mat_type="matfree", form_compiler_parameters=fcp) + for iset, name in zip(Z.dof_dset.field_ises, ("beta", "alpha")): + sub = M.petscmat.createSubMatrix(iset, iset) + ctx = sub.getPythonContext() + coefficients[name] = ctx._block_diagonal + assembly_callables.append(ctx._assemble_block_diagonal) + else: + from firedrake.assemble import OneFormAssembler + tensor = Function(Z) + coefficients["beta"] = tensor.subfunctions[0] + coefficients["alpha"] = tensor.subfunctions[1] + assembly_callables.append(OneFormAssembler(mixed_form, tensor=tensor, diagonal=True, + form_compiler_parameters=fcp).assemble) + return coefficients, assembly_callables - :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and its assembly callable + @PETSc.Log.EventDecorator("FDMRefTensor") + def assemble_reference_tensor(self, V, transpose=False, sort_interior=False): """ - from pyop2.sparsity import get_preallocation - from firedrake.preconditioners.pmg import get_line_elements + Return the reference tensor used in the diagonal factorisation of the + sparse cell matrices. See Section 3.2 of Brubeck2022b. + + :arg V: a :class:`.FunctionSpace` + + :returns: a :class:`PETSc.Mat` interpolating V^k * d(V^k) onto + broken(V^k) * broken(V^{k+1}) on the reference element. + """ + value_size = V.value_size + fe = V.finat_element + tdim = fe.cell.get_spatial_dimension() + formdegree = fe.formdegree + degree = fe.degree + if type(degree) != int: + degree, = set(degree) + if formdegree == tdim: + degree = degree + 1 + is_interior, is_facet = is_restricted(fe) + key = (value_size, tdim, degree, formdegree, is_interior, is_facet, transpose, sort_interior) + cache = self._cache.setdefault("reference_tensor", {}) + try: + return cache[key] + except KeyError: + pass + + if transpose: + result = self.assemble_reference_tensor(V, transpose=False, sort_interior=sort_interior) + result = PETSc.Mat().createTranspose(result).convert(result.getType()) + return cache.setdefault(key, result) + + if sort_interior: + assert is_interior and not is_facet and not transpose + # Sort DOFs to make A00 block diagonal with blocks of increasing dimension along the diagonal + result = self.assemble_reference_tensor(V, transpose=transpose, sort_interior=False) + if formdegree != 0: + # Compute the stiffness matrix on the interior of a cell + A00 = self._element_mass_matrix.PtAP(result) + indptr, indices, _ = A00.getValuesCSR() + degree = numpy.diff(indptr) + # Sort by blocks + uniq, u_index = numpy.unique(indices, return_index=True) + perm = uniq[u_index.argsort(kind='stable')] + # Sort by degree + degree = degree[perm] + perm = perm[degree.argsort(kind='stable')] + A00.destroy() + + isperm = PETSc.IS().createGeneral(perm, comm=result.getComm()) + result = get_submat(result, iscol=isperm, permute=True) + isperm.destroy() + return cache.setdefault(key, result) + + short_key = key[:-3] + (False,) * 3 + try: + result = cache[short_key] + except KeyError: + # Get CG(k) and DG(k-1) 1D elements from V + elements = sorted(get_base_elements(fe), key=lambda e: e.formdegree) + e0 = elements[0] if elements[0].formdegree == 0 else None + e1 = elements[-1] if elements[-1].formdegree == 1 else None + if e0 and is_interior: + e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") + + # Get broken(CG(k)) and DG(k-1) 1D elements from the coefficient spaces + Q0 = self.coefficients["beta"].function_space().finat_element.element + elements = sorted(get_base_elements(Q0), key=lambda e: e.formdegree) + q0 = elements[0] if elements[0].formdegree == 0 else None + q1 = elements[-1] + if q1.formdegree != 1: + Q1 = self.coefficients["alpha"].function_space().finat_element.element + q1 = sorted(get_base_elements(Q1), key=lambda e: e.formdegree)[-1] + + # Interpolate V * d(V) -> space(beta) * space(alpha) + comm = PETSc.COMM_SELF + zero = PETSc.Mat() + A00 = petsc_sparse(evaluate_dual(e0, q0), comm=comm) if e0 and q0 else zero + A11 = petsc_sparse(evaluate_dual(e1, q1), comm=comm) if e1 else zero + A10 = petsc_sparse(evaluate_dual(e0, q1, alpha=(1,)), comm=comm) if e0 else zero + B_blocks = mass_blocks(tdim, formdegree, A00, A11) + A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) + result = block_mat(B_blocks + A_blocks, destroy_blocks=True) + A00.destroy() + A10.destroy() + A11.destroy() + if value_size != 1: + eye = petsc_sparse(numpy.eye(value_size), comm=result.getComm()) + temp = result + result = temp.kron(eye) + temp.destroy() + eye.destroy() + + if is_facet: + cache[short_key] = result + result = get_submat(result, iscol=self.fises) + return cache.setdefault(key, result) + + @cached_property + def _element_mass_matrix(self): + Z = [self.coefficients[name].function_space() for name in ("beta", "alpha")] + shape = (sum(V.finat_element.space_dimension() for V in Z),) + Z[0].shape + data = numpy.ones(shape, dtype=PETSc.RealType) + shape += (1,) * (3-len(shape)) + nrows = shape[0] * shape[1] + ai = numpy.arange(nrows+1, dtype=PETSc.IntType) + aj = numpy.tile(ai[:-1].reshape((-1, shape[1])), (1, shape[2])) + if shape[2] > 1: + ai *= shape[2] + data = numpy.tile(numpy.eye(shape[2], dtype=data.dtype), shape[:1] + (1,)*(len(shape)-1)) + return PETSc.Mat().createAIJ((nrows, nrows), csr=(ai, aj, data), comm=PETSc.COMM_SELF) + + @PETSc.Log.EventDecorator("FDMSetValues") + def set_values(self, A, Vrow, Vcol, addv, triu=False): + """ + Assemble the stiffness matrix in the FDM basis using sparse reference + tensors and diagonal mass matrices. + + :arg A: the :class:`PETSc.Mat` to assemble + :arg Vrow: the :class:`.FunctionSpace` test space + :arg Vcol: the :class:`.FunctionSpace` trial space + :arg addv: a `PETSc.Mat.InsertMode` + :arg triu: are we assembling only the upper triangular part? + """ + key = (Vrow.ufl_element(), Vcol.ufl_element()) + try: + assembler = self.assemblers[key] + except KeyError: + # Interpolation of basis and exterior derivative onto broken spaces + C1 = self.assemble_reference_tensor(Vcol) + R1 = self.assemble_reference_tensor(Vrow, transpose=True) + M = self._element_mass_matrix + # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2022b + element_kernel = TripleProductKernel(R1, M, C1, self.coefficients["beta"], self.coefficients["alpha"]) + + schur_kernel = None + if Vrow == Vcol: + schur_kernel = self.schur_kernel.get(Vrow) + if schur_kernel is not None: + V0 = FunctionSpace(Vrow.mesh(), restrict_element(self.embedding_element, "interior")) + C0 = self.assemble_reference_tensor(V0, sort_interior=True) + R0 = self.assemble_reference_tensor(V0, sort_interior=True, transpose=True) + # Only the facet block updates the coefficients in M + element_kernel = schur_kernel(element_kernel, + TripleProductKernel(R1, M, C0), + TripleProductKernel(R0, M, C1), + TripleProductKernel(R0, M, C0)) + + assembler = SparseAssembler(element_kernel, Vrow, Vcol, self.lgmaps[Vrow], self.lgmaps[Vcol]) + self.assemblers.setdefault(key, assembler) + assembler.assemble(A, addv=addv, triu=triu) + + +class SparseAssembler(object): + + _cache = {} + + @staticmethod + def setSubMatCSR(comm, triu=False): + """ + Compile C code to insert sparse submatrices and store in class cache + + :arg triu: are we inserting onto the upper triangular part of the matrix? + + :returns: a python wrapper for the matrix insertion function + """ + cache = SparseAssembler._cache.setdefault("setSubMatCSR", {}) + key = triu + try: + return cache[key] + except KeyError: + return cache.setdefault(key, load_setSubMatCSR(comm, triu)) + + def __init__(self, kernel, Vrow, Vcol, rmap, cmap): + self.kernel = kernel + m, n = kernel.result.getSize() + + spaces = [Vrow] + row_shape = tuple() if Vrow.value_size == 1 else (Vrow.value_size,) + map_rows = (self.map_block_indices, rmap) if row_shape else (rmap.apply,) + rows = numpy.empty((m, ), dtype=PETSc.IntType).reshape((-1,) + row_shape) + if Vcol == Vrow: + cols = rows + map_cols = (lambda *args, result=None: result, ) + else: + spaces.append(Vcol) + col_shape = tuple() if Vcol.value_size == 1 else (Vcol.value_size,) + map_cols = (self.map_block_indices, cmap) if col_shape else (cmap.apply, ) + cols = numpy.empty((n, ), dtype=PETSc.IntType).reshape((-1,) + col_shape) + spaces.extend(c.function_space() for c in kernel.coefficients) + + integral_type = kernel.integral_type + if integral_type in ["cell", "interior_facet_horiz"]: + get_map = operator.methodcaller("cell_node_map") + elif integral_type in ["interior_facet", "interior_facet_vert"]: + get_map = operator.methodcaller("interior_facet_node_map") + else: + raise NotImplementedError("Only for cell or interior facet integrals") + self.node_maps = tuple(map(get_map, spaces)) + + ncell = 2 if integral_type.startswith("interior_facet") else 1 + self.indices = tuple(numpy.empty((V.finat_element.space_dimension() * ncell,), dtype=PETSc.IntType) for V in spaces) + self.map_rows = partial(*map_rows, self.indices[spaces.index(Vrow)], result=rows) + self.map_cols = partial(*map_cols, self.indices[spaces.index(Vcol)], result=cols) + self.kernel_args = self.indices[-len(kernel.coefficients):] + self.set_indices = self.copy_indices + + node_map = self.node_maps[0] + self.nel = node_map.values.shape[0] + if node_map.offset is None: + layers = None + else: + layers = node_map.iterset.layers_array + layers = layers[:, 1]-layers[:, 0]-1 + if integral_type.endswith("horiz"): + layers -= 1 + self.set_indices = self.copy_indices_horiz + if layers.shape[0] != self.nel: + layers = numpy.repeat(layers, self.nel) + self.layers = layers + + def map_block_indices(self, lgmap, indices, result=None): + bsize = result.shape[-1] + numpy.copyto(result[:, 0], indices) + result[:, 0] *= bsize + numpy.add.outer(result[:, 0], numpy.arange(1, bsize, dtype=indices.dtype), out=result[:, 1:]) + return lgmap.apply(result, result=result) + + def copy_indices_horiz(self, e): + for index, node_map in zip(self.indices, self.node_maps): + index = index.reshape((2, -1)) + numpy.copyto(index, node_map.values_with_halo[e]) + index[1] += node_map.offset + + def copy_indices(self, e): + for index, node_map in zip(self.indices, self.node_maps): + numpy.copyto(index, node_map.values_with_halo[e]) + + def add_offsets(self): + for index, node_map in zip(self.indices, self.node_maps): + index += node_map.offset + + def assemble(self, A, addv=None, triu=False): + if A.getType() == PETSc.Mat.Type.PREALLOCATOR: + kernel = lambda *args, result=None: result + else: + kernel = self.kernel + result = self.kernel.result + insert = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) + + # Core assembly loop + if self.layers is None: + for e in range(self.nel): + self.set_indices(e) + insert(A, kernel(*self.kernel_args, result=result), + self.map_rows(), self.map_cols(), addv) + else: + for e in range(self.nel): + self.set_indices(e) + for _ in range(self.layers[e]): + insert(A, kernel(*self.kernel_args, result=result), + self.map_rows(), self.map_cols(), addv) + self.add_offsets() + + +class ElementKernel(object): + """ + A constant element kernel + """ + def __init__(self, A, *coefficients): + self.result = A + self.coefficients = coefficients + self.integral_type = "cell" + + def __call__(self, *args, result=None): + return result or self.result + + def __del__(self): + self.destroy() + + def destroy(self): + pass + + +class TripleProductKernel(ElementKernel): + """ + An element kernel to compute a triple matrix product A * B * C, where A and + C are constant matrices and B is a block diagonal matrix with entries given + by coefficients. + """ + def __init__(self, A, B, C, *coefficients): + self.work = None + if len(coefficients) == 0: + self.data = numpy.array([]) + self.update = lambda *args: args + else: + dshape = (-1, ) + coefficients[0].dat.data_ro.shape[1:] + if numpy.prod(dshape[1:]) == 1: + self.work = B.getDiagonal() + self.data = self.work.array_w.reshape(dshape) + self.update = partial(B.setDiagonal, self.work) + else: + indptr, indices, data = B.getValuesCSR() + self.data = data.reshape(dshape) + self.update = lambda *args: (B.setValuesCSR(indptr, indices, self.data), B.assemble()) + + stops = numpy.zeros((len(coefficients) + 1,), dtype=PETSc.IntType) + numpy.cumsum([c.function_space().finat_element.space_dimension() for c in coefficients], out=stops[1:]) + self.slices = [slice(*stops[k:k+2]) for k in range(len(coefficients))] + + self.product = partial(A.matMatMult, B, C) + super().__init__(self.product(), *coefficients) + + def __call__(self, *indices, result=None): + for c, i, z in zip(self.coefficients, indices, self.slices): + numpy.take(c.dat.data_ro, i, axis=0, out=self.data[z]) + self.update() + return self.product(result=result) + + def destroy(self): + self.result.destroy() + if isinstance(self.work, PETSc.Object): + self.work.destroy() + + +class SchurComplementKernel(ElementKernel): + """ + An element kernel to compute Schur complements that reuses work matrices and the + symbolic factorization of the interior block. + """ + def __init__(self, *kernels): + self.children = kernels + self.submats = [k.result for k in kernels] + + # Create dict of slices with the extents of the diagonal blocks + A00 = self.submats[-1] + degree = numpy.diff(A00.getValuesCSR()[0]) + istart = 0 + self.slices = {1: slice(0, 0)} + unique_degree, counts = numpy.unique(degree, return_counts=True) + for k, kdofs in sorted(zip(unique_degree, counts)): + self.slices[k] = slice(istart, istart + k * kdofs) + istart += k * kdofs + + self.blocks = sorted(degree for degree in self.slices if degree > 1) + + self.work = [None for _ in range(2)] + coefficients = [] + for k in self.children: + coefficients.extend(k.coefficients) + coefficients = list(dict.fromkeys(coefficients)) + super().__init__(self.condense(), *coefficients) + + def __call__(self, *args, result=None): + for k in self.children: + k(*args, result=k.result) + return self.condense(result=result) + + def destroy(self): + for k in self.children: + k.destroy() + self.result.destroy() + for obj in self.work: + if isinstance(obj, PETSc.Object): + obj.destroy() + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): + return result + + +class SchurComplementPattern(SchurComplementKernel): + + def __call__(self, *args, result=None): + k = self.children[0] + k(*args, result=k.result) + return self.condense(result=result) + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): + """By default pad with zeros the statically condensed pattern""" + structure = PETSc.Mat.Structure.SUBSET if result else None + if result is None: + _, A10, A01, A00 = self.submats + result = A10.matMatMult(A00, A01, result=result) + result.aypx(0.0, self.submats[0], structure=structure) + return result + + +class SchurComplementDiagonal(SchurComplementKernel): + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + A11, A10, A01, A00 = self.submats + self.work[0] = A00.getDiagonal(result=self.work[0]) + self.work[0].reciprocal() + self.work[0].scale(-1) + A01.diagonalScale(L=self.work[0]) + result = A10.matMult(A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result + + +class SchurComplementBlockCholesky(SchurComplementKernel): + + def __init__(self, K11, K10, K01, K00): + # asssume that K10 = K01^T + super().__init__(K11, K01, K00) + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + A11, A01, A00 = self.submats + indptr, indices, R = A00.getValuesCSR() + + zlice = self.slices[1] + numpy.sqrt(R[zlice], out=R[zlice]) + numpy.reciprocal(R[zlice], out=R[zlice]) + flops = 2 * (zlice.stop - zlice.start) + for k in self.blocks: + Rk = R[self.slices[k]] + A = Rk.reshape((-1, k, k)) + rinv = numpy.linalg.inv(numpy.linalg.cholesky(A)) + numpy.copyto(Rk, rinv.flat) + flops += A.shape[0] * ((k**3)//3 + k**3) + + PETSc.Log.logFlops(flops) + A00.setValuesCSR(indptr, indices, R) + A00.assemble() + self.work[0] = A00.matMult(A01, result=self.work[0]) + result = self.work[0].transposeMatMult(self.work[0], result=result) + result.aypx(-1.0, A11, structure=structure) + return result + + +class SchurComplementBlockQR(SchurComplementKernel): + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + A11, A10, A01, A00 = self.submats + indptr, indices, R = A00.getValuesCSR() + Q = numpy.ones(R.shape, dtype=R.dtype) + + zlice = self.slices[1] + numpy.reciprocal(R[zlice], out=R[zlice]) + flops = zlice.stop - zlice.start + for k in self.blocks: + zlice = self.slices[k] + A = R[zlice].reshape((-1, k, k)) + q, r = numpy.linalg.qr(A, mode="complete") + numpy.copyto(Q[zlice], q.flat) + rinv = numpy.linalg.inv(r) + numpy.copyto(R[zlice], rinv.flat) + flops += A.shape[0] * ((4*k**3)//3 + k**3) + + PETSc.Log.logFlops(flops) + A00.setValuesCSR(indptr, indices, Q) + A00.assemble() + self.work[0] = A00.transposeMatMult(A01, result=self.work[0]) + A00.setValuesCSR(indptr, indices, R) + A00.assemble() + A00.scale(-1.0) + result = A10.matMatMult(A00, self.work[0], result=result) + result.axpy(1.0, A11, structure=structure) + return result + + +class SchurComplementBlockSVD(SchurComplementKernel): + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + A11, A10, A01, A00 = self.submats + indptr, indices, U = A00.getValuesCSR() + V = numpy.ones(U.shape, dtype=U.dtype) + self.work[0] = A00.getDiagonal(result=self.work[0]) + D = self.work[0] + dslice = self.slices[1] + numpy.sign(D.array_r[dslice], out=U[dslice]) + flops = dslice.stop - dslice.start + for k in self.blocks: + bslice = self.slices[k] + A = U[bslice].reshape((-1, k, k)) + u, s, v = numpy.linalg.svd(A, full_matrices=False) + dslice = slice(dslice.stop, dslice.stop + k * A.shape[0]) + numpy.copyto(D.array_w[dslice], s.flat) + numpy.copyto(U[bslice], numpy.transpose(u, axes=(0, 2, 1)).flat) + numpy.copyto(V[bslice], numpy.transpose(v, axes=(0, 2, 1)).flat) + flops += A.shape[0] * ((4*k**3)//3 + 4*k**3) + + PETSc.Log.logFlops(flops) + D.sqrtabs() + D.reciprocal() + A00.setValuesCSR(indptr, indices, V) + A00.assemble() + A00.diagonalScale(R=D) + self.work[1] = A10.matMult(A00, result=self.work[1]) + D.scale(-1.0) + A00.setValuesCSR(indptr, indices, U) + A00.assemble() + A00.diagonalScale(L=D) + result = self.work[1].matMatMult(A00, A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result + + +class SchurComplementBlockInverse(SchurComplementKernel): + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + A11, A10, A01, A00 = self.submats + indptr, indices, R = A00.getValuesCSR() + + zlice = self.slices[1] + numpy.reciprocal(R[zlice], out=R[zlice]) + flops = zlice.stop - zlice.start + for k in self.blocks: + Rk = R[self.slices[k]] + A = Rk.reshape((-1, k, k)) + rinv = numpy.linalg.inv(A) + numpy.copyto(Rk, rinv.flat) + flops += A.shape[0] * (k**3) + + PETSc.Log.logFlops(flops) + A00.setValuesCSR(indptr, indices, R) + A00.assemble() + A00.scale(-1.0) + result = A10.matMatMult(A00, A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result + + +@PETSc.Log.EventDecorator("LoadCode") +def load_c_code(code, name, **kwargs): + petsc_dir = get_petsc_dir() + cppargs = ["-I%s/include" % d for d in petsc_dir] + ldargs = (["-L%s/lib" % d for d in petsc_dir] + + ["-Wl,-rpath,%s/lib" % d for d in petsc_dir] + + ["-lpetsc", "-lm"]) + return load(code, "c", name, cppargs=cppargs, ldargs=ldargs, **kwargs) + + +def load_setSubMatCSR(comm, triu=False): + """Insert one sparse matrix into another sparse matrix. + Done in C for efficiency, since it loops over rows.""" + if triu: + name = "setSubMatCSR_SBAIJ" + select_cols = "icol -= (icol < irow) * (1 + icol);" + else: + name = "setSubMatCSR_AIJ" + select_cols = "" + code = f""" +#include + +PetscErrorCode {name}(Mat A, + Mat B, + PetscInt *rindices, + PetscInt *cindices, + InsertMode addv) +{{ + PetscInt ncols, irow, icol; + PetscInt *cols, *indices; + PetscScalar *vals; + + PetscInt m, n; + PetscErrorCode ierr; + PetscFunctionBeginUser; + MatGetSize(B, &m, NULL); + + n = 0; + for (PetscInt i = 0; i < m; i++) {{ + ierr = MatGetRow(B, i, &ncols, NULL, NULL);CHKERRQ(ierr); + n = ncols > n ? ncols : n; + ierr = MatRestoreRow(B, i, &ncols, NULL, NULL);CHKERRQ(ierr); + }} + PetscMalloc1(n, &indices); + for (PetscInt i = 0; i < m; i++) {{ + ierr = MatGetRow(B, i, &ncols, &cols, &vals);CHKERRQ(ierr); + irow = rindices[i]; + for (PetscInt j = 0; j < ncols; j++) {{ + icol = cindices[cols[j]]; + {select_cols} + indices[j] = icol; + }} + ierr = MatSetValues(A, 1, &irow, ncols, indices, vals, addv);CHKERRQ(ierr); + ierr = MatRestoreRow(B, i, &ncols, &cols, &vals);CHKERRQ(ierr); + }} + PetscFree(indices); + PetscFunctionReturn(0); +}} +""" + argtypes = [ctypes.c_voidp, ctypes.c_voidp, + ctypes.c_voidp, ctypes.c_voidp, ctypes.c_int] + funptr = load_c_code(code, name, comm=comm, argtypes=argtypes, + restype=ctypes.c_int) + + @PETSc.Log.EventDecorator(name) + def wrapper(A, B, rows, cols, addv): + return funptr(A.handle, B.handle, rows.ctypes.data, cols.ctypes.data, addv) + + return wrapper + + +def is_restricted(finat_element): + """Determine if an element is a restriction onto interior or facets""" + tdim = finat_element.cell.get_dimension() + idofs = len(finat_element.entity_dofs()[tdim][0]) + is_interior = idofs == finat_element.space_dimension() + is_facet = idofs == 0 + return is_interior, is_facet + + +def petsc_sparse(A_numpy, rtol=1E-10, comm=None): + """Convert dense numpy matrix into a sparse PETSc matrix""" + atol = rtol * abs(max(A_numpy.min(), A_numpy.max(), key=abs)) + sparsity = abs(A_numpy) > atol + nnz = numpy.count_nonzero(sparsity, axis=1).astype(PETSc.IntType) + A = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) + rows, cols = numpy.nonzero(sparsity) + rows = rows.astype(PETSc.IntType) + cols = cols.astype(PETSc.IntType) + vals = A_numpy[sparsity] + A.setValuesRCV(rows[:, None], cols[:, None], vals[:, None], PETSc.InsertMode.INSERT) + A.assemble() + return A + + +def kron3(A, B, C, scale=None): + """Returns scale * kron(A, kron(B, C))""" + temp = B.kron(C) + if scale is not None: + temp.scale(scale) + result = A.kron(temp) + temp.destroy() + return result + + +def get_submat(A, isrow=None, iscol=None, permute=False): + """Return the sub matrix A[isrow, iscol]""" + needs_rows = isrow is None + needs_cols = iscol is None + if needs_rows and needs_cols: + return A + size = A.getSize() + if needs_rows: + isrow = PETSc.IS().createStride(size[0], step=1, comm=A.getComm()) + if needs_cols: + iscol = PETSc.IS().createStride(size[1], step=1, comm=A.getComm()) + if permute: + submat = A.permute(isrow, iscol) + else: + submat = A.createSubMatrix(isrow, iscol) + if needs_rows: + isrow.destroy() + if needs_cols: + iscol.destroy() + return submat + + +def block_mat(A_blocks, destroy_blocks=False): + """Return a concrete Mat corresponding to a block matrix given as a list of lists. + Optionally, destroys the input Mats if a new Mat is created.""" + if len(A_blocks) == 1: + if len(A_blocks[0]) == 1: + return A_blocks[0][0] + + result = PETSc.Mat().createNest(A_blocks, comm=A_blocks[0][0].getComm()) + # A nest Mat would not allow us to take matrix-matrix products + result = result.convert(mat_type=A_blocks[0][0].getType()) + if destroy_blocks: + for row in A_blocks: + for mat in row: + mat.destroy() + return result + + +def mass_blocks(tdim, formdegree, B00, B11): + """Construct mass block matrix on reference cell from 1D mass matrices B00 and B11. + The 1D matrices may come with different test and trial spaces.""" + if tdim == 1: + B_diag = [B11 if formdegree else B00] + elif tdim == 2: + if formdegree == 0: + B_diag = [B00.kron(B00)] + elif formdegree == 1: + B_diag = [B00.kron(B11), B11.kron(B00)] + else: + B_diag = [B11.kron(B11)] + elif tdim == 3: + if formdegree == 0: + B_diag = [kron3(B00, B00, B00)] + elif formdegree == 1: + B_diag = [kron3(B00, B00, B11), kron3(B00, B11, B00), kron3(B11, B00, B00)] + elif formdegree == 2: + B_diag = [kron3(B00, B11, B11), kron3(B11, B00, B11), kron3(B11, B11, B00)] + else: + B_diag = [kron3(B11, B11, B11)] + + n = len(B_diag) + if n == 1: + return [B_diag] + else: + zero = PETSc.Mat() + return [[B_diag[i] if i == j else zero for j in range(n)] for i in range(n)] + + +def diff_blocks(tdim, formdegree, A00, A11, A10): + """Construct exterior derivative block matrix on reference cell from 1D + mass matrices A00 and A11, and exterior derivative moments A10. + The 1D matrices may come with different test and trial spaces.""" + if formdegree == tdim: + ncols = A10.shape[0]**tdim + zero = PETSc.Mat().createAIJ((1, ncols), nnz=(0, 0), comm=A10.getComm()) + zero.assemble() + A_blocks = [[zero]] + elif tdim == 1: + A_blocks = [[A10]] + elif tdim == 2: + if formdegree == 0: + A_blocks = [[A00.kron(A10)], [A10.kron(A00)]] + elif formdegree == 1: + A_blocks = [[A10.kron(A11), A11.kron(A10)]] + A_blocks[-1][-1].scale(-1) + elif tdim == 3: + if formdegree == 0: + A_blocks = [[kron3(A00, A00, A10)], [kron3(A00, A10, A00)], [kron3(A10, A00, A00)]] + elif formdegree == 1: + zero = PETSc.Mat() + A_blocks = [[kron3(A00, A10, A11, scale=-1), kron3(A00, A11, A10), zero], + [kron3(A10, A00, A11, scale=-1), zero, kron3(A11, A00, A10)], + [zero, kron3(A10, A11, A00), kron3(A11, A10, A00, scale=-1)]] + elif formdegree == 2: + A_blocks = [[kron3(A10, A11, A11, scale=-1), kron3(A11, A10, A11), kron3(A11, A11, A10)]] + return A_blocks + + +def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None): + """ + Tabulate exterior derivative: Vc -> Vf as an explicit sparse matrix. + Works for any tensor-product basis. These are the same matrices one needs for HypreAMS and friends. + """ + if comm is None: + comm = Vf.comm + ec = Vc.finat_element + ef = Vf.finat_element + if ef.formdegree - ec.formdegree != 1: + raise ValueError("Expecting Vf = d(Vc)") + + elements = sorted(get_base_elements(ec), key=lambda e: e.formdegree) + c0, c1 = elements[::len(elements)-1] + elements = sorted(get_base_elements(ef), key=lambda e: e.formdegree) + f0, f1 = elements[::len(elements)-1] + if f0.formdegree != 0: + f0 = None + if c1.formdegree != 1: + c1 = None + + tdim = Vc.mesh().topological_dimension() + zero = PETSc.Mat() + A00 = petsc_sparse(evaluate_dual(c0, f0), comm=PETSc.COMM_SELF) if f0 else zero + A11 = petsc_sparse(evaluate_dual(c1, f1), comm=PETSc.COMM_SELF) if c1 else zero + A10 = petsc_sparse(evaluate_dual(c0, f1, alpha=(1,)), comm=PETSc.COMM_SELF) + Dhat = block_mat(diff_blocks(tdim, ec.formdegree, A00, A11, A10), destroy_blocks=True) + A00.destroy() + A10.destroy() + A11.destroy() + + if any(is_restricted(ec)) or any(is_restricted(ef)): + scalar_element = lambda e: e._sub_element if isinstance(e, (ufl.TensorElement, ufl.VectorElement)) else e + fdofs = restricted_dofs(ef, create_element(unrestrict_element(scalar_element(Vf.ufl_element())))) + cdofs = restricted_dofs(ec, create_element(unrestrict_element(scalar_element(Vc.ufl_element())))) + temp = Dhat + fises = PETSc.IS().createGeneral(fdofs, comm=temp.getComm()) + cises = PETSc.IS().createGeneral(cdofs, comm=temp.getComm()) + Dhat = temp.createSubMatrix(fises, cises) + temp.destroy() + fises.destroy() + cises.destroy() + + if Vf.value_size > 1: + temp = Dhat + eye = petsc_sparse(numpy.eye(Vf.value_size, dtype=PETSc.RealType), comm=temp.getComm()) + Dhat = temp.kron(eye) + temp.destroy() + eye.destroy() + + sizes = tuple(V.dof_dset.layout_vec.getSizes() for V in (Vf, Vc)) + block_size = Vf.dof_dset.layout_vec.getBlockSize() + preallocator = PETSc.Mat().create(comm=comm) + preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) + preallocator.setSizes(sizes) + preallocator.setUp() + + insert = PETSc.InsertMode.INSERT + rmap = Vf.local_to_global_map(fbcs) + cmap = Vc.local_to_global_map(cbcs) + assembler = SparseAssembler(ElementKernel(Dhat), Vf, Vc, rmap, cmap) + assembler.assemble(preallocator, addv=insert) + preallocator.assemble() + + nnz = get_preallocation(preallocator, sizes[0][0]) + preallocator.destroy() + Dmat = PETSc.Mat().createAIJ(sizes, block_size, nnz=nnz, comm=comm) + Dmat.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) + + assembler.assemble(Dmat, addv=insert) + Dmat.assemble() + Dhat.destroy() + return Dmat + + +def restrict_element(ele, restriction_domain): + """Get an element that is not restricted and return the restricted element.""" + if isinstance(ele, ufl.VectorElement): + return type(ele)(restrict_element(ele._sub_element, restriction_domain), dim=ele.num_sub_elements()) + elif isinstance(ele, ufl.TensorElement): + return type(ele)(restrict_element(ele._sub_element, restriction_domain), shape=ele._shape, symmetry=ele.symmetry()) + elif isinstance(ele, ufl.MixedElement): + return type(ele)(*(restrict_element(e, restriction_domain) for e in ele.sub_elements())) + else: + return ele[restriction_domain] + + +def unrestrict_element(ele): + """Get an element that might or might not be restricted and + return the parent unrestricted element.""" + if isinstance(ele, ufl.VectorElement): + return type(ele)(unrestrict_element(ele._sub_element), dim=ele.num_sub_elements()) + elif isinstance(ele, ufl.TensorElement): + return type(ele)(unrestrict_element(ele._sub_element), shape=ele._shape, symmetry=ele.symmetry()) + elif isinstance(ele, ufl.MixedElement): + return type(ele)(*(unrestrict_element(e) for e in ele.sub_elements())) + elif isinstance(ele, ufl.RestrictedElement): + return unrestrict_element(ele._element) + else: + return ele + + +def get_base_elements(e): + if isinstance(e, finat.EnrichedElement): + return list(chain.from_iterable(map(get_base_elements, e.elements))) + elif isinstance(e, finat.TensorProductElement): + return list(chain.from_iterable(map(get_base_elements, e.factors))) + elif isinstance(e, finat.FlattenedDimensions): + return get_base_elements(e.product) + elif isinstance(e, (finat.HCurlElement, finat.HDivElement)): + return get_base_elements(e.wrappee) + elif isinstance(e, finat.finiteelementbase.FiniteElementBase): + return get_base_elements(e.fiat_equivalent) + elif isinstance(e, FIAT.RestrictedElement): + return get_base_elements(e._element) + return [e] + + +class PoissonFDMPC(FDMPC): + """ + A preconditioner for tensor-product elements that changes the shape + functions so that the H^1 Riesz map is sparse in the interior of a + Cartesian cell, and assembles a global sparse matrix on which other + preconditioners, such as `ASMStarPC`, can be applied. + + Here we assume that the volume integrals in the Jacobian can be expressed as: + + inner(grad(v), alpha(grad(u)))*dx + inner(v, beta(u))*dx + + where alpha and beta are possibly tensor-valued. + The sparse matrix is obtained by approximating alpha and beta by cell-wise + constants and discarding the coefficients in alpha that couple together + mixed derivatives and mixed components. + + For spaces that are not H^1-conforming, this preconditioner will use + the symmetric interior-penalty DG method. The penalty coefficient can be + provided in the application context, keyed on ``"eta"``. + """ + + _variant = "fdm_ipdg" + _citation = "Brubeck2022a" + + def assemble_reference_tensor(self, V): try: - line_elements = get_line_elements(V) + _, line_elements, shifts = get_permutation_to_line_elements(V) except ValueError: raise ValueError("FDMPC does not support the element %s" % V.ufl_element()) + line_elements, = line_elements + axes_shifts, = shifts + degree = max(e.degree() for e in line_elements) - eta = float(appctx.get("eta", (degree+1)**2)) - quad_degree = 2*degree+1 + eta = float(self.appctx.get("eta", degree*(degree+1))) element = V.finat_element is_dg = element.entity_dofs() == element.entity_closure_dofs() Afdm = [] # sparse interval mass and stiffness matrices for each direction Dfdm = [] # tabulation of normal derivatives at the boundary for each direction bdof = [] # indices of point evaluation dofs for each direction + cache = self._cache.setdefault("ipdg_reference_tensor", {}) for e in line_elements: - Afdm[:0], Dfdm[:0], bdof[:0] = tuple(zip(fdm_setup_ipdg(e, eta))) - if not (e.formdegree or is_dg): + key = (e.degree(), eta) + try: + rtensor = cache[key] + except KeyError: + rtensor = cache.setdefault(key, fdm_setup_ipdg(e, eta, comm=PETSc.COMM_SELF)) + Afdm[:0], Dfdm[:0], bdof[:0] = tuple(zip(rtensor)) + if not is_dg and e.degree() == degree: + # do not apply SIPG along continuous directions Dfdm[0] = None + return Afdm, Dfdm, bdof, axes_shifts - # coefficients w.r.t. the reference values - coefficients, self.assembly_callables = self.assemble_coef(J, quad_degree) - # set arbitrary non-zero coefficients for preallocation - for coef in coefficients.values(): - with coef.dat.vec as cvec: - cvec.set(1.0E0) - - bcflags = get_weak_bc_flags(J) - - # preallocate by calling the assembly routine on a PREALLOCATOR Mat - sizes = (V.dof_dset.layout_vec.getSizes(),)*2 - block_size = V.dof_dset.layout_vec.getBlockSize() - prealloc = PETSc.Mat().create(comm=self.comm) - prealloc.setType(PETSc.Mat.Type.PREALLOCATOR) - prealloc.setSizes(sizes) - prealloc.setUp() - self.assemble_kron(prealloc, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags) - nnz = get_preallocation(prealloc, block_size * V.dof_dset.set.size) - Pmat = PETSc.Mat().createAIJ(sizes, block_size, nnz=nnz, comm=self.comm) - Pmat.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) - assemble_P = partial(self.assemble_kron, Pmat, V, bcs, eta, - coefficients, Afdm, Dfdm, bdof, bcflags) - prealloc.destroy() - return Pmat, assemble_P - - def assemble_kron(self, A, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags): + @PETSc.Log.EventDecorator("FDMSetValues") + def set_values(self, A, Vrow, Vcol, addv, triu=False): """ Assemble the stiffness matrix in the FDM basis using Kronecker products of interval matrices :arg A: the :class:`PETSc.Mat` to assemble - :arg V: the :class:`~.FunctionSpace` of the form arguments - :arg bcs: an iterable of :class:`~.DirichletBC` s - :arg eta: a ``float`` penalty parameter for the symmetric interior penalty method - :arg coefficients: a ``dict`` mapping strings to :class:`firedrake.function.Function` s with the form coefficients - :arg Afdm: the list with sparse interval matrices - :arg Dfdm: the list with normal derivatives matrices - :arg bcflags: the :class:`numpy.ndarray` with BC facet flags returned by ``get_weak_bc_flags`` + :arg Vrow: the :class:`.FunctionSpace` test space + :arg Vcol: the :class:`.FunctionSpace` trial space + :arg addv: a `PETSc.Mat.InsertMode` + :arg triu: are we assembling only the upper triangular part? """ - from firedrake.preconditioners.pmg import get_axes_shift - Gq = coefficients.get("Gq") - Bq = coefficients.get("Bq") - Gq_facet = coefficients.get("Gq_facet") - PT_facet = coefficients.get("PT_facet") - - imode = PETSc.InsertMode.ADD_VALUES - lgmap = V.local_to_global_map(bcs) - + set_submat = SparseAssembler.setSubMatCSR(PETSc.COMM_SELF, triu=triu) + update_A = lambda A, Ae, rindices: set_submat(A, Ae, rindices, rindices, addv) + condense_element_mat = lambda x: x + + def cell_to_global(lgmap, cell_to_local, cell_index, result=None): + # Be careful not to create new arrays + result = cell_to_local(cell_index, result=result) + return lgmap.apply(result, result=result) + + bsize = Vrow.dof_dset.layout_vec.getBlockSize() + cell_to_local, nel = extrude_node_map(Vrow.cell_node_map(), bsize=bsize) + get_rindices = partial(cell_to_global, self.lgmaps[Vrow], cell_to_local) + Afdm, Dfdm, bdof, axes_shifts = self.assemble_reference_tensor(Vrow) + + Gq = self.coefficients.get("alpha") + Bq = self.coefficients.get("beta") + bcflags = self.coefficients.get("bcflags") + Gq_facet = self.coefficients.get("Gq_facet") + PT_facet = self.coefficients.get("PT_facet") + + V = Vrow bsize = V.value_size ncomp = V.ufl_element().reference_value_size() sdim = (V.finat_element.space_dimension() * bsize) // ncomp # dimension of a single component - ndim = V.ufl_domain().topological_dimension() - shift = get_axes_shift(V.finat_element) % ndim + tdim = V.mesh().topological_dimension() + shift = axes_shifts * bsize - index_cell, nel = glonum_fun(V.cell_node_map()) - index_coef, _ = glonum_fun(Gq.cell_node_map()) - flag2id = numpy.kron(numpy.eye(ndim, ndim, dtype=PETSc.IntType), [[1], [2]]) + index_coef, _ = extrude_node_map((Gq or Bq).cell_node_map()) + index_bc, _ = extrude_node_map(bcflags.cell_node_map()) + flag2id = numpy.kron(numpy.eye(tdim, tdim, dtype=PETSc.IntType), [[1], [2]]) # pshape is the shape of the DOFs in the tensor product pshape = tuple(Ak[0].size[0] for Ak in Afdm) - if shift: - assert ncomp == ndim - pshape = [tuple(numpy.roll(pshape, -shift*k)) for k in range(ncomp)] - - if A.getType() != PETSc.Mat.Type.PREALLOCATOR: - A.zeroEntries() - for assemble_coef in self.assembly_callables: - assemble_coef() + static_condensation = False + if sdim != numpy.prod(pshape): + static_condensation = True - # insert the identity in the Dirichlet rows and columns - for row in V.dof_dset.lgmap.indices[lgmap.indices < 0]: - A.setValue(row, row, 1.0E0, imode) + if set(shift) != {0}: + assert ncomp == tdim + pshape = [tuple(numpy.roll(pshape, -shift[k])) for k in range(ncomp)] # assemble zero-th order term separately, including off-diagonals (mixed components) # I cannot do this for hdiv elements as off-diagonals are not sparse, this is because - # the FDM eigenbases for GLL(N) and GLL(N-1) are not orthogonal to each other - use_diag_Bq = Bq is None or len(Bq.ufl_shape) != 2 + # the FDM eigenbases for CG(k) and CG(k-1) are not orthogonal to each other + use_diag_Bq = Bq is None or len(Bq.ufl_shape) != 2 or static_condensation + rindices = None if not use_diag_Bq: bshape = Bq.ufl_shape # Be = Bhat kron ... kron Bhat Be = Afdm[0][0].copy() - for k in range(1, ndim): + for k in range(1, tdim): Be = Be.kron(Afdm[k][0]) aptr = numpy.arange(0, (bshape[0]+1)*bshape[1], bshape[1], dtype=PETSc.IntType) @@ -304,64 +1427,80 @@ def assemble_kron(self, A, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags) adata = numpy.sum(Bq.dat.data_ro[index_coef(e)], axis=0) Ae = PETSc.Mat().createAIJWithArrays(bshape, (aptr, aidx, adata), comm=PETSc.COMM_SELF) Ae = Be.kron(Ae) - - ie = index_cell(e) - ie = numpy.repeat(ie*bsize, bsize) + numpy.tile(numpy.arange(bsize, dtype=ie.dtype), len(ie)) - rows = lgmap.apply(ie) - set_submat_csr(A, Ae, rows, imode) + rindices = get_rindices(e, result=rindices) + update_A(A, Ae, rindices) Ae.destroy() Be.destroy() Bq = None # assemble the second order term and the zero-th order term if any, # discarding mixed derivatives and mixed components + ae = numpy.zeros((ncomp, tdim), dtype=PETSc.RealType) + be = numpy.zeros((ncomp,), dtype=PETSc.RealType) + je = None for e in range(nel): - ie = numpy.reshape(index_cell(e), (ncomp//bsize, -1)) - je = index_coef(e) - bce = bcflags[e] - - # get second order coefficient on this cell - mue = numpy.atleast_1d(numpy.sum(Gq.dat.data_ro[je], axis=0)) + je = index_coef(e, result=je) + bce = bcflags.dat.data_ro_with_halos[index_bc(e)] > 1E-8 + # get coefficients on this cell + if Gq is not None: + ae[:] = numpy.sum(Gq.dat.data_ro[je], axis=0) if Bq is not None: - # get zero-th order coefficient on this cell - bqe = numpy.atleast_1d(numpy.sum(Bq.dat.data_ro[je], axis=0)) + be[:] = numpy.sum(Bq.dat.data_ro[je], axis=0) + rindices = get_rindices(e, result=rindices) + rows = numpy.reshape(rindices, (-1, bsize)) + rows = numpy.transpose(rows) + rows = numpy.reshape(rows, (ncomp, -1)) + # for each component: compute the stiffness matrix Ae for k in range(ncomp): # permutation of axes with respect to the first vector component - axes = numpy.roll(numpy.arange(ndim), -shift*k) - # for each component: compute the stiffness matrix Ae - muk = mue[k] if len(mue.shape) == 2 else mue + axes = numpy.roll(numpy.arange(tdim), -shift[k]) bck = bce[:, k] if len(bce.shape) == 2 else bce fbc = numpy.dot(bck, flag2id) - # Ae = mue[k][0] Ahat + bqe[k] Bhat - Be = Afdm[axes[0]][0].copy() - Ae = Afdm[axes[0]][1+fbc[0]].copy() - Ae.scale(muk[0]) - if Bq is not None: - Ae.axpy(bqe[k], Be) - - if ndim > 1: - # Ae = Ae kron Bhat + mue[k][1] Bhat kron Ahat - Ae = Ae.kron(Afdm[axes[1]][0]) - Ae.axpy(muk[1], Be.kron(Afdm[axes[1]][1+fbc[1]])) - if ndim > 2: - # Ae = Ae kron Bhat + mue[k][2] Bhat kron Bhat kron Ahat - Be = Be.kron(Afdm[axes[1]][0]) - Ae = Ae.kron(Afdm[axes[2]][0]) - Ae.axpy(muk[2], Be.kron(Afdm[axes[2]][1+fbc[2]])) - - rows = lgmap.apply(ie[0]*bsize+k if bsize == ncomp else ie[k]) - set_submat_csr(A, Ae, rows, imode) + if Gq is not None: + # Ae = ae[k][0] Ahat + be[k] Bhat + Be = Afdm[axes[0]][0].copy() + Ae = Afdm[axes[0]][1+fbc[0]].copy() + Ae.scale(ae[k][0]) + if Bq is not None: + Ae.axpy(be[k], Be) + + if tdim > 1: + # Ae = Ae kron Bhat + ae[k][1] Bhat kron Ahat + Ae = Ae.kron(Afdm[axes[1]][0]) + if Gq is not None: + Ae.axpy(ae[k][1], Be.kron(Afdm[axes[1]][1+fbc[1]])) + + if tdim > 2: + # Ae = Ae kron Bhat + ae[k][2] Bhat kron Bhat kron Ahat + Be = Be.kron(Afdm[axes[1]][0]) + Ae = Ae.kron(Afdm[axes[2]][0]) + if Gq is not None: + Ae.axpy(ae[k][2], Be.kron(Afdm[axes[2]][1+fbc[2]])) + Be.destroy() + + elif Bq is not None: + Ae = Afdm[axes[0]][0] + for m in range(1, tdim): + Ae = Ae.kron(Afdm[axes[m]][0]) + Ae.scale(be[k]) + + Ae = condense_element_mat(Ae) + update_A(A, Ae, rows[k].astype(PETSc.IntType)) Ae.destroy() - Be.destroy() # assemble SIPG interior facet terms if the normal derivatives have been set up if any(Dk is not None for Dk in Dfdm): - if ndim < V.ufl_domain().geometric_dimension(): + if static_condensation: + raise NotImplementedError("Static condensation for SIPG not implemented") + if tdim < V.mesh().geometric_dimension(): raise NotImplementedError("SIPG on immersed meshes is not implemented") - index_facet, local_facet_data, nfacets = get_interior_facet_maps(V) - index_coef, _, _ = get_interior_facet_maps(Gq_facet or Gq) + eta = float(self.appctx.get("eta")) + + lgmap = self.lgmaps[V] + index_facet, local_facet_data, nfacets = extrude_interior_facet_maps(V) + index_coef, _, _ = extrude_interior_facet_maps(Gq_facet or Gq) rows = numpy.zeros((2, sdim), dtype=PETSc.IntType) for e in range(nfacets): @@ -373,8 +1512,8 @@ def assemble_kron(self, A, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags) if PT_facet: icell = numpy.reshape(lgmap.apply(ie), (2, ncomp, -1)) - iord0 = numpy.insert(numpy.delete(numpy.arange(ndim), idir[0]), 0, idir[0]) - iord1 = numpy.insert(numpy.delete(numpy.arange(ndim), idir[1]), 0, idir[1]) + iord0 = numpy.insert(numpy.delete(numpy.arange(tdim), idir[0]), 0, idir[0]) + iord1 = numpy.insert(numpy.delete(numpy.arange(tdim), idir[1]), 0, idir[1]) je = je[[0, 1], lfd] Pfacet = PT_facet.dat.data_ro_with_halos[je] Gfacet = Gq_facet.dat.data_ro_with_halos[je] @@ -382,14 +1521,14 @@ def assemble_kron(self, A, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags) Gfacet = numpy.sum(Gq.dat.data_ro_with_halos[je], axis=1) for k in range(ncomp): - axes = numpy.roll(numpy.arange(ndim), -shift*k) + axes = numpy.roll(numpy.arange(tdim), -shift[k]) Dfacet = Dfdm[axes[0]] if Dfacet is None: continue if PT_facet: - k0 = iord0[k] if shift != 1 else ndim-1-iord0[-k-1] - k1 = iord1[k] if shift != 1 else ndim-1-iord1[-k-1] + k0 = iord0[k] if shift[1] != 1 else tdim-1-iord0[-k-1] + k1 = iord1[k] if shift[1] != 1 else tdim-1-iord1[-k-1] Piola = Pfacet[[0, 1], [k0, k1]] mu = Gfacet[[0, 1], idir] else: @@ -424,10 +1563,10 @@ def assemble_kron(self, A, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags) Adense[ii, j0:j1] -= smu[j] * Dfacet[:, jface % 2] Ae = numpy_to_petsc(Adense, dense_indices, diag=False) - if ndim > 1: + if tdim > 1: # assume that the mesh is oriented Ae = Ae.kron(Afdm[axes[1]][0]) - if ndim > 2: + if tdim > 2: Ae = Ae.kron(Afdm[axes[2]][0]) if bsize == ncomp: @@ -439,42 +1578,32 @@ def assemble_kron(self, A, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags) rows[0] = pull_axis(icell[0][k0], pshape[k0], idir[0]) rows[1] = pull_axis(icell[1][k1], pshape[k1], idir[1]) - set_submat_csr(A, Ae, rows, imode) + update_A(A, Ae, rows) Ae.destroy() - A.assemble() - - def assemble_coef(self, J, quad_deg, discard_mixed=True, cell_average=True): - """ - Return the coefficients of the Jacobian form arguments and their gradient with respect to the reference coordinates. - :arg J: the Jacobian bilinear form - :arg quad_deg: the quadrature degree used for the coefficients - :arg discard_mixed: discard entries in second order coefficient with mixed derivatives and mixed components - :arg cell_average: to return the coefficients as DG_0 Functions - - :returns: a 2-tuple of - coefficients: a dictionary mapping strings to :class:`firedrake.function.Function` s with the coefficients of the form, - assembly_callables: a list of assembly callables for each coefficient of the form - """ - from ufl import inner, diff - from ufl.algorithms.ad import expand_derivatives + @PETSc.Log.EventDecorator("FDMCoefficients") + def assemble_coefficients(self, J, fcp): + from firedrake.assemble import OneFormAssembler coefficients = {} assembly_callables = [] - mesh = J.ufl_domain() + args_J = J.arguments() + V = args_J[-1].function_space() + mesh = V.mesh() tdim = mesh.topological_dimension() Finv = ufl.JacobianInverse(mesh) - dx = firedrake.dx(degree=quad_deg) - if cell_average: - family = "Discontinuous Lagrange" if tdim == 1 else "DQ" - degree = 0 - else: - family = "Quadrature" - degree = quad_deg + degree = V.ufl_element().degree() + try: + degree = max(degree) + except TypeError: + pass + quad_deg = fcp.get("degree", 2*degree+1) + dx = ufl.dx(degree=quad_deg, domain=mesh) + family = "Discontinuous Lagrange" if tdim == 1 else "DQ" + DG = ufl.FiniteElement(family, mesh.ufl_cell(), degree=0) # extract coefficients directly from the bilinear form - args_J = J.arguments() integrals_J = J.integrals_by_type("cell") mapping = args_J[0].ufl_element().mapping().lower() Piola = get_piola_tensor(mapping, mesh) @@ -485,86 +1614,103 @@ def assemble_coef(self, J, quad_deg, discard_mixed=True, cell_average=True): replace_grad = {ufl.grad(t): ufl.dot(Piola, ufl.dot(dt, Finv)) for t, dt in zip(args_J, ref_grad)} else: replace_grad = {ufl.grad(t): ufl.dot(dt, Finv) for t, dt in zip(args_J, ref_grad)} + alpha = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), + ref_grad[0]), ref_grad[1]) for i in integrals_J])) + # discard mixed derivatives and mixed components + if len(alpha.ufl_shape) == 2: + alpha = ufl.diag_vector(alpha) + else: + ashape = alpha.ufl_shape + ashape = ashape[:len(ashape)//2] + alpha = ufl.as_tensor(numpy.reshape([alpha[i+i] for i in numpy.ndindex(ashape)], (ashape[0], -1))) - alpha = expand_derivatives(sum([diff(diff(ufl.replace(i.integrand(), replace_grad), - ref_grad[0]), ref_grad[1]) for i in integrals_J])) + # assemble second order coefficient + if not isinstance(alpha, ufl.constantvalue.Zero): + Q = FunctionSpace(mesh, ufl.TensorElement(DG, shape=alpha.ufl_shape)) + tensor = coefficients.setdefault("alpha", Function(Q)) + assembly_callables.append(OneFormAssembler(ufl.inner(TestFunction(Q), alpha)*dx, tensor=tensor, + form_compiler_parameters=fcp).assemble) # get zero-th order coefficent ref_val = [ufl.variable(t) for t in args_J] if Piola: - dummy_element = ufl.TensorElement("DQ", cell=mesh.ufl_cell(), degree=1, shape=Piola.ufl_shape) + dummy_element = ufl.TensorElement(family, cell=mesh.ufl_cell(), degree=1, shape=Piola.ufl_shape) dummy_Piola = ufl.Coefficient(ufl.FunctionSpace(mesh, dummy_element)) replace_val = {t: ufl.dot(dummy_Piola, s) for t, s in zip(args_J, ref_val)} else: replace_val = {t: s for t, s in zip(args_J, ref_val)} - - beta = expand_derivatives(sum([diff(diff(ufl.replace(i.integrand(), replace_val), - ref_val[0]), ref_val[1]) for i in integrals_J])) + beta = expand_derivatives(sum(ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_val), + ref_val[0]), ref_val[1]) for i in integrals_J)) if Piola: beta = ufl.replace(beta, {dummy_Piola: Piola}) - - G = alpha - if discard_mixed: - # discard mixed derivatives and mixed components - if len(G.ufl_shape) == 2: - G = ufl.diag_vector(G) - else: - Gshape = G.ufl_shape - Gshape = Gshape[:len(Gshape)//2] - G = ufl.as_tensor(numpy.reshape([G[i+i] for i in numpy.ndindex(Gshape)], (Gshape[0], -1))) - Qe = ufl.TensorElement(family, mesh.ufl_cell(), degree=degree, quad_scheme="default", shape=G.ufl_shape) - else: - Qe = ufl.TensorElement(family, mesh.ufl_cell(), degree=degree, quad_scheme="default", shape=G.ufl_shape, symmetry=True) - - # assemble second order coefficient - Q = firedrake.FunctionSpace(mesh, Qe) - q = firedrake.TestFunction(Q) - Gq = firedrake.Function(Q) - coefficients["Gq"] = Gq - assembly_callables.append(partial(firedrake.assemble, inner(G, q)*dx, Gq)) - # assemble zero-th order coefficient if not isinstance(beta, ufl.constantvalue.Zero): if Piola: # keep diagonal beta = ufl.diag_vector(beta) - shape = beta.ufl_shape - Qe = ufl.FiniteElement(family, mesh.ufl_cell(), degree=degree, quad_scheme="default") - if shape: - Qe = ufl.TensorElement(Qe, shape=shape) - Q = firedrake.FunctionSpace(mesh, Qe) - q = firedrake.TestFunction(Q) - Bq = firedrake.Function(Q) - coefficients["Bq"] = Bq - assembly_callables.append(partial(firedrake.assemble, inner(beta, q)*dx, Bq)) - + Q = FunctionSpace(mesh, ufl.TensorElement(DG, shape=beta.ufl_shape) if beta.ufl_shape else DG) + tensor = coefficients.setdefault("beta", Function(Q)) + assembly_callables.append(OneFormAssembler(ufl.inner(TestFunction(Q), beta)*dx, tensor=tensor, + form_compiler_parameters=fcp).assemble) + + family = "CG" if tdim == 1 else "DGT" + degree = 1 if tdim == 1 else 0 + DGT = ufl.BrokenElement(ufl.FiniteElement(family, cell=mesh.ufl_cell(), degree=degree)) if Piola: # make DGT functions with the second order coefficient # and the Piola tensor for each side of each facet extruded = mesh.cell_set._extruded - dS_int = firedrake.dS_h(degree=quad_deg) + firedrake.dS_v(degree=quad_deg) if extruded else firedrake.dS(degree=quad_deg) - ele = ufl.BrokenElement(ufl.FiniteElement("DGT", mesh.ufl_cell(), 0)) + dS_int = ufl.dS_h(degree=quad_deg) + ufl.dS_v(degree=quad_deg) if extruded else ufl.dS(degree=quad_deg) area = ufl.FacetArea(mesh) + ifacet_inner = lambda v, u: ((ufl.inner(v('+'), u('+')) + ufl.inner(v('-'), u('-')))/area)*dS_int replace_grad = {ufl.grad(t): ufl.dot(dt, Finv) for t, dt in zip(args_J, ref_grad)} - alpha = expand_derivatives(sum([diff(diff(ufl.replace(i.integrand(), replace_grad), - ref_grad[0]), ref_grad[1]) for i in integrals_J])) - vol = abs(ufl.JacobianDeterminant(mesh)) - G = vol * alpha + alpha = expand_derivatives(sum(ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), + ref_grad[0]), ref_grad[1]) for i in integrals_J)) + G = alpha G = ufl.as_tensor([[[G[i, k, j, k] for i in range(G.ufl_shape[0])] for j in range(G.ufl_shape[2])] for k in range(G.ufl_shape[3])]) + G = G * abs(ufl.JacobianDeterminant(mesh)) - Q = firedrake.TensorFunctionSpace(mesh, ele, shape=G.ufl_shape) - q = firedrake.TestFunction(Q) - Gq_facet = firedrake.Function(Q) - coefficients["Gq_facet"] = Gq_facet - assembly_callables.append(partial(firedrake.assemble, ((inner(q('+'), G('+')) + inner(q('-'), G('-')))/area)*dS_int, Gq_facet)) - + Q = FunctionSpace(mesh, ufl.TensorElement(DGT, shape=G.ufl_shape)) + tensor = coefficients.setdefault("Gq_facet", Function(Q)) + assembly_callables.append(OneFormAssembler(ifacet_inner(TestFunction(Q), G), tensor=tensor, + form_compiler_parameters=fcp).assemble) PT = Piola.T - Q = firedrake.TensorFunctionSpace(mesh, ele, shape=PT.ufl_shape) - q = firedrake.TestFunction(Q) - PT_facet = firedrake.Function(Q) - coefficients["PT_facet"] = PT_facet - assembly_callables.append(partial(firedrake.assemble, ((inner(q('+'), PT('+')) + inner(q('-'), PT('-')))/area)*dS_int, PT_facet)) + Q = FunctionSpace(mesh, ufl.TensorElement(DGT, shape=PT.ufl_shape)) + tensor = coefficients.setdefault("PT_facet", Function(Q)) + assembly_callables.append(OneFormAssembler(ifacet_inner(TestFunction(Q), PT), tensor=tensor, + form_compiler_parameters=fcp).assemble) + + # make DGT functions with BC flags + shape = V.ufl_element().reference_value_shape() + Q = FunctionSpace(mesh, ufl.TensorElement(DGT, shape=shape) if shape else DGT) + test = TestFunction(Q) + + ref_args = [ufl.variable(t) for t in args_J] + replace_args = {t: s for t, s in zip(args_J, ref_args)} + + forms = [] + md = {"quadrature_degree": 0} + for it in J.integrals(): + itype = it.integral_type() + if itype.startswith("exterior_facet"): + beta = ufl.diff(ufl.diff(ufl.replace(it.integrand(), replace_args), ref_args[0]), ref_args[1]) + beta = expand_derivatives(beta) + if beta.ufl_shape: + beta = ufl.diag_vector(beta) + ds_ext = ufl.Measure(itype, domain=mesh, subdomain_id=it.subdomain_id(), metadata=md) + forms.append(ufl.inner(test, beta)*ds_ext) + + tensor = coefficients.setdefault("bcflags", Function(Q)) + if len(forms): + form = sum(forms) + if len(form.arguments()) == 1: + assembly_callables.append(OneFormAssembler(form, tensor=tensor, + form_compiler_parameters=fcp).assemble) + # set arbitrary non-zero coefficients for preallocation + for coef in coefficients.values(): + with coef.dat.vec as cvec: + cvec.set(1.0E0) return coefficients, assembly_callables @@ -586,16 +1732,7 @@ def pull_axis(x, pshape, idir): return numpy.reshape(numpy.moveaxis(numpy.reshape(x.copy(), pshape), idir, 0), x.shape) -def set_submat_csr(A_global, A_local, global_indices, imode): - """insert values from A_local to A_global on the diagonal block with indices global_indices""" - indptr, indices, data = A_local.getValuesCSR() - for i, row in enumerate(global_indices.flat): - i0 = indptr[i] - i1 = indptr[i+1] - A_global.setValues(row, global_indices.flat[indices[i0:i1]], data[i0:i1], imode) - - -def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False): +def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False, comm=None): """ Create a SeqAIJ Mat from a dense matrix using the diagonal and a subset of rows and columns. If dense_indices is empty, then also include the off-diagonal corners of the matrix. @@ -606,8 +1743,7 @@ def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False): nnz[dense_indices] = len(dense_indices) if block else n imode = PETSc.InsertMode.INSERT - A_petsc = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=PETSc.COMM_SELF) - + A_petsc = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) idx = numpy.arange(n, dtype=PETSc.IntType) if block: values = A_numpy[dense_indices, :][:, dense_indices] @@ -616,39 +1752,36 @@ def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False): for j in dense_indices: A_petsc.setValues(j, idx, A_numpy[j, :], imode) A_petsc.setValues(idx, j, A_numpy[:, j], imode) - if diag: idx = idx[:, None] values = A_numpy.diagonal()[:, None] A_petsc.setValuesRCV(idx, idx, values, imode) - A_petsc.assemble() return A_petsc -@lru_cache(maxsize=10) -def fdm_setup_ipdg(fdm_element, eta): +def fdm_setup_ipdg(fdm_element, eta, comm=None): """ - Setup for the fast diagonalization method for the IP-DG formulation. + Setup for the fast diagonalisation method for the IP-DG formulation. Compute sparsified interval stiffness and mass matrices and tabulate the normal derivative of the shape functions. :arg fdm_element: a :class:`FIAT.FDMElement` :arg eta: penalty coefficient as a `float` + :arg comm: a :class:`PETSc.Comm` :returns: 3-tuple of: Afdm: a list of :class:`PETSc.Mats` with the sparse interval matrices Bhat, and bcs(Ahat) for every combination of either natural or weak Dirichlet BCs on each endpoint. Dfdm: the tabulation of the normal derivatives of the Dirichlet eigenfunctions. - bdof: the indices of PointEvaluation dofs. + bdof: the indices of the vertex degrees of freedom. """ - from FIAT.quadrature import GaussLegendreQuadratureLineRule - from FIAT.functional import PointEvaluation ref_el = fdm_element.get_reference_element() degree = fdm_element.degree() - rule = GaussLegendreQuadratureLineRule(ref_el, degree+1) - bdof = [k for k, f in enumerate(fdm_element.dual_basis()) if isinstance(f, PointEvaluation)] + rule = FIAT.quadrature.make_quadrature(ref_el, degree+1) + edof = fdm_element.entity_dofs() + bdof = edof[0][0] + edof[0][1] phi = fdm_element.tabulate(1, rule.get_points()) Jhat = phi[(0, )] @@ -661,7 +1794,7 @@ def fdm_setup_ipdg(fdm_element, eta): Dfacet = basis[(1,)] Dfacet[:, 0] = -Dfacet[:, 0] - Afdm = [numpy_to_petsc(Bhat, bdof, block=True)] + Afdm = [numpy_to_petsc(Bhat, bdof, block=True, comm=comm)] for bc in range(4): bcs = (bc % 2, bc//2) Abc = Ahat.copy() @@ -671,23 +1804,24 @@ def fdm_setup_ipdg(fdm_element, eta): Abc[:, j] -= Dfacet[:, k] Abc[j, :] -= Dfacet[:, k] Abc[j, j] += eta - Afdm.append(numpy_to_petsc(Abc, bdof)) + Afdm.append(numpy_to_petsc(Abc, bdof, comm=comm)) return Afdm, Dfacet, bdof -@lru_cache(maxsize=10) -def get_interior_facet_maps(V): +def extrude_interior_facet_maps(V): """ - Extrude V.interior_facet_node_map and V.ufl_domain().interior_facets.local_facet_dat + Extrude V.interior_facet_node_map and V.mesh().interior_facets.local_facet_dat - :arg V: a :class:`~.FunctionSpace` + :arg V: a :class:`.FunctionSpace` :returns: the 3-tuple of facet_to_nodes_fun: maps interior facets to the nodes of the two cells sharing it, local_facet_data_fun: maps interior facets to the local facet numbering in the two cells sharing it, nfacets: the total number of interior facets owned by this process """ - mesh = V.ufl_domain() + if isinstance(V, Function): + V = V.function_space() + mesh = V.mesh() intfacets = mesh.interior_facets facet_to_cells = intfacets.facet_cell_map.values local_facet_data = intfacets.local_facet_dat.data_ro @@ -747,90 +1881,63 @@ def get_interior_facet_maps(V): return facet_to_nodes_fun, local_facet_data_fun, nfacets -@lru_cache(maxsize=10) -def glonum_fun(node_map): +def extrude_node_map(node_map, bsize=1): """ - Return a function that maps each topological entity to its nodes and the total number of entities. + Construct a (possibly vector-valued) cell to node map from an un-extruded scalar map. - :arg node_map: a :class:`pyop2.Map` mapping entities to their nodes, including ghost entities. + :arg node_map: a :class:`pyop2.Map` mapping entities to their local dofs, including ghost entities. + :arg bsize: the block size - :returns: a 2-tuple with the map and the number of cells owned by this process + :returns: a 2-tuple with the cell to node map and the number of cells owned by this process """ - nelv = node_map.values.shape[0] + nel = node_map.values.shape[0] if node_map.offset is None: - return lambda e: node_map.values_with_halo[e], nelv + def _scalar_map(map_values, e, result=None): + if result is None: + result = numpy.empty_like(map_values[e]) + numpy.copyto(result, map_values[e]) + return result + + scalar_map = partial(_scalar_map, node_map.values_with_halo) else: layers = node_map.iterset.layers_array if layers.shape[0] == 1: + def _scalar_map(map_values, offset, nelz, e, result=None): + if result is None: + result = numpy.empty_like(offset) + numpy.copyto(result, offset) + result *= (e % nelz) + result += map_values[e // nelz] + return result + nelz = layers[0, 1]-layers[0, 0]-1 - nel = nelz*nelv - return lambda e: node_map.values_with_halo[e//nelz] + (e % nelz)*node_map.offset, nel + nel *= nelz + scalar_map = partial(_scalar_map, node_map.values_with_halo, node_map.offset, nelz) else: + def _scalar_map(map_values, offset, to_base, to_layer, e, result=None): + if result is None: + result = numpy.empty_like(offset) + numpy.copyto(result, offset) + result *= to_layer[e] + result += map_values[to_base[e]] + return result + nelz = layers[:, 1]-layers[:, 0]-1 - nel = sum(nelz[:nelv]) + nel = sum(nelz[:nel]) to_base = numpy.repeat(numpy.arange(node_map.values_with_halo.shape[0], dtype=node_map.offset.dtype), nelz) to_layer = numpy.concatenate([numpy.arange(nz, dtype=node_map.offset.dtype) for nz in nelz]) - return lambda e: node_map.values_with_halo[to_base[e]] + to_layer[e]*node_map.offset, nel - + scalar_map = partial(_scalar_map, node_map.values_with_halo, node_map.offset, to_base, to_layer) -def glonum(node_map): - """ - Return an array with the nodes of each topological entity of a certain kind. + if bsize == 1: + return scalar_map, nel - :arg node_map: a :class:`pyop2.Map` mapping entities to their nodes, including ghost entities. + def vector_map(bsize, ibase, e, result=None): + index = None + if result is not None: + index = result[:, 0] + index = scalar_map(e, result=index) + index *= bsize + return numpy.add.outer(index, ibase, out=result) - :returns: a :class:`numpy.ndarray` whose rows are the nodes for each cell - """ - if (node_map.offset is None) or (node_map.values_with_halo.size == 0): - return node_map.values_with_halo - else: - layers = node_map.iterset.layers_array - if layers.shape[0] == 1: - nelz = layers[0, 1]-layers[0, 0]-1 - to_layer = numpy.tile(numpy.arange(nelz, dtype=node_map.offset.dtype), len(node_map.values_with_halo)) - else: - nelz = layers[:, 1]-layers[:, 0]-1 - to_layer = numpy.concatenate([numpy.arange(nz, dtype=node_map.offset.dtype) for nz in nelz]) - return numpy.repeat(node_map.values_with_halo, nelz, axis=0) + numpy.kron(to_layer.reshape((-1, 1)), node_map.offset) - - -def get_weak_bc_flags(J): - """ - Return flags indicating whether the zero-th order coefficient on each facet of every cell is non-zero - """ - from ufl.algorithms.ad import expand_derivatives - mesh = J.ufl_domain() - args_J = J.arguments() - V = args_J[0].function_space() - rvs = V.ufl_element().reference_value_shape() - cell = mesh.ufl_cell() - family = "CG" if cell.topological_dimension() == 1 else "DGT" - degree = 1 if cell.topological_dimension() == 1 else 0 - Qe = ufl.FiniteElement(family, cell=cell, degree=degree) - if rvs: - Qe = ufl.TensorElement(Qe, shape=rvs) - Q = firedrake.FunctionSpace(mesh, Qe) - q = firedrake.TestFunction(Q) - - ref_args = [ufl.variable(t) for t in args_J] - replace_args = {t: s for t, s in zip(args_J, ref_args)} - - forms = [] - md = {"quadrature_degree": 0} - for it in J.integrals(): - itype = it.integral_type() - if itype.startswith("exterior_facet"): - beta = ufl.diff(ufl.diff(ufl.replace(it.integrand(), replace_args), ref_args[0]), ref_args[1]) - beta = expand_derivatives(beta) - if rvs: - beta = ufl.diag_vector(beta) - ds_ext = ufl.Measure(itype, domain=mesh, subdomain_id=it.subdomain_id(), metadata=md) - forms.append(ufl.inner(q, beta)*ds_ext) - - tol = 1E-8 - if len(forms): - bq = firedrake.assemble(sum(forms)) - fbc = bq.dat.data_with_halos[glonum(Q.cell_node_map())] - return (abs(fbc) > tol).astype(PETSc.IntType) - else: - return numpy.zeros(glonum(Q.cell_node_map()).shape, dtype=PETSc.IntType) + ibase = numpy.arange(bsize, dtype=node_map.values.dtype) + return partial(vector_map, bsize, ibase), nel diff --git a/firedrake/preconditioners/gtmg.py b/firedrake/preconditioners/gtmg.py index 35f1f1e570..d11cc548c1 100644 --- a/firedrake/preconditioners/gtmg.py +++ b/firedrake/preconditioners/gtmg.py @@ -29,7 +29,7 @@ def initialize(self, pc): if ctx is None: raise ValueError("No context found.") if not isinstance(ctx, _SNESContext): - raise ValueError("Don't know how to get form from %r", ctx) + raise ValueError("Don't know how to get form from %r" % ctx) prefix = pc.getOptionsPrefix() options_prefix = prefix + self._prefix @@ -41,7 +41,7 @@ def initialize(self, pc): if ictx is None: raise ValueError("No context found on matrix") if not isinstance(ictx, ImplicitMatrixContext): - raise ValueError("Don't know how to get form from %r", ictx) + raise ValueError("Don't know how to get form from %r" % ictx) fine_operator = ictx.a fine_bcs = ictx.row_bcs @@ -70,7 +70,7 @@ def initialize(self, pc): fine_petscmat.setTransposeNullSpace(fine_transpose_nullspace) # Handle the coarse operator - coarse_options_prefix = options_prefix + "mg_coarse" + coarse_options_prefix = options_prefix + "mg_coarse_" coarse_mat_type = opts.getString(coarse_options_prefix + "mat_type", parameters["default_matrix_type"]) diff --git a/firedrake/preconditioners/hiptmair.py b/firedrake/preconditioners/hiptmair.py index 317dc5614b..af1d1bab27 100644 --- a/firedrake/preconditioners/hiptmair.py +++ b/firedrake/preconditioners/hiptmair.py @@ -201,7 +201,7 @@ def coarsen(self, pc): if G_callback is None: interp_petscmat = chop(Interpolator(dminus(test), V, bcs=bcs + coarse_space_bcs).callable().handle) else: - interp_petscmat = G_callback(V, coarse_space, bcs, coarse_space_bcs) + interp_petscmat = G_callback(coarse_space, V, coarse_space_bcs, bcs) return coarse_operator, coarse_space_bcs, interp_petscmat diff --git a/firedrake/preconditioners/hypre_ads.py b/firedrake/preconditioners/hypre_ads.py index ca3728abdb..9cbc2537da 100644 --- a/firedrake/preconditioners/hypre_ads.py +++ b/firedrake/preconditioners/hypre_ads.py @@ -34,12 +34,12 @@ def initialize(self, obj): if G_callback is None: G = chop(Interpolator(grad(TestFunction(P1)), NC1).callable().handle) else: - G = G_callback(NC1, P1) + G = G_callback(P1, NC1) C_callback = appctx.get("get_curl", None) if C_callback is None: C = chop(Interpolator(curl(TestFunction(NC1)), V).callable().handle) else: - C = C_callback(V, NC1) + C = C_callback(NC1, V) pc = PETSc.PC().create(comm=obj.comm) pc.incrementTabLevel(1, parent=obj) diff --git a/firedrake/preconditioners/hypre_ams.py b/firedrake/preconditioners/hypre_ams.py index 8bfd14908e..a00334403b 100644 --- a/firedrake/preconditioners/hypre_ams.py +++ b/firedrake/preconditioners/hypre_ams.py @@ -54,7 +54,7 @@ def initialize(self, obj): if G_callback is None: G = chop(Interpolator(grad(TestFunction(P1)), V).callable().handle) else: - G = G_callback(V, P1) + G = G_callback(P1, V) pc = PETSc.PC().create(comm=obj.comm) pc.incrementTabLevel(1, parent=obj) diff --git a/firedrake/preconditioners/patch.py b/firedrake/preconditioners/patch.py index 08f538b876..f3e7e8123e 100644 --- a/firedrake/preconditioners/patch.py +++ b/firedrake/preconditioners/patch.py @@ -748,14 +748,14 @@ def initialize(self, obj): if ctx is None: raise ValueError("No context found on form") if not isinstance(ctx, _SNESContext): - raise ValueError("Don't know how to get form from %r", ctx) + raise ValueError("Don't know how to get form from %r" % ctx) if P.getType() == "python": ictx = P.getPythonContext() if ictx is None: raise ValueError("No context found on matrix") if not isinstance(ictx, ImplicitMatrixContext): - raise ValueError("Don't know how to get form from %r", ictx) + raise ValueError("Don't know how to get form from %r" % ictx) J = ictx.a bcs = ictx.row_bcs if bcs != ictx.col_bcs: diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 823849729c..ff302a94a2 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -1,20 +1,28 @@ -from functools import partial, lru_cache +from functools import partial from itertools import chain -from pyop2 import op2, PermutedMap -from firedrake.preconditioners.base import PCBase, SNESBase, PCSNESBase from firedrake.dmhooks import (attach_hooks, get_appctx, push_appctx, pop_appctx, add_hook, get_parent, push_parent, pop_parent, get_function_space, set_function_space) +from firedrake.petsc import PETSc +from firedrake.preconditioners.base import PCBase, SNESBase, PCSNESBase +from firedrake.nullspace import VectorSpaceBasis, MixedVectorSpaceBasis from firedrake.solving_utils import _SNESContext from firedrake.tsfc_interface import extract_numbered_coefficients -from firedrake.utils import ScalarType_c, IntType_c -from firedrake.petsc import PETSc +from firedrake.utils import ScalarType_c, IntType_c, cached_property +from tsfc.finatinterface import create_element +from tsfc import compile_expression_dual_evaluation +from pyop2 import op2 +from pyop2.caching import cached + import firedrake +import finat +import FIAT import ufl import loopy import numpy import os import tempfile +import weakref __all__ = ("PMGPC", "PMGSNES") @@ -30,9 +38,10 @@ class PMGBase(PCSNESBase): or any other solver in firedrake may be applied to the coarse problem. Other PETSc options inspected by this class are: - - 'pmg_coarse_degree': polynomial degree of the coarse level - - 'pmg_coarse_mat_type': can be either 'aij' or 'matfree' - - 'pmg_coarse_form_compiler_mode': can be 'spectral' (default), 'vanilla', 'coffee', or 'tensor' + - 'pmg_mg_coarse_degree': polynomial degree of the coarse level + - 'pmg_mg_coarse_mat_type': can be either a `PETSc.Mat.Type`, or 'matfree' + - 'pmg_mg_coarse_pmat_type': can be either a `PETSc.Mat.Type`, or 'matfree' + - 'pmg_mg_coarse_form_compiler_mode': can be 'spectral' (default), 'vanilla', 'coffee', or 'tensor' - 'pmg_mg_levels_transfer_mat_type': can be either 'aij' or 'matfree' The p-coarsening is implemented in the `coarsen_element` routine. @@ -48,6 +57,9 @@ class PMGBase(PCSNESBase): """ _prefix = "pmg_" + # This is parallel-safe because the keys are ids of a collective objects + _coarsen_cache = weakref.WeakKeyDictionary() + _transfer_cache = weakref.WeakKeyDictionary() def coarsen_element(self, ele): """ @@ -72,32 +84,39 @@ def coarsen_form(self, form, fine_to_coarse_map): """ return ufl.replace(form, fine_to_coarse_map) - def initialize(self, pc): + def initialize(self, obj): # Make a new DM. # Hook up a (new) coarsen routine on that DM. - # Make a new PC, of type MG. - # Assign the DM to that PC. + # Make a new PC, of type MG (or SNES of type FAS). + # Assign the DM to that PC (or SNES). - odm = pc.getDM() + odm = obj.getDM() ctx = get_appctx(odm) + if ctx is None: + raise ValueError("No context found.") + if not isinstance(ctx, _SNESContext): + raise ValueError("Don't know how to get form from %r" % ctx) + fcp = ctx._problem.form_compiler_parameters + mode = fcp.get("mode", "spectral") if fcp is not None else "spectral" test, trial = ctx.J.arguments() if test.function_space() != trial.function_space(): raise NotImplementedError("test and trial spaces must be the same") - prefix = pc.getOptionsPrefix() + prefix = obj.getOptionsPrefix() options_prefix = prefix + self._prefix - opts = PETSc.Options(options_prefix) - pdm = PETSc.DMShell().create(comm=pc.comm) + pdm = PETSc.DMShell().create(comm=obj.comm) pdm.setOptionsPrefix(options_prefix) + ppc = self.configure_pmg(obj, pdm) + self.is_snes = isinstance(obj, PETSc.SNES) + # Get the coarse degree from PETSc options - fcp = ctx._problem.form_compiler_parameters - mode = fcp.get("mode", "spectral") if fcp is not None else "spectral" - self.coarse_degree = opts.getInt("coarse_degree", default=1) - self.coarse_mat_type = opts.getString("coarse_mat_type", default=ctx.mat_type) - self.coarse_pmat_type = opts.getString("coarse_pmat_type", default=self.coarse_mat_type) - self.coarse_form_compiler_mode = opts.getString("coarse_form_compiler_mode", default=mode) + copts = PETSc.Options(ppc.getOptionsPrefix() + ppc.getType() + "_coarse_") + self.coarse_degree = copts.getInt("degree", default=1) + self.coarse_mat_type = copts.getString("mat_type", default=ctx.mat_type) + self.coarse_pmat_type = copts.getString("pmat_type", default=self.coarse_mat_type) + self.coarse_form_compiler_mode = copts.getString("form_compiler_mode", default=mode) # Construct a list with the elements we'll be using V = test.function_space() @@ -118,12 +137,10 @@ def initialize(self, pc): # Now overwrite some routines on the DM pdm.setRefine(None) pdm.setCoarsen(self.coarsen) - pdm.setCreateInterpolation(self.create_interpolation) - # We need this for p-FAS - pdm.setCreateInjection(self.create_injection) - pdm.setSNESJacobian(_SNESContext.form_jacobian) - pdm.setSNESFunction(_SNESContext.form_function) - pdm.setKSPComputeOperators(_SNESContext.compute_operators) + if self.is_snes: + pdm.setSNESFunction(_SNESContext.form_function) + pdm.setSNESJacobian(_SNESContext.form_jacobian) + pdm.setKSPComputeOperators(_SNESContext.compute_operators) set_function_space(pdm, get_function_space(odm)) @@ -132,28 +149,28 @@ def initialize(self, pc): add_hook(parent, setup=partial(push_parent, pdm, parent), teardown=partial(pop_parent, pdm, parent), call_setup=True) add_hook(parent, setup=partial(push_appctx, pdm, ctx), teardown=partial(pop_appctx, pdm, ctx), call_setup=True) - self.ppc = self.configure_pmg(pc, pdm) - self.ppc.setFromOptions() - self.ppc.setUp() + ppc.incrementTabLevel(1, parent=obj) + ppc.setFromOptions() + ppc.setUp() + self.ppc = ppc - def update(self, pc): - pass + def update(self, obj): + self.ppc.setUp() - def view(self, pc, viewer=None): + def view(self, obj, viewer=None): if viewer is None: viewer = PETSc.Viewer.STDOUT viewer.printfASCII("p-multigrid PC\n") - self.ppc.view(viewer) + if hasattr(self, "ppc"): + self.ppc.view(viewer=viewer) - def destroy(self, pc): + def destroy(self, obj): if hasattr(self, "ppc"): self.ppc.destroy() def coarsen(self, fdm, comm): # Coarsen the _SNESContext of a DM fdm # return the coarse DM cdm of the coarse _SNESContext - from firedrake.nullspace import VectorSpaceBasis, MixedVectorSpaceBasis - fctx = get_appctx(fdm) parent = get_parent(fdm) assert parent is not None @@ -190,9 +207,13 @@ def _coarsen_form(a): for f in a.integrals()]) return a - cF = _coarsen_form(fctx.F) cJ = _coarsen_form(fctx.J) - cJp = _coarsen_form(fctx.Jp) + cJp = cJ if fctx.Jp is fctx.J else _coarsen_form(fctx.Jp) + # This fixes a subtle bug where you are applying PMGPC on a mixed + # problem with geometric multigrid only on one block and an non-Lagrange element + # on the other block (gmg breaks for non-Lagrange elements) + cF = _coarsen_form(fctx.F) if self.is_snes else ufl.action(cJ, cu) + fcp = self.coarsen_quadrature(fproblem.form_compiler_parameters, fdeg, cdeg) cbcs = self.coarsen_bcs(fproblem.bcs, cV) @@ -218,11 +239,7 @@ def _coarsen_form(a): except ValueError: mat_type = self.coarse_mat_type pmat_type = self.coarse_pmat_type - if fcp is None: - fcp = dict() - elif fcp is fproblem.form_compiler_parameters: - fcp = dict(fcp) - fcp["mode"] = self.coarse_form_compiler_mode + fcp = dict(fcp or {}, mode=self.coarse_form_compiler_mode) # Coarsen the problem and the _SNESContext cproblem = firedrake.NonlinearVariationalProblem(cF, cu, bcs=cbcs, J=cJ, Jp=cJp, @@ -250,113 +267,123 @@ def _coarsen_form(a): cdm.setCreateInterpolation(self.create_interpolation) cdm.setCreateInjection(self.create_injection) - # injection of the initial state - def inject_state(mat): - with cu.dat.vec_wo as xc, fu.dat.vec_ro as xf: - mat.multTranspose(xf, xc) + if cu in cJ.coefficients(): + # Only inject state if the coarse state is a dependency of the coarse Jacobian. + inject = cdm.createInjection(fdm) + + def inject_state(): + with cu.dat.vec_wo as xc, fu.dat.vec_ro as xf: + inject.mult(xf, xc) + + add_hook(parent, setup=inject_state, call_setup=True) + + interpolate = None + if fctx._nullspace or fctx._nullspace_T or fctx._near_nullspace: + interpolate, _ = cdm.createInterpolation(fdm) + cctx._nullspace = self.coarsen_nullspace(fctx._nullspace, cV, interpolate) + cctx._nullspace_T = self.coarsen_nullspace(fctx._nullspace_T, cV, interpolate) + cctx._near_nullspace = self.coarsen_nullspace(fctx._near_nullspace, cV, interpolate) + cctx.set_nullspace(cctx._nullspace, cV._ises, transpose=False, near=False) + cctx.set_nullspace(cctx._nullspace_T, cV._ises, transpose=True, near=False) + cctx.set_nullspace(cctx._near_nullspace, cV._ises, transpose=False, near=True) + return cdm + + def coarsen_quadrature(self, metadata, fdeg, cdeg): + """Coarsen the quadrature degree in a dictionary preserving the ratio of + quadrature nodes to interpolation nodes (qdeg+1)//(fdeg+1).""" + try: + qdeg = metadata["quadrature_degree"] + coarse_qdeg = max(2*cdeg+1, ((qdeg+1)*(cdeg+1)+fdeg)//(fdeg+1)-1) + return dict(metadata, quadrature_degree=coarse_qdeg) + except (KeyError, TypeError): + return metadata - injection = self.create_injection(cdm, fdm) - add_hook(parent, setup=partial(inject_state, injection), call_setup=True) + def coarsen_bcs(self, fbcs, cV): + """Coarsen a list of bcs""" + cbcs = [] + for bc in fbcs: + cache = self._coarsen_cache.setdefault(bc, {}) + key = (cV.ufl_element(), self.is_snes) + try: + coarse_bc = cache[key] + except KeyError: + cV_ = cV + for index in bc._indices: + cV_ = cV_.sub(index) + cbc_value = self.coarsen_bc_value(bc, cV_) + if isinstance(bc, firedrake.DirichletBC): + coarse_bc = cache.setdefault(key, bc.reconstruct(V=cV_, g=cbc_value)) + else: + raise NotImplementedError("Unsupported BC type, please get in touch if you need this") + cbcs.append(coarse_bc) + return cbcs - # coarsen the nullspace basis - def coarsen_nullspace(coarse_V, mat, fine_nullspace): + def coarsen_nullspace(self, fine_nullspace, cV, interpolate): + """Coarsen a nullspace""" + if fine_nullspace is None: + return fine_nullspace + cache = self._coarsen_cache.setdefault(fine_nullspace, {}) + key = cV.ufl_element() + try: + return cache[key] + except KeyError: if isinstance(fine_nullspace, MixedVectorSpaceBasis): - if mat.type == 'python': - mat = mat.getPythonContext() - submats = [mat.getNestSubMatrix(i, i) for i in range(len(coarse_V))] + if interpolate.getType() == "python": + interpolate = interpolate.getPythonContext() + submats = [interpolate.getNestSubMatrix(i, i) for i in range(len(cV))] coarse_bases = [] - for fs, submat, basis in zip(coarse_V, submats, fine_nullspace._bases): + for fs, submat, basis in zip(cV, submats, fine_nullspace._bases): if isinstance(basis, VectorSpaceBasis): - coarse_bases.append(coarsen_nullspace(fs, submat, basis)) + coarse_bases.append(self.coarsen_nullspace(basis, fs, submat)) else: - coarse_bases.append(coarse_V.sub(basis.index)) - return MixedVectorSpaceBasis(coarse_V, coarse_bases) + coarse_bases.append(cV.sub(basis.index)) + coarse_nullspace = MixedVectorSpaceBasis(cV, coarse_bases) elif isinstance(fine_nullspace, VectorSpaceBasis): coarse_vecs = [] for xf in fine_nullspace._petsc_vecs: - wc = firedrake.Function(coarse_V) + wc = firedrake.Function(cV) with wc.dat.vec_wo as xc: - if mat.getSize()[1] == xf.getSize(): - mat.mult(xf, xc) - else: - mat.multTranspose(xf, xc) + # the nullspace basis is in the dual of V + interpolate.multTranspose(xf, xc) coarse_vecs.append(wc) - vsb = VectorSpaceBasis(coarse_vecs, constant=fine_nullspace._constant) - vsb.orthonormalize() - return vsb + coarse_nullspace = VectorSpaceBasis(coarse_vecs, constant=fine_nullspace._constant) + coarse_nullspace.orthonormalize() else: return fine_nullspace + return cache.setdefault(key, coarse_nullspace) - I, _ = self.create_interpolation(cdm, fdm) - ises = cV._ises - cctx._nullspace = coarsen_nullspace(cV, I, fctx._nullspace) - cctx.set_nullspace(cctx._nullspace, ises, transpose=False, near=False) - cctx._nullspace_T = coarsen_nullspace(cV, I, fctx._nullspace_T) - cctx.set_nullspace(cctx._nullspace_T, ises, transpose=True, near=False) - cctx._near_nullspace = coarsen_nullspace(cV, injection, fctx._near_nullspace) - cctx.set_nullspace(cctx._near_nullspace, ises, transpose=False, near=True) - return cdm - - def coarsen_quadrature(self, metadata, fdeg, cdeg): - if isinstance(metadata, dict): - # Coarsen the quadrature degree in a dictionary - # such that the ratio of quadrature nodes to interpolation nodes (qdeg+1)//(fdeg+1) is preserved - qdeg = metadata.get("quadrature_degree", None) - if qdeg is not None: - cmd = dict(metadata) - cmd["quadrature_degree"] = max(2*cdeg+1, ((qdeg+1)*(cdeg+1)+fdeg)//(fdeg+1)-1) - return cmd - return metadata - - def coarsen_bcs(self, fbcs, cV): - cbcs = [] - for bc in fbcs: - cV_ = cV - for index in bc._indices: - cV_ = cV_.sub(index) - cbc_value = self.coarsen_bc_value(bc, cV_) - if type(bc) == firedrake.DirichletBC: - cbcs.append(firedrake.DirichletBC(cV_, cbc_value, - bc.sub_domain)) + def create_transfer(self, mat_type, cctx, fctx, cbcs, fbcs): + """Create a transfer operator""" + cache = self._transfer_cache.setdefault(fctx, {}) + key = (mat_type, cctx, cbcs, fbcs) + try: + return cache[key] + except KeyError: + if mat_type == "matfree": + construct_mat = prolongation_matrix_matfree + elif mat_type == "aij": + construct_mat = prolongation_matrix_aij else: - raise NotImplementedError("Unsupported BC type, please get in touch if you need this") - return cbcs - - @staticmethod - @lru_cache(maxsize=20) - def create_transfer(cctx, fctx, mat_type, cbcs, fbcs, inject): - cbcs = cctx._problem.bcs if cbcs else [] - fbcs = fctx._problem.bcs if fbcs else [] - if inject: - cV = cctx._problem.u - fV = fctx._problem.u - else: + raise ValueError("Unknown matrix type") cV = cctx.J.arguments()[0].function_space() fV = fctx.J.arguments()[0].function_space() - - if mat_type == "matfree": - return prolongation_matrix_matfree(fV, cV, fbcs, cbcs) - elif mat_type == "aij": - return prolongation_matrix_aij(fV, cV, fbcs, cbcs) - else: - raise ValueError("Unknown matrix type") + cbcs = tuple(cctx._problem.bcs) if cbcs else tuple() + fbcs = tuple(fctx._problem.bcs) if fbcs else tuple() + return cache.setdefault(key, construct_mat(cV, fV, cbcs, fbcs)) def create_interpolation(self, dmc, dmf): prefix = dmc.getOptionsPrefix() mat_type = PETSc.Options(prefix).getString("mg_levels_transfer_mat_type", default="matfree") - return self.create_transfer(get_appctx(dmc), get_appctx(dmf), mat_type, True, False, False), None + return self.create_transfer(mat_type, get_appctx(dmc), get_appctx(dmf), True, False), None def create_injection(self, dmc, dmf): prefix = dmc.getOptionsPrefix() mat_type = PETSc.Options(prefix).getString("mg_levels_transfer_mat_type", default="matfree") - I = self.create_transfer(get_appctx(dmf), get_appctx(dmc), mat_type, False, False, True) - return PETSc.Mat().createTranspose(I) + return self.create_transfer(mat_type, get_appctx(dmf), get_appctx(dmc), False, False) @staticmethod def max_degree(ele): - """ - Return the maximum degree of a :class:`ufl.FiniteElement` - """ + """Return the maximum degree of a :class:`ufl.FiniteElement`""" if isinstance(ele, (ufl.VectorElement, ufl.TensorElement)): return PMGBase.max_degree(ele._sub_element) elif isinstance(ele, (ufl.MixedElement, ufl.TensorProductElement)): @@ -381,31 +408,33 @@ def reconstruct_degree(ele, degree): By default, reconstructed EnrichedElements, TensorProductElements, and MixedElements will have the degree of the sub-elements shifted - by the same amount so that the maximum degree is N. - This is useful to coarsen spaces like NCF(N) x DQ(N-1). + by the same amount so that the maximum degree is `degree`. + This is useful to coarsen spaces like NCF(k) x DQ(k-1). :arg ele: a :class:`ufl.FiniteElement` to reconstruct, - :arg N: an integer degree. + :arg degree: an integer degree. :returns: the reconstructed element """ if isinstance(ele, ufl.VectorElement): return type(ele)(PMGBase.reconstruct_degree(ele._sub_element, degree), dim=ele.num_sub_elements()) elif isinstance(ele, ufl.TensorElement): - return type(ele)(PMGBase.reconstruct_degree(ele._sub_element, degree), shape=ele.value_shape(), symmetry=ele.symmetry()) + return type(ele)(PMGBase.reconstruct_degree(ele._sub_element, degree), shape=ele._shape, symmetry=ele.symmetry()) elif isinstance(ele, ufl.EnrichedElement): - shift = degree-PMGBase.max_degree(ele) - return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e)+shift) for e in ele._elements)) + shift = degree - PMGBase.max_degree(ele) + return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e) + shift) for e in ele._elements)) elif isinstance(ele, ufl.TensorProductElement): - shift = degree-PMGBase.max_degree(ele) - return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e)+shift) for e in ele.sub_elements()), cell=ele.cell()) + shift = degree - PMGBase.max_degree(ele) + return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e) + shift) for e in ele.sub_elements()), cell=ele.cell()) elif isinstance(ele, ufl.MixedElement): - shift = degree-PMGBase.max_degree(ele) - return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e)+shift) for e in ele.sub_elements())) + shift = degree - PMGBase.max_degree(ele) + return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e) + shift) for e in ele.sub_elements())) elif isinstance(ele, ufl.WithMapping): return type(ele)(PMGBase.reconstruct_degree(ele.wrapee, degree), ele.mapping()) - elif isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement, ufl.RestrictedElement)): + elif isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement)): return type(ele)(PMGBase.reconstruct_degree(ele._element, degree)) + elif isinstance(ele, ufl.RestrictedElement): + return type(ele)(PMGBase.reconstruct_degree(ele._element, degree), restriction_domain=ele._restriction_domain) else: return ele.reconstruct(degree=degree) @@ -420,7 +449,6 @@ def configure_pmg(self, pc, pdm): ppc.setType("mg") ppc.setOperators(*pc.getOperators()) ppc.setDM(pdm) - ppc.incrementTabLevel(1, parent=pc) # PETSc unfortunately requires us to make an ugly hack. # We would like to use GMG for the coarse solve, at least @@ -431,8 +459,7 @@ def configure_pmg(self, pc, pdm): # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. opts = PETSc.Options(pc.getOptionsPrefix() + "pmg_") - if "mg_coarse_pc_mg_levels" not in opts: - opts["mg_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 + opts["mg_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 return ppc @@ -443,7 +470,7 @@ def applyTranspose(self, pc, x, y): return self.ppc.applyTranspose(x, y) def coarsen_bc_value(self, bc, cV): - return firedrake.zero(cV.shape) + return 0 class PMGSNES(SNESBase, PMGBase): @@ -455,7 +482,8 @@ def configure_pmg(self, snes, pdm): psnes.setOptionsPrefix(snes.getOptionsPrefix() + "pfas_") psnes.setType("fas") psnes.setDM(pdm) - psnes.incrementTabLevel(1, parent=snes) + psnes.setTolerances(max_it=1) + psnes.setConvergenceTest("skip") (f, residual) = snes.getFunction() assert residual is not None @@ -463,8 +491,7 @@ def configure_pmg(self, snes, pdm): psnes.setFunction(fun, f.duplicate(), args=args, kargs=kargs) pdm.setGlobalVector(f.duplicate()) - self.dummy = f.duplicate() - psnes.setSolution(f.duplicate()) + psnes.setSolution(snes.getSolution()) # PETSc unfortunately requires us to make an ugly hack. # We would like to use GMG for the coarse solve, at least @@ -475,10 +502,8 @@ def configure_pmg(self, snes, pdm): # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. opts = PETSc.Options(snes.getOptionsPrefix() + "pfas_") - if "fas_coarse_pc_mg_levels" not in opts: - opts["fas_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 - if "fas_coarse_snes_fas_levels" not in opts: - opts["fas_coarse_snes_fas_levels"] = odm.getRefineLevel() + 1 + opts["fas_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 + opts["fas_coarse_snes_fas_levels"] = odm.getRefineLevel() + 1 return psnes @@ -486,7 +511,7 @@ def step(self, snes, x, f, y): ctx = get_appctx(snes.dm) push_appctx(self.ppc.dm, ctx) x.copy(y) - self.ppc.solve(snes.vec_rhs or self.dummy, y) + self.ppc.solve(snes.vec_rhs or None, y) y.aypx(-1, x) snes.setConvergedReason(self.ppc.getConvergedReason()) pop_appctx(self.ppc.dm) @@ -501,13 +526,11 @@ def coarsen_bc_value(self, bc, cV): def prolongation_transfer_kernel_action(Vf, expr): - from tsfc import compile_expression_dual_evaluation - from tsfc.finatinterface import create_element to_element = create_element(Vf.ufl_element()) kernel = compile_expression_dual_evaluation(expr, to_element, Vf.ufl_element(), log=PETSc.Log.isActive()) coefficients = extract_numbered_coefficients(expr, kernel.coefficient_numbers) if kernel.needs_external_coords: - coefficients = [Vf.ufl_domain().coordinates] + coefficients + coefficients = [Vf.mesh().coordinates] + coefficients return op2.Kernel(kernel.ast, kernel.name, requires_zeroed_output_arguments=True, @@ -515,92 +538,173 @@ def prolongation_transfer_kernel_action(Vf, expr): events=(kernel.event,)), coefficients -@lru_cache(maxsize=10) def expand_element(ele): - """ - Expand a FiniteElement as an EnrichedElement of TensorProductElements, discarding modifiers. - """ - if ele.cell() == ufl.quadrilateral: - quadrilateral_tpc = ufl.TensorProductCell(ufl.interval, ufl.interval) - return expand_element(ele.reconstruct(cell=quadrilateral_tpc)) - elif ele.cell() == ufl.hexahedron: - hexahedron_tpc = ufl.TensorProductCell(ufl.quadrilateral, ufl.interval) - return expand_element(ele.reconstruct(cell=hexahedron_tpc)) - elif isinstance(ele, (ufl.TensorElement, ufl.VectorElement)): - return expand_element(ele._sub_element) - elif isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement, ufl.RestrictedElement)): - return expand_element(ele._element) - elif isinstance(ele, ufl.WithMapping): - return expand_element(ele.wrapee) - elif isinstance(ele, ufl.MixedElement): - return ufl.MixedElement(*[expand_element(e) for e in ele.sub_elements()]) - elif isinstance(ele, ufl.EnrichedElement): - terms = [] - for e in ele._elements: - ee = expand_element(e) - if isinstance(ee, ufl.EnrichedElement): - terms.extend(ee._elements) - else: - terms.append(ee) - return ufl.EnrichedElement(*terms) - elif isinstance(ele, ufl.TensorProductElement): - factors = [expand_element(e) for e in ele.sub_elements()] + """Expand a FiniteElement as an EnrichedElement of TensorProductElements, + discarding modifiers.""" + if isinstance(ele, finat.FlattenedDimensions): + return expand_element(ele.product) + elif isinstance(ele, (finat.HDivElement, finat.HCurlElement)): + return expand_element(ele.wrappee) + elif isinstance(ele, finat.DiscontinuousElement): + return expand_element(ele.element) + elif isinstance(ele, finat.EnrichedElement): + terms = list(map(expand_element, ele.elements)) + return finat.EnrichedElement(terms) + elif isinstance(ele, finat.TensorProductElement): + factors = list(map(expand_element, ele.factors)) terms = [tuple()] for e in factors: new_terms = [] - for f in e._elements if isinstance(e, ufl.EnrichedElement) else [e]: - f_factors = f.sub_elements() if isinstance(f, ufl.TensorProductElement) else (f,) - new_terms.extend([t_factors + f_factors for t_factors in terms]) + for f in e.elements if isinstance(e, finat.EnrichedElement) else [e]: + f_factors = tuple(f.factors) if isinstance(f, finat.TensorProductElement) else (f,) + new_terms.extend(t_factors + f_factors for t_factors in terms) terms = new_terms - if len(terms) == 1: - return ufl.TensorProductElement(*terms[0]) - else: - return ufl.EnrichedElement(*[ufl.TensorProductElement(*k) for k in terms]) + terms = list(map(finat.TensorProductElement, terms)) + return finat.EnrichedElement(terms) else: return ele -def get_line_elements(V): - from FIAT.reference_element import LINE - from tsfc.finatinterface import create_element - ele = V.ufl_element() - if isinstance(ele, ufl.MixedElement) and not isinstance(ele, (ufl.TensorElement, ufl.VectorElement)): - raise ValueError("MixedElements are not decomposed into tensor products") - rvs = ele.reference_value_size() - ele = expand_element(ele) - if isinstance(ele, ufl.EnrichedElement): - ele = ele._elements[0] - - finat_ele = create_element(ele) - if rvs*finat_ele.space_dimension() != V.value_size*V.finat_element.space_dimension(): - raise ValueError("Failed to decompose %s into a single tensor product" % V.ufl_element()) - factors = finat_ele.factors if hasattr(finat_ele, "factors") else (finat_ele,) +def hash_fiat_element(element): + """FIAT elements are not hashable, + this is not the best way to create a hash""" + restriction = None + e = element + if isinstance(e, FIAT.DiscontinuousElement): + # this hash does not care about inter-element continuity + e = e._element + if isinstance(e, FIAT.RestrictedElement): + restriction = tuple(e._indices) + e = e._element + if len(restriction) == e.space_dimension(): + restriction = None + family = e.__class__.__name__ + degree = e.order + return (family, element.ref_el, degree, restriction) + + +def generate_key_evaluate_dual(source, target, alpha=tuple()): + return hash_fiat_element(source) + hash_fiat_element(target) + (alpha,) + + +def get_readonly_view(arr): + result = arr.view() + result.flags.writeable = False + return result + + +@cached({}, key=generate_key_evaluate_dual) +def evaluate_dual(source, target, alpha=tuple()): + """Evaluate the action of a set of dual functionals of the target element + on the (derivative of order alpha of the) basis functions of the source + element.""" + primal = source.get_nodal_basis() + dual = target.get_dual_set() + A = dual.to_riesz(primal) + B = numpy.transpose(primal.get_coeffs()) + if sum(alpha) != 0: + dmats = primal.get_dmats() + for i in range(len(alpha)): + for j in range(alpha[i]): + B = numpy.dot(dmats[i], B) + return get_readonly_view(numpy.dot(A, B)) + + +@cached({}, key=generate_key_evaluate_dual) +def compare_element(e1, e2): + """Numerically compare two :class:`FIAT.elements`. + Equality is satisfied if e2.dual_basis(e1.primal_basis) == identity.""" + if e1 is e2: + return True + if e1.space_dimension() != e2.space_dimension(): + return False + B = evaluate_dual(e1, e2) + return numpy.allclose(B, numpy.eye(B.shape[0]), rtol=1E-14, atol=1E-14) + + +@cached({}, key=lambda V: V.ufl_element()) +@PETSc.Log.EventDecorator("GetLineElements") +def get_permutation_to_line_elements(V): + """ + Find DOF permutation to factor out the EnrichedElement expansion into common + TensorProductElements. This routine exposes structure to e.g vectorize + prolongation of NCE or NCF accross vector components, by permuting all + components into a common TensorProductElement. + + This is temporary while we wait for dual evaluation of :class:`finat.EnrichedElement`. + + :arg V: a :class:`.FunctionSpace` + + :returns: a 3-tuple of the DOF permutation, the unique terms in expansion as + a list of tuples of :class:`FIAT.FiniteElements`, and the cyclic + permutations of the axes to form the element given by their shifts + in list of `int` tuples + """ + finat_element = V.finat_element + expansion = expand_element(finat_element) + if expansion.space_dimension() != finat_element.space_dimension(): + raise ValueError("Failed to decompose %s into tensor products" % V.ufl_element()) + line_elements = [] - for e in reversed(factors): - fiat_ele = e.fiat_equivalent - if fiat_ele.get_reference_element().shape != LINE: - raise ValueError("Expecting %s to be on the interval" % fiat_ele) - line_elements.append(fiat_ele) - return line_elements - - -@lru_cache(maxsize=10) -def get_line_interpolator(felem, celem): - from FIAT import functional, make_quadrature - fdual = felem.dual_basis() - cdual = celem.dual_basis() - if len(fdual) == len(cdual): - if all(f.get_point_dict() == c.get_point_dict() for f, c in zip(fdual, cdual)): - return numpy.array([]) - - if all(isinstance(phi, functional.PointEvaluation) for phi in fdual): - pts = [list(phi.get_point_dict().keys())[0] for phi in fdual] - return celem.tabulate(0, pts)[(0,)] - else: - pts = make_quadrature(felem.get_reference_element(), - felem.space_dimension()).get_points() - return numpy.dot(celem.tabulate(0, pts)[(0,)], - numpy.linalg.inv(felem.tabulate(0, pts)[(0,)])) + terms = expansion.elements if hasattr(expansion, "elements") else [expansion] + for term in terms: + factors = term.factors if hasattr(term, "factors") else (term,) + fiat_factors = tuple(e.fiat_equivalent for e in reversed(factors)) + if any(e.get_reference_element().get_spatial_dimension() != 1 for e in fiat_factors): + raise ValueError("Failed to decompose %s into line elements" % V.ufl_element()) + line_elements.append(fiat_factors) + + shapes = [tuple(e.space_dimension() for e in factors) for factors in line_elements] + sizes = list(map(numpy.prod, shapes)) + dof_ranges = numpy.cumsum([0] + sizes) + + dof_perm = [] + unique_line_elements = [] + shifts = [] + + visit = [False for e in line_elements] + while False in visit: + base = line_elements[visit.index(False)] + tdim = len(base) + pshape = tuple(e.space_dimension() for e in base) + unique_line_elements.append(base) + + axes_shifts = tuple() + for shift in range(tdim): + if finat_element.formdegree != 2: + shift = (tdim - shift) % tdim + + perm = base[shift:] + base[:shift] + for i, term in enumerate(line_elements): + if not visit[i]: + is_perm = all(e1.space_dimension() == e2.space_dimension() + for e1, e2 in zip(perm, term)) + if is_perm: + is_perm = all(compare_element(e1, e2) for e1, e2 in zip(perm, term)) + + if is_perm: + axes_shifts += ((tdim - shift) % tdim, ) + dofs = numpy.arange(*dof_ranges[i:i+2], dtype=PETSc.IntType).reshape(pshape) + dofs = numpy.transpose(dofs, axes=numpy.roll(numpy.arange(tdim), -shift)) + assert dofs.shape == shapes[i] + dof_perm.append(dofs.flat) + visit[i] = True + + shifts.append(axes_shifts) + + dof_perm = get_readonly_view(numpy.concatenate(dof_perm)) + return dof_perm, unique_line_elements, shifts + + +def get_permuted_map(V): + """ + Return a PermutedMap with the same tensor product shape for + every component of H(div) or H(curl) tensor product elements + """ + indices, _, _ = get_permutation_to_line_elements(V) + if numpy.all(indices[:-1] < indices[1:]): + return V.cell_node_map() + return op2.PermutedMap(V.cell_node_map(), indices) # Common kernel to compute y = kron(A3, kron(A2, A1)) * x @@ -609,7 +713,7 @@ def get_line_interpolator(felem, celem): #include #include -static inline void kronmxv(PetscBLASInt tflag, +static inline void kronmxv_inplace(PetscBLASInt tflag, PetscBLASInt mx, PetscBLASInt my, PetscBLASInt mz, PetscBLASInt nx, PetscBLASInt ny, PetscBLASInt nz, PetscBLASInt nel, PetscScalar *A1, PetscScalar *A2, PetscScalar *A3, @@ -629,7 +733,7 @@ def get_line_interpolator(felem, celem): y is (mx*my*mz)-by-nel. Important notes: -The input data in x is destroyed in the process. +This routine is in-place: the input data in x and y are destroyed in the process. Need to allocate nel*max(mx, nx)*max(my, ny)*max(mz, nz) memory for both x and y. */ @@ -675,42 +779,238 @@ def get_line_interpolator(felem, celem): *y = ptr[ires]; return; } -""" +static inline void kronmxv(PetscBLASInt tflag, + PetscBLASInt mx, PetscBLASInt my, PetscBLASInt mz, + PetscBLASInt nx, PetscBLASInt ny, PetscBLASInt nz, PetscBLASInt nel, + PetscScalar *A1, PetscScalar *A2, PetscScalar *A3, + PetscScalar *x, PetscScalar *y, PetscScalar *xwork, PetscScalar *ywork){ + /* + Same as kronmxv_inplace, but the work buffers allow the input data in x to + be kept untouched. + */ -def make_kron_code(Vf, Vc, t_in, t_out, mat_name): - nscal = Vf.ufl_element().reference_value_size() - felems = get_line_elements(Vf) - celems = get_line_elements(Vc) - if len(felems) != len(celems): - raise ValueError("Fine and coarse elements do not have the same number of factors") - if len(felems) > 3: - raise ValueError("More than three factors are not supported") + PetscScalar *ptr[2] = {xwork, ywork}; - # Declare array shapes to be used as literals inside the kernels - fshape = [e.space_dimension() for e in felems] - cshape = [e.space_dimension() for e in celems] - shapes = [(nscal,) + tuple(fshape), (nscal,) + tuple(cshape)] + if(ptr[0] != x) + for(PetscBLASInt j=0; j 3: + raise ValueError("More than three factors are not supported") + + # Declare array shapes to be used as literals inside the kernels + nscal = psize*len(shift) + fshape = [e.space_dimension() for e in felem] + cshape = [e.space_dimension() for e in celem] + fshapes.append((nscal,) + tuple(fshape)) + cshapes.append((nscal,) + tuple(cshape)) + + J = [identity_filter(evaluate_dual(ce, fe)).T for ce, fe in zip(celem, felem)] + if any(Jk.size and numpy.isclose(Jk, 0.0E0).all() for Jk in J): + prolong_code.append(f""" + for({IntType_c} i=0; i<{nscal*numpy.prod(fshape)}; i++) {t_out}[i+{fskip}] = 0.0E0; + """) + restrict_code.append(f""" + for({IntType_c} i=0; i<{nscal*numpy.prod(cshape)}; i++) {t_in}[i+{cskip}] = 0.0E0; + """) + else: + Jsize = numpy.cumsum([Jlen] + [Jk.size for Jk in J]) + Jptrs = ["%s+%d" % (mat_name, Jsize[k]) if J[k].size else "NULL" for k in range(len(J))] + Jmats.extend(J) + Jlen = Jsize[-1] + + # The Kronecker product routines assume 3D shapes, so in 1D and 2D we pass NULL instead of J + Jargs = ", ".join(Jptrs+["NULL"]*(3-len(Jptrs))) + fargs = ", ".join(map(str, fshape+[1]*(3-len(fshape)))) + cargs = ", ".join(map(str, cshape+[1]*(3-len(cshape)))) + if in_place: + prolong_code.append(f""" + kronmxv_inplace(0, {fargs}, {cargs}, {nscal}, {Jargs}, &{t_in}, &{t_out}); + """) + restrict_code.append(f""" + kronmxv_inplace(1, {cargs}, {fargs}, {nscal}, {Jargs}, &{t_out}, &{t_in}); + """) + elif shifts == fshifts: + if has_code and psize > 1: + raise ValueError("Single tensor product to many tensor products not implemented for vectors") + # Single tensor product to many + prolong_code.append(f""" + kronmxv(0, {fargs}, {cargs}, {nscal}, {Jargs}, {t_in}+{cskip}, {t_out}+{fskip}, {scratch}, {t_out}+{fskip}); + """) + restrict_code.append(f""" + kronmxv(1, {cargs}, {fargs}, {nscal}, {Jargs}, {t_out}+{fskip}, {t_in}+{cskip}, {t_out}+{fskip}, {scratch}); + """) + else: + # Many tensor products to single tensor product + if has_code: + raise ValueError("Many tensor products to single tensor product not implemented") + fskip = 0 + prolong_code.append(f""" + kronmxv(0, {fargs}, {cargs}, {nscal}, {Jargs}, {t_in}+{cskip}, {t_out}+{fskip}, {t_in}+{cskip}, {t_out}+{fskip}); + """) + restrict_code.append(f""" + kronmxv(1, {cargs}, {fargs}, {nscal}, {Jargs}, {t_out}+{fskip}, {t_in}+{cskip}, {t_out}+{fskip}, {t_in}+{cskip}); + """) + has_code = True + fskip += nscal*numpy.prod(fshape) + cskip += nscal*numpy.prod(cshape) + + # Pass the 1D interpolators as a hexadecimal string + Jdata = ", ".join(map(float.hex, chain.from_iterable(Jk.flat for Jk in Jmats))) + operator_decl.append(f""" + PetscScalar {mat_name}[{Jlen}] = {{ {Jdata} }}; + """) + + operator_decl = "".join(operator_decl) + prolong_code = "".join(prolong_code) + restrict_code = "".join(reversed(restrict_code)) + shapes = [tuple(map(max, zip(*fshapes))), tuple(map(max, zip(*cshapes)))] + + if fskip > numpy.prod(shapes[0]): + shapes[0] = (fskip, 1, 1, 1) + if cskip > numpy.prod(shapes[1]): + shapes[1] = (cskip, 1, 1, 1) return operator_decl, prolong_code, restrict_code, shapes @@ -753,10 +1053,11 @@ def cache_generate_code(kernel, comm): return code -def make_mapping_code(Q, fmapping, cmapping, t_in, t_out): - domain = Q.ufl_domain() - A = get_piola_tensor(cmapping, domain, inverse=False) - B = get_piola_tensor(fmapping, domain, inverse=True) +def make_mapping_code(Q, cmapping, fmapping, t_in, t_out): + if fmapping == cmapping: + return None + A = get_piola_tensor(cmapping, Q.mesh(), inverse=False) + B = get_piola_tensor(fmapping, Q.mesh(), inverse=True) tensor = A if B: tensor = ufl.dot(B, tensor) if tensor else B @@ -795,28 +1096,20 @@ def make_mapping_code(Q, fmapping, cmapping, t_in, t_out): return coef_decl, prolong_code, restrict_code, mapping_code, coefficients -def get_axes_shift(ele): - """Return the form degree of a FInAT element after discarding modifiers""" - if hasattr(ele, "element"): - return get_axes_shift(ele.element) - else: - return ele.formdegree - - def make_permutation_code(V, vshape, pshape, t_in, t_out, array_name): - shift = get_axes_shift(V.finat_element) - tdim = V.mesh().topological_dimension() - if shift % tdim: + _, _, shifts = get_permutation_to_line_elements(V) + shift = shifts[0] + if shift != (0,): ndof = numpy.prod(vshape) permutation = numpy.reshape(numpy.arange(ndof), pshape) - axes = numpy.arange(tdim) + axes = numpy.arange(len(shift)) for k in range(permutation.shape[0]): - permutation[k] = numpy.reshape(numpy.transpose(permutation[k], axes=numpy.roll(axes, -shift*k)), pshape[1:]) + permutation[k] = numpy.reshape(numpy.transpose(permutation[k], axes=numpy.roll(axes, -shift[k])), pshape[1:]) nflip = 0 mapping = V.ufl_element().mapping().lower() if mapping == "contravariant piola": # flip the sign of the first component - nflip = ndof//tdim + nflip = ndof//len(shift) elif mapping == "covariant piola": # flip the order of reference components permutation = numpy.flip(permutation, axis=0) @@ -850,60 +1143,65 @@ def make_permutation_code(V, vshape, pshape, t_in, t_out, array_name): return decl, prolong, restrict -def get_permuted_map(V): - """ - Return a PermutedMap with the same tensor product shape for - every component of H(div) or H(curl) tensor product elements - """ - shift = get_axes_shift(V.finat_element) - if shift % V.mesh().topological_dimension() == 0: - return V.cell_node_map() - - elements = get_line_elements(V) - axes = numpy.arange(len(elements)) - pshape = [-1] + [e.space_dimension() for e in elements] - permutation = numpy.reshape(numpy.arange(V.finat_element.space_dimension()), pshape) - for k in range(permutation.shape[0]): - permutation[k] = numpy.reshape(numpy.transpose(permutation[k], axes=numpy.roll(axes, shift*k)), pshape[1:]) - - permutation = numpy.reshape(permutation, (-1,)) - return PermutedMap(V.cell_node_map(), permutation) - - class StandaloneInterpolationMatrix(object): """ Interpolation matrix for a single standalone space. """ - def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): - self.Vf_bcs = Vf_bcs + + _cache_work = {} + + def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs): + self.uc = self.work_function(Vc) + self.uf = self.work_function(Vf) + self.Vc = self.uc.function_space() + self.Vf = self.uf.function_space() self.Vc_bcs = Vc_bcs - if isinstance(Vf, firedrake.Function): - self.uf = Vf - Vf = Vf.function_space() - else: - self.uf = firedrake.Function(Vf) - if isinstance(Vc, firedrake.Function): - self.uc = Vc - Vc = Vc.function_space() - else: - self.uc = firedrake.Function(Vc) + self.Vf_bcs = Vf_bcs - self.weight = self.multiplicity(Vf) - with self.weight.dat.vec as w: + def work_function(self, V): + if isinstance(V, firedrake.Function): + return V + key = (V.ufl_element(), V.mesh()) + try: + return self._cache_work[key] + except KeyError: + return self._cache_work.setdefault(key, firedrake.Function(V)) + + @cached_property + def _weight(self): + weight = firedrake.Function(self.Vf) + size = self.Vf.finat_element.space_dimension() * self.Vf.value_size + kernel_code = f""" + void weight(PetscScalar *restrict w){{ + for(PetscInt i=0; i<{size}; i++) w[i] += 1.0; + return; + }} + """ + kernel = op2.Kernel(kernel_code, "weight", requires_zeroed_output_arguments=True) + op2.par_loop(kernel, weight.cell_set, weight.dat(op2.INC, weight.cell_node_map())) + with weight.dat.vec as w: w.reciprocal() + return weight + @cached_property + def _kernels(self): try: - uf_map = get_permuted_map(Vf) - uc_map = get_permuted_map(Vc) - prolong_kernel, restrict_kernel, coefficients = self.make_blas_kernels(Vf, Vc) + # We generate custom prolongation and restriction kernels mainly because: + # 1. Code generation for the transpose of prolongation is not readily available + # 2. Dual evaluation of EnrichedElement is not yet implemented in FInAT + uf_map = get_permuted_map(self.Vf) + uc_map = get_permuted_map(self.Vc) + prolong_kernel, restrict_kernel, coefficients = self.make_blas_kernels(self.Vf, self.Vc) prolong_args = [prolong_kernel, self.uf.cell_set, self.uf.dat(op2.INC, uf_map), self.uc.dat(op2.READ, uc_map), - self.weight.dat(op2.READ, uf_map)] + self._weight.dat(op2.READ, uf_map)] except ValueError: - uf_map = Vf.cell_node_map() - uc_map = Vc.cell_node_map() - prolong_kernel, restrict_kernel, coefficients = self.make_kernels(Vf, Vc) + # The elements do not have the expected tensor product structure + # Fall back to aij kernels + uf_map = self.Vf.cell_node_map() + uc_map = self.Vc.cell_node_map() + prolong_kernel, restrict_kernel, coefficients = self.make_kernels(self.Vf, self.Vc) prolong_args = [prolong_kernel, self.uf.cell_set, self.uf.dat(op2.WRITE, uf_map), self.uc.dat(op2.READ, uc_map)] @@ -911,10 +1209,47 @@ def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): restrict_args = [restrict_kernel, self.uf.cell_set, self.uc.dat(op2.INC, uc_map), self.uf.dat(op2.READ, uf_map), - self.weight.dat(op2.READ, uf_map)] + self._weight.dat(op2.READ, uf_map)] coefficient_args = [c.dat(op2.READ, c.cell_node_map()) for c in coefficients] - self._prolong = partial(op2.par_loop, *prolong_args, *coefficient_args) - self._restrict = partial(op2.par_loop, *restrict_args, *coefficient_args) + prolong = op2.ParLoop(*prolong_args, *coefficient_args) + restrict = op2.ParLoop(*restrict_args, *coefficient_args) + return prolong, restrict + + def _prolong(self): + with self.uf.dat.vec_wo as uf: + uf.set(0.0E0) + self._kernels[0]() + + def _restrict(self): + with self.uc.dat.vec_wo as uc: + uc.set(0.0E0) + self._kernels[1]() + + def view(self, mat, viewer=None): + if viewer is None: + return + typ = viewer.getType() + if typ != PETSc.Viewer.Type.ASCII: + return + viewer.printfASCII("Firedrake matrix-free prolongator %s\n" % + type(self).__name__) + + def getInfo(self, mat, info=None): + memory = self.uf.dat.nbytes + self.uc.dat.nbytes + if self._weight is not None: + memory += self._weight.dat.nbytes + if info is None: + info = PETSc.Mat.InfoType.GLOBAL_SUM + if info == PETSc.Mat.InfoType.LOCAL: + return {"memory": memory} + elif info == PETSc.Mat.InfoType.GLOBAL_SUM: + gmem = mat.comm.tompi4py().allreduce(memory, op=op2.MPI.SUM) + return {"memory": gmem} + elif info == PETSc.Mat.InfoType.GLOBAL_MAX: + gmem = mat.comm.tompi4py().allreduce(memory, op=op2.MPI.MAX) + return {"memory": gmem} + else: + raise ValueError("Unknown info type %s" % info) @staticmethod def make_blas_kernels(Vf, Vc): @@ -922,8 +1257,8 @@ def make_blas_kernels(Vf, Vc): Interpolation and restriction kernels between CG / DG tensor product spaces on quads and hexes. - Works by tabulating the coarse 1D Lagrange basis - functions as the (fdegree+1)-by-(cdegree+1) matrix Jhat, + Works by tabulating the coarse 1D basis functions + as the (fdegree+1)-by-(cdegree+1) matrix Jhat, and using the fact that the 2D / 3D tabulation is the tensor product J = kron(Jhat, kron(Jhat, Jhat)) """ @@ -936,43 +1271,47 @@ def make_blas_kernels(Vf, Vc): coefficients = [] mapping_code = "" coef_decl = "" + if fmapping == cmapping: # interpolate on each direction via Kroncker product - operator_decl, prolong_code, restrict_code, shapes = make_kron_code(Vf, Vc, "t0", "t1", "J0") + operator_decl, prolong_code, restrict_code, shapes = make_kron_code(Vc, Vf, "t0", "t1", "J0", "t2") else: decl = [""]*4 prolong = [""]*5 restrict = [""]*5 # get embedding element for Vf with identity mapping and collocated vector component DOFs try: - Q = Vf if fmapping == "identity" else firedrake.FunctionSpace(Vf.ufl_domain(), - felem.reconstruct(mapping="identity")) - mapping_output = make_mapping_code(Q, fmapping, cmapping, "t0", "t1") + qelem = felem + if qelem.mapping() != "identity": + qelem = qelem.reconstruct(mapping="identity") + Qf = Vf if qelem == felem else firedrake.FunctionSpace(Vf.mesh(), qelem) + mapping_output = make_mapping_code(Qf, cmapping, fmapping, "t0", "t1") in_place_mapping = True except Exception: - Qe = ufl.FiniteElement("DQ", cell=felem.cell(), degree=PMGBase.max_degree(felem)) + qelem = ufl.FiniteElement("DQ", cell=felem.cell(), degree=PMGBase.max_degree(felem)) if felem.value_shape(): - Qe = ufl.TensorElement(Qe, shape=felem.value_shape(), symmetry=felem.symmetry()) - Q = firedrake.FunctionSpace(Vf.ufl_domain(), Qe) - mapping_output = make_mapping_code(Q, fmapping, cmapping, "t0", "t1") - - qshape = (Q.value_size, Q.finat_element.space_dimension()) - # interpolate to embedding fine space, permute to FInAT ordering, and apply the mapping - decl[0], prolong[0], restrict[0], shapes = make_kron_code(Q, Vc, "t0", "t1", "J0") - decl[1], restrict[1], prolong[1] = make_permutation_code(Vc, qshape, shapes[0], "t0", "t1", "perm0") - coef_decl, prolong[2], restrict[2], mapping_code, coefficients = mapping_output - - if not in_place_mapping: - # permute to Kronecker-friendly ordering and interpolate to fine space - decl[2], prolong[3], restrict[3] = make_permutation_code(Vf, qshape, shapes[0], "t1", "t0", "perm1") - decl[3], prolong[4], restrict[4], _shapes = make_kron_code(Vf, Q, "t0", "t1", "J1") - shapes.extend(_shapes) + qelem = ufl.TensorElement(qelem, shape=felem.value_shape(), symmetry=felem.symmetry()) + Qf = firedrake.FunctionSpace(Vf.mesh(), qelem) + mapping_output = make_mapping_code(Qf, cmapping, fmapping, "t0", "t1") + + qshape = (Qf.value_size, Qf.finat_element.space_dimension()) + # interpolate to embedding fine space + decl[0], prolong[0], restrict[0], shapes = make_kron_code(Vc, Qf, "t0", "t1", "J0", "t2") + + if mapping_output is not None: + # permute to FInAT ordering, and apply the mapping + decl[1], restrict[1], prolong[1] = make_permutation_code(Vc, qshape, shapes[0], "t0", "t1", "perm0") + coef_decl, prolong[2], restrict[2], mapping_code, coefficients = mapping_output + if not in_place_mapping: + # permute to Kronecker-friendly ordering and interpolate to fine space + decl[2], prolong[3], restrict[3] = make_permutation_code(Vf, qshape, shapes[0], "t1", "t0", "perm1") + decl[3], prolong[4], restrict[4], _shapes = make_kron_code(Qf, Vf, "t0", "t1", "J1", "t2") + shapes.extend(_shapes) operator_decl = "".join(decl) prolong_code = "".join(prolong) restrict_code = "".join(reversed(restrict)) - lwork = numpy.prod([max(*dims) for dims in zip(*shapes)]) # FInAT elements order the component DOFs related to the same node contiguously. # We transpose before and after the multiplication times J to have each component # stored contiguously as a scalar field, thus reducing the number of dgemm calls. @@ -982,6 +1321,10 @@ def make_blas_kernels(Vf, Vc): fshape = (Vf.value_size, Vf.finat_element.space_dimension()) cshape = (Vc.value_size, Vc.finat_element.space_dimension()) + + lwork = numpy.prod([max(*dims) for dims in zip(*shapes)]) + lwork = max(lwork, max(numpy.prod(fshape), numpy.prod(cshape))) + if cshape[0] == 1: coarse_read = f"""for({IntType_c} i=0; i<{numpy.prod(cshape)}; i++) t0[i] = x[i];""" coarse_write = f"""for({IntType_c} i=0; i<{numpy.prod(cshape)}; i++) x[i] += t0[i];""" @@ -1017,9 +1360,10 @@ def make_blas_kernels(Vf, Vc): void prolongation(PetscScalar *restrict y, const PetscScalar *restrict x, const PetscScalar *restrict w{coef_decl}){{ - PetscScalar work[2][{lwork}]; + PetscScalar work[3][{lwork}] = {{0.0E0}}; PetscScalar *t0 = work[0]; PetscScalar *t1 = work[1]; + PetscScalar *t2 = work[2]; {operator_decl} {coarse_read} {prolong_code} @@ -1029,9 +1373,10 @@ def make_blas_kernels(Vf, Vc): void restriction(PetscScalar *restrict x, const PetscScalar *restrict y, const PetscScalar *restrict w{coef_decl}){{ - PetscScalar work[2][{lwork}]; + PetscScalar work[3][{lwork}] = {{0.0E0}}; PetscScalar *t0 = work[0]; PetscScalar *t1 = work[1]; + PetscScalar *t2 = work[2]; {operator_decl} {fine_read} {restrict_code} @@ -1058,11 +1403,10 @@ def make_kernels(self, Vf, Vc): # A local matrix is generated each time the kernel is executed. element_kernel = loopy.generate_code_v2(matrix_kernel.code).device_code() element_kernel = element_kernel.replace("void expression_kernel", "static void expression_kernel") - dimc = Vc.finat_element.space_dimension() * Vc.value_size - dimf = Vf.finat_element.space_dimension() * Vf.value_size - coef_args = "".join([", c%d" % i for i in range(len(coefficients))]) coef_decl = "".join([", const %s *restrict c%d" % (ScalarType_c, i) for i in range(len(coefficients))]) + dimc = Vc.finat_element.space_dimension() * Vc.value_size + dimf = Vf.finat_element.space_dimension() * Vf.value_size restrict_code = f""" {element_kernel} @@ -1078,22 +1422,6 @@ def make_kernels(self, Vf, Vc): restrict_kernel = op2.Kernel(restrict_code, "restriction", requires_zeroed_output_arguments=True) return prolong_kernel, restrict_kernel, coefficients - @staticmethod - def multiplicity(V): - # Lawrence's magic code for calculating dof multiplicities - shapes = (V.finat_element.space_dimension(), - numpy.prod(V.shape)) - domain = "{[i,j]: 0 <= i < %d and 0 <= j < %d}" % shapes - instructions = """ - for i, j - w[i,j] = w[i,j] + 1 - end - """ - weight = firedrake.Function(V) - firedrake.par_loop((domain, instructions), firedrake.dx, - {"w": (weight, op2.INC)}) - return weight - def multTranspose(self, mat, rf, rc): """ Implement restriction: restrict residual on fine grid rf to coarse grid rc. @@ -1103,8 +1431,6 @@ def multTranspose(self, mat, rf, rc): for bc in self.Vf_bcs: bc.zero(self.uf) - with self.uc.dat.vec_wo as uc: - uc.set(0.0E0) self._restrict() for bc in self.Vc_bcs: @@ -1121,8 +1447,6 @@ def mult(self, mat, xc, xf, inc=False): for bc in self.Vc_bcs: bc.zero(self.uc) - with self.uf.dat.vec_wo as uf: - uf.set(0.0E0) self._prolong() for bc in self.Vf_bcs: @@ -1146,25 +1470,29 @@ class MixedInterpolationMatrix(StandaloneInterpolationMatrix): """ Interpolation matrix for a mixed finite element space. """ - def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): - self.Vf_bcs = Vf_bcs - self.Vc_bcs = Vc_bcs - self.uf = Vf if isinstance(Vf, firedrake.Function) else firedrake.Function(Vf) - self.uc = Vc if isinstance(Vc, firedrake.Function) else firedrake.Function(Vc) - - self.standalones = [] - for (i, (uf_sub, uc_sub)) in enumerate(zip(self.uf.subfunctions, self.uc.subfunctions)): - Vf_sub_bcs = [bc for bc in Vf_bcs if bc.function_space().index == i] - Vc_sub_bcs = [bc for bc in Vc_bcs if bc.function_space().index == i] - standalone = StandaloneInterpolationMatrix(uf_sub, uc_sub, Vf_sub_bcs, Vc_sub_bcs) - self.standalones.append(standalone) + @cached_property + def _weight(self): + return None - self._prolong = lambda: [standalone._prolong() for standalone in self.standalones] - self._restrict = lambda: [standalone._restrict() for standalone in self.standalones] + @cached_property + def _standalones(self): + standalones = [] + for i, (uc_sub, uf_sub) in enumerate(zip(self.uc.subfunctions, self.uf.subfunctions)): + Vc_sub_bcs = tuple(bc for bc in self.Vc_bcs if bc.function_space().index == i) + Vf_sub_bcs = tuple(bc for bc in self.Vf_bcs if bc.function_space().index == i) + standalone = StandaloneInterpolationMatrix(uc_sub, uf_sub, Vc_sub_bcs, Vf_sub_bcs) + standalones.append(standalone) + return standalones + + @cached_property + def _kernels(self): + prolong = lambda: [s._prolong() for s in self._standalones] + restrict = lambda: [s._restrict() for s in self._standalones] + return prolong, restrict def getNestSubMatrix(self, i, j): if i == j: - s = self.standalones[i] + s = self._standalones[i] sizes = (s.uf.dof_dset.layout_vec.getSizes(), s.uc.dof_dset.layout_vec.getSizes()) M_shll = PETSc.Mat().createPython(sizes, s, comm=s.uf._comm) M_shll.setUp() @@ -1173,17 +1501,17 @@ def getNestSubMatrix(self, i, j): return None -def prolongation_matrix_aij(Pk, P1, Pk_bcs=[], P1_bcs=[]): - if isinstance(Pk, firedrake.Function): - Pk = Pk.function_space() +def prolongation_matrix_aij(P1, Pk, P1_bcs=[], Pk_bcs=[]): if isinstance(P1, firedrake.Function): P1 = P1.function_space() + if isinstance(Pk, firedrake.Function): + Pk = Pk.function_space() sp = op2.Sparsity((Pk.dof_dset, P1.dof_dset), (Pk.cell_node_map(), P1.cell_node_map())) mat = op2.Mat(sp, PETSc.ScalarType) - mesh = Pk.ufl_domain() + mesh = Pk.mesh() fele = Pk.ufl_element() if isinstance(fele, ufl.MixedElement) and not isinstance(fele, (ufl.VectorElement, ufl.TensorElement)): @@ -1228,15 +1556,14 @@ def prolongation_matrix_aij(Pk, P1, Pk_bcs=[], P1_bcs=[]): return mat.handle -def prolongation_matrix_matfree(Vf, Vc, Vf_bcs=[], Vc_bcs=[]): +def prolongation_matrix_matfree(Vc, Vf, Vc_bcs=[], Vf_bcs=[]): fele = Vf.ufl_element() if isinstance(fele, ufl.MixedElement) and not isinstance(fele, (ufl.VectorElement, ufl.TensorElement)): - ctx = MixedInterpolationMatrix(Vf, Vc, Vf_bcs, Vc_bcs) + ctx = MixedInterpolationMatrix(Vc, Vf, Vc_bcs, Vf_bcs) else: - ctx = StandaloneInterpolationMatrix(Vf, Vc, Vf_bcs, Vc_bcs) + ctx = StandaloneInterpolationMatrix(Vc, Vf, Vc_bcs, Vf_bcs) sizes = (Vf.dof_dset.layout_vec.getSizes(), Vc.dof_dset.layout_vec.getSizes()) M_shll = PETSc.Mat().createPython(sizes, ctx, comm=Vf._comm) M_shll.setUp() - return M_shll diff --git a/tests/multigrid/test_hiptmair.py b/tests/multigrid/test_hiptmair.py index d851e43e2f..b59022d9c5 100644 --- a/tests/multigrid/test_hiptmair.py +++ b/tests/multigrid/test_hiptmair.py @@ -62,9 +62,14 @@ def run_riesz_map(V, mat_type): a = inner(d(u), d(v))*dx + inner(u, v)*dx L = inner(f, v)*dx bcs = [DirichletBC(V, u_exact, "on_boundary")] - + if V.mesh().ufl_cell().is_simplex(): + appctx = dict() + else: + from firedrake.preconditioners.fdm import tabulate_exterior_derivative + appctx = {"get_gradient": tabulate_exterior_derivative, + "get_curl": tabulate_exterior_derivative} problem = LinearVariationalProblem(a, L, uh, bcs=bcs) - solver = LinearVariationalSolver(problem, solver_parameters=parameters) + solver = LinearVariationalSolver(problem, solver_parameters=parameters, appctx=appctx) solver.solve() its = solver.snes.ksp.getIterationNumber() return its @@ -72,7 +77,7 @@ def run_riesz_map(V, mat_type): @pytest.mark.skipcomplexnoslate @pytest.mark.parametrize(["family", "cell"], - [("N1curl", "tetrahedron")]) + [("N1curl", "tetrahedron"), ("NCE", "hexahedron")]) def test_hiptmair_hcurl(family, cell): mesh = mesh_hierarchy(cell)[-1] V = FunctionSpace(mesh, family, degree=1) @@ -82,7 +87,7 @@ def test_hiptmair_hcurl(family, cell): @pytest.mark.skipcomplexnoslate @pytest.mark.parametrize(["family", "cell"], - [("RT", "tetrahedron")]) + [("RT", "tetrahedron"), ("NCF", "hexahedron")]) def test_hiptmair_hdiv(family, cell): mesh = mesh_hierarchy(cell)[-1] V = FunctionSpace(mesh, family, degree=1) diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index 17e4e33296..3954536f1f 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -2,58 +2,121 @@ from firedrake import * -def test_reconstruct_degree(): - meshes = [UnitSquareMesh(1, 1, quadrilateral=True)] - meshes.append(ExtrudedMesh(meshes[0], layers=1)) - for mesh in meshes: - ndim = mesh.topological_dimension() - elist = [] - for degree in [7, 2, 31]: - V = VectorFunctionSpace(mesh, "Q", degree) - Q = FunctionSpace(mesh, "DQ", degree-2) - Z = MixedFunctionSpace([V, Q]) - e = Z.ufl_element() - elist.append(e) - assert e == PMGPC.reconstruct_degree(elist[0], degree) - - elist = [] - for degree in [7, 2, 31]: - V = FunctionSpace(mesh, "NCF" if ndim == 3 else "RTCF", degree) - Q = FunctionSpace(mesh, "DQ", degree-1) - Z = MixedFunctionSpace([V, Q]) - e = Z.ufl_element() - elist.append(e) - assert e == PMGPC.reconstruct_degree(elist[0], degree) - - -def test_prolongation_matrix_matfree(): +@pytest.fixture(params=[2, 3], + ids=["Rectangle", "Box"]) +def tp_mesh(request): + nx = 4 + distribution = {"overlap_type": (DistributedMeshOverlapType.VERTEX, 1)} + m = UnitSquareMesh(nx, nx, quadrilateral=True, distribution_parameters=distribution) + if request.param == 3: + m = ExtrudedMesh(m, nx) + + x = SpatialCoordinate(m) + xnew = as_vector([acos(1-2*xj)/pi for xj in x]) + m.coordinates.interpolate(xnew) + return m + + +@pytest.fixture(params=[0, 1, 2], + ids=["H1", "HCurl", "HDiv"]) +def tp_family(tp_mesh, request): + tdim = tp_mesh.topological_dimension() + if tdim == 3: + families = ["Q", "NCE", "NCF"] + else: + families = ["Q", "RTCE", "RTCF"] + return families[request.param] + + +@pytest.fixture(params=[None, "hierarchical", "fdm"], + ids=["spectral", "hierarchical", "fdm"]) +def variant(request): + return request.param + + +@pytest.fixture(params=[0, 1], + ids=["CG-DG", "HDiv-DG"]) +def mixed_family(tp_mesh, request): + if request.param == 0: + Vfamily = "Q" + else: + tdim = tp_mesh.topological_dimension() + Vfamily = "NCF" if tdim == 3 else "RTCF" + Qfamily = "DQ" + return Vfamily, Qfamily + + +def test_reconstruct_degree(tp_mesh, mixed_family): + """ Construct a complicated mixed element and ensure we may recover it by + p-refining or p-coarsening an element of the same family with different + degree. + """ + elist = [] + Vfamily, Qfamily = mixed_family + for degree in [7, 2, 31]: + if Vfamily in ["NCF", "RTCF"]: + V = FunctionSpace(tp_mesh, Vfamily, degree) + else: + V = VectorFunctionSpace(tp_mesh, Vfamily, degree) + Q = FunctionSpace(tp_mesh, Qfamily, degree-2) + Z = MixedFunctionSpace([V, Q]) + e = Z.ufl_element() + + elist.append(e) + assert e == PMGPC.reconstruct_degree(elist[0], degree) + + +def test_prolong_de_rham(tp_mesh): + """ Interpolate a linear vector function between [H1]^d, HCurl and HDiv spaces + where it can be exactly represented + """ + from firedrake.preconditioners.pmg import prolongation_matrix_matfree + + tdim = tp_mesh.topological_dimension() + b = Constant(list(range(tdim))) + mat = diag(Constant([tdim+1]*tdim)) + Constant([[-1]*tdim]*tdim) + expr = dot(mat, SpatialCoordinate(tp_mesh)) + b + + cell = tp_mesh.ufl_cell() + elems = [VectorElement(FiniteElement("Q", cell=cell, degree=2)), + FiniteElement("NCE" if tdim == 3 else "RTCE", cell=cell, degree=2), + FiniteElement("NCF" if tdim == 3 else "RTCF", cell=cell, degree=2)] + fs = [FunctionSpace(tp_mesh, e) for e in elems] + us = [Function(V) for V in fs] + us[0].interpolate(expr) + for u in us: + for v in us: + if u != v: + P = prolongation_matrix_matfree(u, v).getPythonContext() + P._prolong() + assert norm(v-expr, "L2") < 1E-14 + + +def test_prolong_low_order_to_restricted(tp_mesh, tp_family, variant): + """ Interpolate a low-order function to interior and facet high-order spaces + and ensure that the sum of the two high-order functions is equal to the + low-order function + """ from firedrake.preconditioners.pmg import prolongation_matrix_matfree - tol = 1E-14 - meshes = [UnitSquareMesh(3, 2, quadrilateral=True)] - meshes.append(ExtrudedMesh(meshes[0], layers=2)) - for mesh in meshes: - ndim = mesh.topological_dimension() - b = Constant(list(range(ndim))) - mat = diag(Constant([ndim+1]*ndim)) + Constant([[-1]*ndim]*ndim) - expr = dot(mat, SpatialCoordinate(mesh)) + b - - variant = None - cell = mesh.ufl_cell() - elems = [] - elems.append(VectorElement(FiniteElement("Q", cell=cell, degree=3, variant=variant))) - elems.append(FiniteElement("NCF" if ndim == 3 else "RTCF", cell=cell, degree=2, variant=variant)) - elems.append(FiniteElement("NCE" if ndim == 3 else "RTCE", cell=cell, degree=2, variant=variant)) - fs = [FunctionSpace(mesh, e) for e in elems] - us = [Function(V) for V in fs] - us[0].interpolate(expr) - for u in us: - for v in us: - if u != v: - v.assign(0) - P = prolongation_matrix_matfree(v, u).getPythonContext() - P._prolong() - assert norm(v-expr, "L2") < tol + degree = 5 + cell = tp_mesh.ufl_cell() + element = FiniteElement(tp_family, cell=cell, degree=degree, variant=variant) + Vi = FunctionSpace(tp_mesh, RestrictedElement(element, restriction_domain="interior")) + Vf = FunctionSpace(tp_mesh, RestrictedElement(element, restriction_domain="facet")) + Vc = FunctionSpace(tp_mesh, tp_family, degree=1) + + ui = Function(Vi) + uf = Function(Vf) + uc = Function(Vc) + uc.dat.data[0::2] = 0.0 + uc.dat.data[1::2] = 1.0 + + for v in [ui, uf]: + P = prolongation_matrix_matfree(uc, v).getPythonContext() + P._prolong() + + assert norm(ui + uf - uc, "L2") < 2E-14 @pytest.fixture(params=["triangles", "quadrilaterals"], scope="module") @@ -240,7 +303,8 @@ def test_p_multigrid_mixed(mat_type): "ksp_max_it": 3, "pc_type": "jacobi"} - coarse = {"ksp_type": "richardson", + coarse = {"mat_type": "aij", # This circumvents the need for AssembledPC + "ksp_type": "richardson", "ksp_max_it": 1, "ksp_norm_type": "unpreconditioned", "ksp_monitor": None, @@ -255,7 +319,7 @@ def test_p_multigrid_mixed(mat_type): "ksp_monitor_true_residual": None, "pc_type": "python", "pc_python_type": "firedrake.PMGPC", - # "mat_type": mat_type, # FIXME bug with mat-free jacobi on MixedFunctionSpace + "mat_type": mat_type, "pmg_pc_mg_type": "multiplicative", "pmg_mg_levels": relax, "pmg_mg_coarse": coarse} @@ -270,10 +334,14 @@ def test_p_multigrid_mixed(mat_type): ppc = solver.snes.ksp.pc.getPythonContext().ppc assert ppc.getMGLevels() == 3 - level = solver._ctx + # test that nullspace component is zero assert abs(assemble(z[1]*dx)) < 1E-12 + # test that we converge to the exact solution assert norm(z-z_exact, "H1") < 1E-12 + + # test that we have coarsened the nullspace correctly ctx_levels = 0 + level = solver._ctx while level is not None: nsp = level._nullspace assert isinstance(nsp, MixedVectorSpaceBasis) @@ -284,6 +352,13 @@ def test_p_multigrid_mixed(mat_type): ctx_levels += 1 assert ctx_levels == 3 + # test that caches are parallel-safe + dummy_eq = type(object).__eq__ + for cache in (PMGPC._coarsen_cache, PMGPC._transfer_cache): + assert len(cache) > 0 + for k in cache: + assert type(k).__eq__ is dummy_eq + def test_p_fas_scalar(): mat_type = "matfree" @@ -313,6 +388,7 @@ def test_p_fas_scalar(): atol = rtol * Fnorm coarse = { + "mat_type": "aij", "ksp_type": "preonly", "ksp_norm_type": None, "pc_type": "cholesky"} @@ -321,7 +397,6 @@ def test_p_fas_scalar(): "ksp_type": "chebyshev", "ksp_monitor_true_residual": None, "ksp_norm_type": "unpreconditioned", - "ksp_max_it": 3, "pc_type": "jacobi"} pmg = { @@ -340,7 +415,7 @@ def test_p_fas_scalar(): "pmg_mg_coarse": coarse} pfas = { - "mat_type": "aij", + "mat_type": mat_type, "snes_monitor": None, "snes_converged_reason": None, "snes_atol": atol, @@ -364,23 +439,23 @@ def test_p_fas_scalar(): @pytest.mark.skipcomplex def test_p_fas_nonlinear_scalar(): mat_type = "matfree" - N = 4 - dxq = dx(degree=3*N+2) # here we also test coarsening of quadrature degree + degree = 4 + dxq = dx(degree=3*degree+2) # here we also test coarsening of quadrature degree mesh = UnitSquareMesh(4, 4, quadrilateral=True) - V = FunctionSpace(mesh, "CG", N) + V = FunctionSpace(mesh, "CG", degree) u = Function(V) f = Constant(1) bcs = DirichletBC(V, 0, "on_boundary") # Regularized p-Laplacian p = 5 - eps = 1 + eps = Constant(1) y = eps + inner(grad(u), grad(u)) E = (1/p)*(y**(p/2))*dxq - inner(f, u)*dxq F = derivative(E, u, TestFunction(V)) - fcp = {"quadrature_degree": 3*N+2} + fcp = {"quadrature_degree": 3*degree+2} problem = NonlinearVariationalProblem(F, u, bcs, form_compiler_parameters=fcp) # Due to the convoluted nature of the nested iteration @@ -391,7 +466,7 @@ def test_p_fas_nonlinear_scalar(): rtol = 1E-8 atol = rtol * Fnorm - + rtol = 0.0 newton = { "mat_type": "aij", "snes_monitor": None, @@ -399,7 +474,7 @@ def test_p_fas_nonlinear_scalar(): "snes_type": "newtonls", "snes_max_it": 20, "snes_atol": atol, - "snes_rtol": 1E-50} + "snes_rtol": rtol} coarse = { "ksp_type": "preonly", @@ -414,7 +489,7 @@ def test_p_fas_nonlinear_scalar(): pmg = { "ksp_atol": atol*1E-1, - "ksp_rtol": 1E-50, + "ksp_rtol": rtol, "ksp_type": "cg", "ksp_converged_reason": None, "ksp_monitor_true_residual": None, @@ -434,7 +509,7 @@ def test_p_fas_nonlinear_scalar(): "snes_monitor": None, "snes_converged_reason": None, "snes_atol": atol, - "snes_rtol": 1E-50, + "snes_rtol": rtol, "snes_type": "python", "snes_python_type": "firedrake.PMGSNES", "pfas_snes_fas_type": "kaskade", @@ -458,7 +533,7 @@ def check_coarsen_quadrature(solver): Nq, = Nq Nl = p.u.ufl_element().degree() try: - Nl, = set(Nl) + Nl = max(Nl) except TypeError: pass assert Nq == 3*Nl+2 diff --git a/tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py b/tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py index f82acdf62e..b9eb6e830a 100644 --- a/tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py +++ b/tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py @@ -10,12 +10,12 @@ def run_poisson(): "ksp_monitor": None, "pc_type": "python", "pc_python_type": "firedrake.P1PC", - "pmg_coarse_degree": coarse_deg, "pmg_mg_levels": { "ksp_type": "chebyshev", "ksp_max_it": 2, "pc_type": "jacobi"}, "pmg_mg_coarse": { + "degree": coarse_deg, "ksp_type": "preonly", "pc_type": "lu", "pc_factor_mat_solver_type": "mumps" diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index 34e6469396..3ad7db838e 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -1,30 +1,33 @@ import pytest from firedrake import * - -fdmstar = { +ksp = { "mat_type": "matfree", "ksp_type": "cg", "ksp_atol": 0.0E0, "ksp_rtol": 1.0E-8, - "ksp_norm_type": "unpreconditioned", - "ksp_monitor_true_residual": None, - "ksp_converged_reason": None, + "ksp_norm_type": "natural", + "ksp_monitor": None, +} + +coarse = { + "mat_type": "aij", + "ksp_type": "preonly", + "pc_type": "cholesky", +} + +# FDM without static condensation +fdmstar = { "pc_type": "python", "pc_python_type": "firedrake.P1PC", - "pmg_coarse_mat_type": "aij", - "pmg_mg_coarse": { - "ksp_type": "preonly", - "pc_type": "cholesky", - }, + "pmg_mg_coarse": coarse, "pmg_mg_levels": { + "ksp_max_it": 1, "ksp_type": "chebyshev", "ksp_norm_type": "none", "esteig_ksp_type": "cg", "esteig_ksp_norm_type": "natural", - "ksp_chebyshev_esteig": "0.75,0.25,0.0,1.0", - "ksp_chebyshev_esteig_noisy": True, - "ksp_chebyshev_esteig_steps": 8, + "ksp_chebyshev_esteig": "0.5,0.5,0.0,1.0", "pc_type": "python", "pc_python_type": "firedrake.FDMPC", "fdm": { @@ -32,12 +35,80 @@ "pc_python_type": "firedrake.ASMExtrudedStarPC", "pc_star_mat_ordering_type": "nd", "pc_star_sub_sub_pc_type": "cholesky", - "pc_star_sub_sub_pc_factor_mat_solver_type": "petsc", - "pc_star_sub_sub_pc_factor_mat_ordering_type": "natural", } } } +# FDM with static condensation +facetstar = { + "pc_type": "python", + "pc_python_type": "firedrake.FacetSplitPC", + "facet_pc_type": "python", + "facet_pc_python_type": "firedrake.FDMPC", + "facet_fdm_static_condensation": True, + "facet_fdm_pc_use_amat": False, + "facet_fdm_pc_type": "fieldsplit", + "facet_fdm_pc_fieldsplit_type": "symmetric_multiplicative", + "facet_fdm_fieldsplit_0": { + "ksp_type": "preonly", + "pc_type": "icc", # this is exact for the sparse approximation used in FDM + }, + "facet_fdm_fieldsplit_1": { + "ksp_type": "preonly", + "pc_type": "python", + "pc_python_type": "firedrake.P1PC", + "pmg_mg_coarse": coarse, + "pmg_mg_levels": { + "ksp_max_it": 1, + "ksp_type": "chebyshev", + "ksp_norm_type": "none", + "esteig_ksp_type": "cg", + "esteig_ksp_norm_type": "natural", + "ksp_chebyshev_esteig": "0.5,0.5,0.0,1.0", + "pc_type": "python", + "pc_python_type": "firedrake.ASMExtrudedStarPC", + "pc_star_mat_ordering_type": "nd", + "pc_star_sub_sub_pc_type": "cholesky", + } + } +} + +fdmstar.update(ksp) +facetstar.update(ksp) + + +def build_riesz_map(V, d): + beta = Constant(1E-4) + subs = [(1, 3)] + if V.mesh().cell_set._extruded: + subs += ["top"] + + x = SpatialCoordinate(V.mesh()) + x -= Constant([0.5]*len(x)) + if V.ufl_element().value_shape() == (): + u_exact = exp(-10*dot(x, x)) + u_bc = u_exact + else: + A = Constant([[-1.]*len(x)]*len(x)) + diag(Constant([len(x)]*len(x))) + u_exact = dot(A, x) * exp(-10*dot(x, x)) + u_bc = Function(V) + u_bc.project(u_exact, solver_parameters={"mat_type": "matfree", "pc_type": "jacobi"}) + + bcs = [DirichletBC(V, u_bc, sub) for sub in subs] + + uh = Function(V) + test = TestFunction(V) + trial = TrialFunction(V) + a = lambda v, u: inner(v, beta*u)*dx + inner(d(v), d(u))*dx + return LinearVariationalProblem(a(test, trial), a(test, u_exact), uh, bcs=bcs) + + +def solve_riesz_map(problem, solver_parameters): + problem.u.assign(0) + solver = LinearVariationalSolver(problem, solver_parameters=solver_parameters) + solver.solve() + return solver.snes.ksp.getIterationNumber() + @pytest.fixture(params=[2, 3], ids=["Rectangle", "Box"]) @@ -54,67 +125,65 @@ def mesh(request): return m -@pytest.fixture -def expected(mesh): - if mesh.topological_dimension() == 2: - return [5, 5, 5] - elif mesh.topological_dimension() == 3: - return [8, 8, 8] - - @pytest.fixture(params=[None, "fdm"], ids=["spectral", "fdm"]) def variant(request): return request.param @pytest.mark.skipcomplex -def test_p_independence(mesh, expected, variant): - nits = [] - for p in range(3, 6): - e = FiniteElement("Lagrange", cell=mesh.ufl_cell(), degree=p, variant=variant) - V = FunctionSpace(mesh, e) - u = TrialFunction(V) - v = TestFunction(V) - - ndim = mesh.geometric_dimension() - x = SpatialCoordinate(mesh) - x -= Constant([0.5]*ndim) - u_exact = dot(x, x) - f_exact = grad(u_exact) - B = -div(f_exact) +def test_p_independence_hgrad(mesh, variant): + family = "Lagrange" + expected = [16, 12] if mesh.topological_dimension() == 3 else [9, 7] + solvers = [fdmstar] if variant is None else [fdmstar, facetstar] + for degree in range(3, 6): + element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant=variant) + V = FunctionSpace(mesh, element) + problem = build_riesz_map(V, grad) + for sp, expected_it in zip(solvers, expected): + assert solve_riesz_map(problem, sp) <= expected_it - a = inner(grad(v), grad(u))*dx - L = inner(v, B)*dx - subs = ("on_boundary",) - if mesh.cell_set._extruded: - subs += ("top", "bottom") - bcs = [DirichletBC(V, u_exact, sub) for sub in subs] +@pytest.mark.skipcomplex +def test_p_independence_hcurl(mesh): + family = "NCE" if mesh.topological_dimension() == 3 else "RTCE" + expected = [13, 10] if mesh.topological_dimension() == 3 else [6, 6] + solvers = [fdmstar, facetstar] + for degree in range(3, 6): + element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") + V = FunctionSpace(mesh, element) + problem = build_riesz_map(V, curl) + for sp, expected_it in zip(solvers, expected): + assert solve_riesz_map(problem, sp) <= expected_it - uh = Function(V) - problem = LinearVariationalProblem(a, L, uh, bcs=bcs) - solver = LinearVariationalSolver(problem, solver_parameters=fdmstar) - solver.solve() - nits.append(solver.snes.ksp.getIterationNumber()) - assert norm(u_exact-uh, "H1") < 2.0E-7 - assert nits <= expected + +@pytest.mark.skipcomplex +def test_p_independence_hdiv(mesh): + family = "NCF" if mesh.topological_dimension() == 3 else "RTCF" + expected = [6, 6] + solvers = [fdmstar, facetstar] + for degree in range(3, 6): + element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") + V = FunctionSpace(mesh, element) + problem = build_riesz_map(V, div) + for sp, expected_it in zip(solvers, expected): + assert solve_riesz_map(problem, sp) <= expected_it @pytest.mark.skipcomplex def test_variable_coefficient(mesh): - ndim = mesh.geometric_dimension() + gdim = mesh.geometric_dimension() k = 4 V = FunctionSpace(mesh, "Lagrange", k) u = TrialFunction(V) v = TestFunction(V) x = SpatialCoordinate(mesh) - x -= Constant([0.5]*ndim) + x -= Constant([0.5]*len(x)) # variable coefficients - alphas = [0.1+10*dot(x, x)]*ndim + alphas = [0.1+10*dot(x, x)]*gdim alphas[0] = 1+10*exp(-dot(x, x)) alpha = diag(as_vector(alphas)) - beta = ((10*cos(3*pi*x[0]) + 20*sin(2*pi*x[1]))*cos(pi*x[ndim-1]))**2 + beta = ((10*cos(3*pi*x[0]) + 20*sin(2*pi*x[1]))*cos(pi*x[gdim-1]))**2 a = (inner(grad(v), dot(alpha, grad(u))) + inner(v, beta*u))*dx(degree=3*k+2) L = inner(v, Constant(1))*dx @@ -128,68 +197,69 @@ def test_variable_coefficient(mesh): problem = LinearVariationalProblem(a, L, uh, bcs=bcs) solver = LinearVariationalSolver(problem, solver_parameters=fdmstar) solver.solve() - assert solver.snes.ksp.getIterationNumber() <= 14 + expected = 23 if gdim == 3 else 14 + assert solver.snes.ksp.getIterationNumber() <= expected @pytest.fixture(params=["cg", "dg", "rt"], ids=["cg", "dg", "rt"]) def fs(request, mesh): degree = 3 - ndim = mesh.topological_dimension() + tdim = mesh.topological_dimension() cell = mesh.ufl_cell() element = request.param - variant = None + variant = "fdm_ipdg" if element == "rt": - family = "RTCF" if ndim == 2 else "NCF" + family = "RTCF" if tdim == 2 else "NCF" return FunctionSpace(mesh, FiniteElement(family, cell, degree=degree, variant=variant)) else: - if ndim == 1: + if tdim == 1: family = "DG" if element == "dg" else "CG" else: family = "DQ" if element == "dg" else "Q" - return VectorFunctionSpace(mesh, FiniteElement(family, cell, degree=degree, variant=variant), dim=5-ndim) + return VectorFunctionSpace(mesh, FiniteElement(family, cell, degree=degree, variant=variant), dim=5-tdim) @pytest.mark.skipcomplex -def test_direct_solver(fs): +def test_ipdg_direct_solver(fs): mesh = fs.mesh() x = SpatialCoordinate(mesh) - ndim = mesh.geometric_dimension() + gdim = mesh.geometric_dimension() ncomp = fs.ufl_element().value_size() - u_exact = dot(x, x) - if ncomp: - u_exact = as_vector([u_exact + Constant(k) for k in range(ncomp)]) - N = fs.ufl_element().degree() + homogenize = gdim > 2 + if homogenize: + rg = RandomGenerator(PCG64(seed=123456789)) + uh = rg.uniform(fs, -1, 1) + u_exact = zero(uh.ufl_shape) + u_bc = 0 + else: + uh = Function(fs) + u_exact = dot(x, x) + if ncomp: + u_exact = as_vector([u_exact + Constant(k) for k in range(ncomp)]) + u_bc = u_exact + + degree = fs.ufl_element().degree() try: - N, = set(N) + degree = max(degree) except TypeError: pass - - quad_degree = 2*(N+1)-1 - uh = Function(fs) - u = TrialFunction(fs) - v = TestFunction(fs) + quad_degree = 2*(degree+1)-1 # problem coefficients - A1 = diag(Constant(range(1, ndim+1))) + A1 = diag(Constant(range(1, gdim+1))) A2 = diag(Constant(range(1, ncomp+1))) alpha = lambda grad_u: dot(dot(A2, grad_u), A1) beta = diag(Constant(range(2, ncomp+2))) - n = FacetNormal(mesh) - f_exact = alpha(grad(u_exact)) - B = dot(beta, u_exact) - div(f_exact) - T = dot(f_exact, n) - extruded = mesh.cell_set._extruded subs = (1,) - if ndim > 1: + if gdim > 1: subs += (3,) if extruded: subs += ("top",) - - bcs = [DirichletBC(fs, u_exact, sub) for sub in subs] + bcs = [DirichletBC(fs, u_bc, sub) for sub in subs] dirichlet_ids = subs if "on_boundary" in dirichlet_ids: @@ -217,24 +287,30 @@ def test_direct_solver(fs): ds_Dir = sum(ds_Dir, ds(tuple())) ds_Neu = sum(ds_Neu, ds(tuple())) - eta = Constant((N+1)**2) - h = CellVolume(mesh)/FacetArea(mesh) - penalty = eta/h + n = FacetNormal(mesh) + h = CellVolume(mesh) / FacetArea(mesh) + eta = Constant((degree+1)**2) + penalty = eta / h - outer_jump = lambda w, n: outer(w('+'), n('+')) + outer(w('-'), n('-')) - num_flux = lambda w: alpha(avg(penalty/2) * outer_jump(w, n)) - num_flux_b = lambda w: alpha((penalty/2) * outer(w, n)) + num_flux = lambda u: avg(penalty) * avg(outer(u, n)) + num_flux_b = lambda u: (penalty/2) * outer(u, n) + a_int = lambda v, u: inner(2 * avg(outer(v, n)), alpha(num_flux(u) - avg(grad(u)))) + a_Dir = lambda v, u: inner(outer(v, n), alpha(num_flux_b(u) - grad(u))) - a = (inner(v, dot(beta, u)) * dxq - + inner(grad(v), alpha(grad(u))) * dxq - + inner(outer_jump(v, n), num_flux(u)-avg(alpha(grad(u)))) * dS_int - + inner(outer_jump(u, n), num_flux(v)-avg(alpha(grad(v)))) * dS_int - + inner(outer(v, n), num_flux_b(u)-alpha(grad(u))) * ds_Dir - + inner(outer(u, n), num_flux_b(v)-alpha(grad(v))) * ds_Dir) + v = TestFunction(fs) + u = TrialFunction(fs) + a = ((inner(v, dot(beta, u)) + inner(grad(v), alpha(grad(u)))) * dxq + + (a_int(v, u) + a_int(u, v)) * dS_int + + (a_Dir(v, u) + a_Dir(u, v)) * ds_Dir) - L = (inner(v, B)*dxq - + inner(v, T)*ds_Neu - + inner(outer(u_exact, n), 2*num_flux_b(v)-alpha(grad(v))) * ds_Dir) + if homogenize: + L = 0 + else: + f_exact = alpha(grad(u_exact)) + B = dot(beta, u_exact) - div(f_exact) + T = dot(f_exact, n) + L = (inner(v, B)*dxq + inner(v, T)*ds_Neu + + inner(outer(u_exact, n), alpha(2*num_flux_b(v) - grad(v))) * ds_Dir) problem = LinearVariationalProblem(a, L, uh, bcs=bcs) solver = LinearVariationalSolver(problem, solver_parameters={ @@ -246,7 +322,7 @@ def test_direct_solver(fs): "ksp_monitor": None, "ksp_norm_type": "unpreconditioned", "pc_type": "python", - "pc_python_type": "firedrake.FDMPC", + "pc_python_type": "firedrake.PoissonFDMPC", "fdm_pc_type": "cholesky", "fdm_pc_factor_mat_solver_type": "mumps", "fdm_pc_factor_mat_ordering_type": "nd", @@ -254,4 +330,8 @@ def test_direct_solver(fs): solver.solve() assert solver.snes.ksp.getIterationNumber() == 1 - assert norm(u_exact-uh, "H1") < 1.0E-8 + if homogenize: + with uh.dat.vec_ro as uvec: + assert uvec.norm() < 1E-8 + else: + assert norm(u_exact-uh, "H1") < 1.0E-8