From 2921f7d1151c0d91bb6b04ee43924055ab2678f3 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 7 Mar 2023 12:05:07 +0000 Subject: [PATCH 01/75] Sparse FDM preconditioner for the de Rham complex --- firedrake/preconditioners/fdm.py | 1513 ++++++++++++++++++++++++------ tests/regression/test_fdm.py | 149 ++- 2 files changed, 1342 insertions(+), 320 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 826e1f4447..4542178d7c 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1,11 +1,17 @@ -from functools import lru_cache, partial +from functools import partial, lru_cache +from itertools import product +from pyop2.sparsity import get_preallocation from firedrake.petsc import PETSc from firedrake.preconditioners.base import PCBase +from firedrake.preconditioners.facet_split import split_dofs, restricted_dofs +from firedrake_citations import Citations import firedrake.dmhooks as dmhooks import firedrake +import ctypes import numpy import ufl -from firedrake_citations import Citations +import FIAT +import finat Citations().add("Brubeck2021", """ @misc{Brubeck2021, @@ -18,32 +24,42 @@ } """) -__all__ = ("FDMPC",) +__all__ = ("FDMPC", "PoissonFDMPC") class FDMPC(PCBase): """ A preconditioner for tensor-product elements that changes the shape - functions so that the H^1 Riesz map is diagonalized in the interior of a - Cartesian cell, and assembles a global sparse matrix on which other - preconditioners, such as `ASMStarPC`, can be applied. + functions so that the H(d) Riesz map is sparse on Cartesian cells, + and assembles a global sparse matrix on which other preconditioners, + such as `ASMStarPC`, can be applied. Here we assume that the volume integrals in the Jacobian can be expressed as: - inner(grad(v), alpha(grad(u)))*dx + inner(v, beta(u))*dx + inner(d(v), alpha(d(u)))*dx + inner(v, beta(u))*dx where alpha and beta are linear functions (tensor contractions). - The sparse matrix is obtained by approximating alpha and beta by cell-wise - constants and discarding the coefficients in alpha that couple together - mixed derivatives and mixed components. - - For spaces that are not H^1-conforming, this preconditioner will use - the symmetric interior-penalty DG method. The penalty coefficient can be - provided in the application context, keyed on ``"eta"``. 
+ The sparse matrix is obtained by approximating (v, alpha u) and (v, beta u) as + diagonal mass matrices """ _prefix = "fdm_" + _variant = "fdm" + + _reference_tensor_cache = {} + _coefficient_cache = {} + _c_code_cache = {} + + @staticmethod + def load_set_values(triu=False): + cache = FDMPC._c_code_cache + key = triu + if key not in cache: + comm = PETSc.COMM_SELF + cache[key] = load_assemble_csr(comm, triu=triu) + return cache[key] + @PETSc.Log.EventDecorator("FDMInit") def initialize(self, pc): from firedrake.assemble import allocate_matrix, assemble @@ -52,77 +68,91 @@ def initialize(self, pc): Citations().register("Brubeck2021") self.comm = pc.comm - + Amat, Pmat = pc.getOperators() prefix = pc.getOptionsPrefix() options_prefix = prefix + self._prefix + options = PETSc.Options(options_prefix) + + use_amat = options.getBool("pc_use_amat", True) + pmat_type = options.getString("mat_type", PETSc.Mat.Type.AIJ) + diagonal_scale = options.getBool("diagonal_scale", False) appctx = self.get_appctx(pc) fcp = appctx.get("form_compiler_parameters") + self.appctx = appctx # Get original Jacobian form and bcs - octx = dmhooks.get_appctx(pc.getDM()) - mat_type = octx.mat_type - oproblem = octx._problem - J = oproblem.J - bcs = tuple(oproblem.bcs) + if Pmat.getType() == "python": + ctx = Pmat.getPythonContext() + J = ctx.a + bcs = tuple(ctx.bcs) + mat_type = "matfree" + else: + ctx = dmhooks.get_appctx(pc.getDM()) + J = ctx.Jp or ctx.J + bcs = tuple(ctx._problem.bcs) + mat_type = ctx.mat_type + + if isinstance(J, firedrake.slate.Add): + J = J.children[0].form + assert type(J) == ufl.Form # Transform the problem into the space with FDM shape functions - V = J.arguments()[0].function_space() + V = J.arguments()[-1].function_space() element = V.ufl_element() - e_fdm = element.reconstruct(variant="fdm") - - def interp_nullspace(I, nsp): - if not nsp: - return nsp - vectors = [] - for x in nsp.getVecs(): - y = I.createVecLeft() - I.mult(x, y) - vectors.append(y) - if nsp.hasConstant(): - y = I.createVecLeft() - x = I.createVecRight() - x.set(1.0E0) - I.mult(x, y) - vectors.append(y) - x.destroy() - return PETSc.NullSpace().create(constant=False, vectors=vectors, comm=nsp.getComm()) - - # Matrix-free assembly of the transformed Jacobian + e_fdm = element.reconstruct(variant=self._variant) + if element == e_fdm: V_fdm, J_fdm, bcs_fdm = (V, J, bcs) - Amat, _ = pc.getOperators() - self._ctx_ref = octx else: + # Matrix-free assembly of the transformed Jacobian V_fdm = firedrake.FunctionSpace(V.mesh(), e_fdm) - J_fdm = ufl.replace(J, {t: t.reconstruct(function_space=V_fdm) for t in J.arguments()}) - bcs_fdm = tuple(bc.reconstruct(V=V_fdm) for bc in bcs) - self.fdm_interp = prolongation_matrix_matfree(V, V_fdm, [], bcs_fdm) - self.A = allocate_matrix(J_fdm, bcs=bcs_fdm, form_compiler_parameters=fcp, mat_type=mat_type, - options_prefix=options_prefix) - self._assemble_A = partial(assemble, J_fdm, tensor=self.A, bcs=bcs_fdm, - form_compiler_parameters=fcp, mat_type=mat_type) - self._assemble_A() - Amat = self.A.petscmat + J_fdm = J(*[t.reconstruct(function_space=V_fdm) for t in J.arguments()], coefficients={}) + bcs_fdm = tuple(bc.reconstruct(V=V_fdm, g=0) for bc in bcs) - omat, _ = pc.getOperators() - inject = prolongation_matrix_matfree(V_fdm, V, [], []) - Amat.setNullSpace(interp_nullspace(inject, omat.getNullSpace())) - Amat.setTransposeNullSpace(interp_nullspace(inject, omat.getTransposeNullSpace())) - Amat.setNearNullSpace(interp_nullspace(inject, omat.getNearNullSpace())) + self.fdm_interp = 
prolongation_matrix_matfree(V, V_fdm, [], bcs_fdm) self.work_vec_x = Amat.createVecLeft() self.work_vec_y = Amat.createVecRight() - + if use_amat: + omat = Amat + self.A = allocate_matrix(J_fdm, bcs=bcs_fdm, form_compiler_parameters=fcp, + mat_type=mat_type, options_prefix=options_prefix) + self._assemble_A = partial(assemble, J_fdm, tensor=self.A, bcs=bcs_fdm, + form_compiler_parameters=fcp, mat_type=mat_type) + self._assemble_A() + Amat = self.A.petscmat + + def interp_nullspace(I, nsp): + if not nsp.handle: + return nsp + vectors = [] + for x in nsp.getVecs(): + y = I.createVecLeft() + I.mult(x, y) + vectors.append(y) + if nsp.hasConstant(): + y = I.createVecLeft() + x = I.createVecRight() + x.set(1.0E0) + I.mult(x, y) + vectors.append(y) + x.destroy() + return PETSc.NullSpace().create(constant=False, vectors=vectors, comm=nsp.getComm()) + + inject = prolongation_matrix_matfree(V_fdm, V, [], []) + Amat.setNullSpace(interp_nullspace(inject, omat.getNullSpace())) + Amat.setTransposeNullSpace(interp_nullspace(inject, omat.getTransposeNullSpace())) + Amat.setNearNullSpace(interp_nullspace(inject, omat.getNearNullSpace())) + + if len(bcs) > 0: + self.bc_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) + else: + self.bc_nodes = numpy.empty(0, dtype=PETSc.IntType) self._ctx_ref = self.new_snes_ctx(pc, J_fdm, bcs_fdm, mat_type, fcp=fcp, options_prefix=options_prefix) - if len(bcs) > 0: - self.bc_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) - else: - self.bc_nodes = numpy.empty(0, dtype=PETSc.IntType) - # Assemble the FDM preconditioner with sparse local matrices - Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, appctx) + Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, fcp, appctx, pmat_type, diagonal_scale) self._assemble_P() Pmat.setNullSpace(Amat.getNullSpace()) Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) @@ -136,18 +166,174 @@ def interp_nullspace(I, nsp): # We set a DM and an appropriate SNESContext on the constructed PC so one # can do e.g. multigrid or patch solves. - fdm_dm = V_fdm.dm - self._dm = fdm_dm - - fdmpc.setDM(fdm_dm) + self._dm = V_fdm.dm + fdmpc.setDM(self._dm) fdmpc.setOptionsPrefix(options_prefix) fdmpc.setOperators(A=Amat, P=Pmat) - fdmpc.setUseAmat(True) + fdmpc.setUseAmat(use_amat) self.pc = fdmpc - - with dmhooks.add_hooks(fdm_dm, self, appctx=self._ctx_ref, save=False): + if hasattr(self, "_ctx_ref"): + with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref, save=False): + fdmpc.setFromOptions() + else: fdmpc.setFromOptions() + @PETSc.Log.EventDecorator("FDMPrealloc") + def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, appctx, pmat_type, diagonal_scale): + """ + Assemble the sparse preconditioner with cell-wise constant coefficients. 
+ + :arg V: the :class:`firedrake.FunctionSpace` of the form arguments + :arg J: the Jacobian bilinear form + :arg bcs: an iterable of boundary conditions on V + :arg appctx: the application context + + :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and its assembly callable + """ + ifacet, = numpy.nonzero([is_restricted(Vsub.finat_element)[1] for Vsub in V]) + if len(ifacet) == 0: + Vfacet = None + Vbig = V + _, fdofs = split_dofs(V.finat_element) + elif len(ifacet) == 1: + Vfacet = V[ifacet[0]] + ebig, = set(unrestrict_element(Vsub.ufl_element()) for Vsub in V) + Vbig = firedrake.FunctionSpace(V.mesh(), ebig) + if len(V) > 1: + dims = [Vsub.finat_element.space_dimension() for Vsub in V] + assert sum(dims) == Vbig.finat_element.space_dimension() + fdofs = restricted_dofs(Vfacet.finat_element, Vbig.finat_element) + else: + raise ValueError("Expecting at most one FunctionSpace restricted onto facets.") + + value_size = Vbig.value_size + if value_size != 1: + fdofs = numpy.add.outer(value_size * fdofs, numpy.arange(value_size, dtype=fdofs.dtype)) + dofs = numpy.arange(value_size * Vbig.finat_element.space_dimension(), dtype=fdofs.dtype) + idofs = numpy.setdiff1d(dofs, fdofs, assume_unique=True) + self.ises = tuple(PETSc.IS().createGeneral(indices, comm=PETSc.COMM_SELF) for indices in (idofs, fdofs)) + self.submats = [None for _ in range(7)] + + self.reference_tensor_on_diag = dict() + self.get_static_condensation = dict() + if Vfacet: + # If we are in a facet space, we build the Schur complement on its diagonal block + self.reference_tensor_on_diag[Vfacet] = self.assemble_reference_tensor(Vbig) + self.get_static_condensation[Vfacet] = lambda A: condense_element_mat(A, self.ises[0], self.ises[1], self.submats) + + elif len(fdofs) and V.finat_element.formdegree == 0: + # If we are in H(grad), we just pad with zeros on the statically-condensed pattern + i1 = PETSc.IS().createGeneral(dofs, comm=PETSc.COMM_SELF) + self.get_static_condensation[V] = lambda Ae: condense_element_pattern(Ae, self.ises[0], i1, self.submats) + + # dict of cell to global mappings for each function space + self.cell_to_global = dict() + self.lgmaps = dict() + + @PETSc.Log.EventDecorator("FDMGetIndices") + def cell_to_global(lgmap, cell_to_local, cell_index, result=None): + result = cell_to_local(cell_index, result=result) + return lgmap.apply(result, result=result) + + bc_rows = dict() + for Vsub in V: + lgmap = Vsub.local_to_global_map([bc.reconstruct(V=Vsub, g=0) for bc in bcs]) + bsize = Vsub.dof_dset.layout_vec.getBlockSize() + cell_to_local, nel = glonum_fun(Vsub.cell_node_map(), bsize=bsize) + self.cell_to_global[Vsub] = partial(cell_to_global, lgmap, cell_to_local) + self.lgmaps[Vsub] = lgmap + + own = Vsub.dof_dset.layout_vec.getLocalSize() + bdofs = numpy.nonzero(lgmap.indices[:own] < 0)[0].astype(PETSc.IntType) + bc_rows[Vsub] = Vsub.dof_dset.lgmap.apply(bdofs, result=bdofs) + + # get coefficients on a given cell + coefficients, assembly_callables = self.assemble_coef(J, form_compiler_parameters) + coeffs = [coefficients.get(k) for k in ("beta", "alpha")] + cmaps = [glonum_fun(ck.cell_node_map())[0] for ck in coeffs] + + @PETSc.Log.EventDecorator("FDMGetCoeffs") + def get_coeffs(e, result=None): + vals = [] + for k, (coeff, cmap) in enumerate(zip(coeffs, cmaps)): + get_coeffs.indices[k] = cmap(e, result=get_coeffs.indices[k]) + vals.append(coeff.dat.data_ro[get_coeffs.indices[k]]) + return numpy.concatenate(vals, out=result) + get_coeffs.indices = [None for _ in range(len(coeffs))] + 
self.get_coeffs = get_coeffs + + self.nel = nel + self.work_mats = dict() + + Pmats = dict() + addv = PETSc.InsertMode.ADD_VALUES + symmetric = pmat_type.endswith("sbaij") + + # Store only off-diagonal blocks with more columns than rows to save memory + Vsort = sorted(V, key=lambda Vsub: Vsub.dim()) + for Vrow, Vcol in product(Vsort, Vsort): + if symmetric and (Vcol, Vrow) in Pmats: + P = PETSc.Mat().createTranspose(Pmats[Vcol, Vrow]) + else: + on_diag = Vrow == Vcol + triu = on_diag and symmetric + ptype = pmat_type if on_diag else PETSc.Mat.Type.AIJ + sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) + # bsizes = tuple(Vsub.dof_dset.layout_vec.getBlockSize() for Vsub in (Vrow, Vcol)) + + preallocator = PETSc.Mat().create(comm=self.comm) + preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) + preallocator.setSizes(sizes) + preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) + preallocator.setUp() + self.set_values(preallocator, Vrow, Vcol, addv, triu=triu) + preallocator.assemble() + d_nnz, o_nnz = get_preallocation(preallocator, sizes[0][0]) + preallocator.destroy() + if on_diag: + numpy.maximum(d_nnz, 1, out=d_nnz) + + P = PETSc.Mat().create(comm=self.comm) + P.setType(ptype) + P.setSizes(sizes) + # P.setBlockSizes(*bsizes) + P.setPreallocationNNZ((d_nnz, o_nnz)) + P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) + if ptype.endswith("sbaij"): + P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) + P.setUp() + Pmats[Vrow, Vcol] = P + + if len(V) == 1: + Pmat = Pmats[V, V] + else: + Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=V.comm) + + self.diag = None + + @PETSc.Log.EventDecorator("FDMAssemble") + def assemble_P(): + for _assemble in assembly_callables: + _assemble() + for Vrow, Vcol in product(Vsort, Vsort): + P = Pmats[Vrow, Vcol] + if P.getType().endswith("aij"): + P.zeroEntries() + if Vrow == Vcol and len(bc_rows[Vrow]) > 0: + rows = bc_rows[Vrow][:, None] + vals = numpy.ones(rows.shape, dtype=PETSc.RealType) + P.setValuesRCV(rows, rows, vals, addv) + self.set_values(P, Vrow, Vcol, addv) + Pmat.assemble() + if diagonal_scale: + diag = Pmat.getDiagonal(result=self.diag) + diag.sqrtabs() + diag.reciprocal() + Pmat.diagonalScale(L=diag, R=diag) + self.diag = diag + + return Pmat, assemble_P + @PETSc.Log.EventDecorator("FDMUpdate") def update(self, pc): if hasattr(self, "A"): @@ -155,26 +341,24 @@ def update(self, pc): self._assemble_P() def apply(self, pc, x, y): - dm = self._dm - with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): - if hasattr(self, "fdm_interp"): - self.fdm_interp.multTranspose(x, self.work_vec_x) + if hasattr(self, "_ctx_ref"): + self.fdm_interp.multTranspose(x, self.work_vec_x) + with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref): self.pc.apply(self.work_vec_x, self.work_vec_y) - self.fdm_interp.mult(self.work_vec_y, y) - y.array_w[self.bc_nodes] = x.array_r[self.bc_nodes] - else: - self.pc.apply(x, y) + self.fdm_interp.mult(self.work_vec_y, y) + y.array_w[self.bc_nodes] = x.array_r[self.bc_nodes] + else: + self.pc.apply(x, y) def applyTranspose(self, pc, x, y): - dm = self._dm - with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): - if hasattr(self, "fdm_interp"): - self.fdm_interp.multTranspose(x, self.work_vec_y) + if hasattr(self, "_ctx_ref"): + self.fdm_interp.multTranspose(x, self.work_vec_y) + with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref): self.pc.applyTranspose(self.work_vec_y, self.work_vec_x) - 
self.fdm_interp.mult(self.work_vec_x, y) - y.array_w[self.bc_nodes] = x.array_r[self.bc_nodes] - else: - self.pc.applyTranspose(x, y) + self.fdm_interp.mult(self.work_vec_x, y) + y.array_w[self.bc_nodes] = x.array_r[self.bc_nodes] + else: + self.pc.applyTranspose(x, y) def view(self, pc, viewer=None): super(FDMPC, self).view(pc, viewer) @@ -182,27 +366,770 @@ def view(self, pc, viewer=None): viewer.printfASCII("PC to apply inverse\n") self.pc.view(viewer) - def assemble_fdm_op(self, V, J, bcs, appctx): + def destroy(self, pc): + objs = [] + if hasattr(self, "pc"): + objs.append(self.pc.getOperators()[-1]) + objs.append(self.pc) + if hasattr(self, "submats"): + objs.extend(self.submats) + if hasattr(self, "work_mats"): + objs.extend(list(self.work_mats.values())) + if hasattr(self, "ises"): + objs.extend(self.ises) + for obj in objs: + if hasattr(obj, "destroy"): + obj.destroy() + + @PETSc.Log.EventDecorator("FDMSetValues") + def set_values(self, A, Vrow, Vcol, addv, triu=False): + + def RtAP(R, A, P, result=None): + RtAP.buff = A.matMult(P, result=RtAP.buff) + return R.transposeMatMult(RtAP.buff, result=result) + RtAP.buff = None + + set_values_csr = self.load_set_values(triu=triu) + get_rindices = self.cell_to_global[Vrow] + if Vrow == Vcol: + get_cindices = lambda e, result=None: result + update_A = lambda Ae, rindices, cindices: set_values_csr(A, Ae, rindices, rindices, addv) + rtensor = self.reference_tensor_on_diag.get(Vrow, None) or self.assemble_reference_tensor(Vrow) + assemble_element_mat = lambda De, result=None: De.PtAP(rtensor, result=result) + condense_element_mat = self.get_static_condensation.get(Vrow, None) + else: + get_cindices = self.cell_to_global[Vcol] + update_A = lambda Ae, rindices, cindices: set_values_csr(A, Ae, rindices, cindices, addv) + rtensor = self.assemble_reference_tensor(Vrow) + ctensor = self.assemble_reference_tensor(Vcol) + assemble_element_mat = lambda De, result=None: RtAP(rtensor, De, ctensor, result=result) + condense_element_mat = None + + do_sort = True + if condense_element_mat is None: + condense_element_mat = lambda x: x + do_sort = False + + common_key = "coefs" + rindices = None + cindices = None + if A.getType() != PETSc.Mat.Type.PREALLOCATOR: + Ae = self.work_mats[Vrow, Vcol] + De = self.work_mats[common_key] + data = self.work_csr[2] + insert = PETSc.InsertMode.INSERT + work_vec = De.getDiagonal() + if len(data.shape) == 3: + @PETSc.Log.EventDecorator("FDMUpdateDiag") + def update_De(data): + De.setValuesCSR(*self.work_csr, addv=insert) + De.assemble() + return De + else: + @PETSc.Log.EventDecorator("FDMUpdateDiag") + def update_De(data): + work_vec.setArray(data) + De.setDiagonal(work_vec, addv=insert) + return De + + for e in range(self.nel): + rindices = get_rindices(e, result=rindices) + cindices = get_cindices(e, result=cindices) + data = self.get_coeffs(e, result=data) + Ae = assemble_element_mat(update_De(data), result=Ae) + update_A(condense_element_mat(Ae), rindices, cindices) + + work_vec.destroy() + + elif self.nel: + if common_key not in self.work_mats: + data = self.get_coeffs(0) + data.fill(1.0E0) + shape = data.shape + (1,)*(3-len(data.shape)) + nrows = shape[0] * shape[1] + ai = numpy.arange(nrows+1, dtype=PETSc.IntType) + aj = numpy.tile(ai[:-1].reshape((-1, shape[1])), (1, shape[2])) + if shape[2] > 1: + ai *= shape[2] + data = numpy.tile(numpy.eye(shape[2]), shape[:1] + (1,)*(len(shape)-1)) + + self.work_csr = (ai, aj, data) + De = PETSc.Mat().createAIJ((nrows, nrows), csr=self.work_csr, comm=PETSc.COMM_SELF) + 
self.work_mats[common_key] = De + + De = self.work_mats[common_key] + Ae = assemble_element_mat(De, result=None) + self.work_mats[Vrow, Vcol] = Ae + if do_sort: + sort_interior_dofs(self.ises[0], Ae) + Se = condense_element_mat(Ae) + + for e in range(self.nel): + rindices = get_rindices(e, result=rindices) + cindices = get_cindices(e, result=cindices) + update_A(Se, rindices, cindices) + else: + self.work_csr = (None, None, None) + self.work_mats[common_key] = None + self.work_mats[Vrow, Vcol] = None + if RtAP.buff: + RtAP.buff.destroy() + + @PETSc.Log.EventDecorator("FDMCoefficients") + def assemble_coef(self, J, form_compiler_parameters): """ - Assemble the sparse preconditioner with cell-wise constant coefficients. + Obtain coefficients as the diagonal of a weighted mass matrix in V^k x V^{k+1} + """ + from ufl.algorithms.ad import expand_derivatives + from ufl.algorithms.expand_indices import expand_indices + from firedrake.formmanipulation import ExtractSubBlock + from firedrake.assemble import assemble - :arg V: the :class:`~.FunctionSpace` of the form arguments - :arg J: the Jacobian bilinear form - :arg bcs: an iterable of boundary conditions on V - :arg appctx: the application context + index = len(J.arguments()[-1].function_space())-1 + if index: + splitter = ExtractSubBlock() + J = splitter.split(J, argument_indices=(index, index)) - :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and its assembly callable - """ - from pyop2.sparsity import get_preallocation + mesh = J.ufl_domain() + ndim = mesh.topological_dimension() + args_J = J.arguments() + e = args_J[0].ufl_element() + if isinstance(e, (ufl.VectorElement, ufl.TensorElement)): + e = e._sub_element + e = unrestrict_element(e) + sobolev = e.sobolev_space() + + map_grad = None + if sobolev == ufl.H1: + map_grad = lambda p: p + elif sobolev in [ufl.HCurl, ufl.HDiv]: + u = ufl.Coefficient(ufl.FunctionSpace(mesh, e)) + du = ufl.variable(ufl.grad(u)) + dku = ufl.div(u) if sobolev == ufl.HDiv else ufl.curl(u) + eps = expand_derivatives(ufl.diff(ufl.replace(expand_derivatives(dku), {ufl.grad(u): du}), du)) + if sobolev == ufl.HDiv: + map_grad = lambda p: ufl.outer(p, eps/ndim) + elif len(eps.ufl_shape) == 3: + map_grad = lambda p: ufl.dot(p, eps/2) + else: + map_grad = lambda p: p*(eps/2) + + V = args_J[0].function_space() + formdegree = V.finat_element.formdegree + degree = e.degree() + try: + degree = max(degree) + except TypeError: + pass + qdeg = degree + if formdegree == ndim: + qfam = "DG" if ndim == 1 else "DQ" + qdeg = 0 + elif formdegree == 0: + qfam = "DG" if ndim == 1 else "RTCE" if ndim == 2 else "NCE" + elif formdegree == 1 and ndim == 3: + qfam = "NCF" + else: + qfam = "DQ L2" + qdeg = degree - 1 + + qvariant = "fdm_quadrature" + elements = [e.reconstruct(variant=qvariant), + ufl.FiniteElement(qfam, cell=mesh.ufl_cell(), degree=qdeg, variant=qvariant)] + elements = list(map(ufl.BrokenElement, elements)) + if V.shape: + elements = [ufl.TensorElement(ele, shape=V.shape) for ele in elements] + + Z = firedrake.FunctionSpace(mesh, ufl.MixedElement(elements)) + args = (firedrake.TestFunctions(Z), firedrake.TrialFunctions(Z)) + repargs = {t: v[0] for t, v in zip(args_J, args)} + repgrad = {ufl.grad(t): map_grad(v[1]) for t, v in zip(args_J, args)} if map_grad else dict() + Jcell = expand_indices(expand_derivatives(ufl.Form(J.integrals_by_type("cell")))) + mixed_form = ufl.replace(ufl.replace(Jcell, repgrad), repargs) + + key = (mixed_form.signature(), mesh) + block_diagonal = True + + if key not in 
self._coefficient_cache and False: + M = assemble(mixed_form, mat_type="matfree", + form_compiler_parameters=form_compiler_parameters) + + coefs = [] + mats = [] + for iset in Z.dof_dset.field_ises: + Msub = M.petscmat.createSubMatrix(iset, iset) + coefs.append(Msub.getPythonContext()._diagonal) + mats.append(Msub) + + def scale_coefficients(): + for Msub, coef in zip(mats, coefs): + ksp = PETSc.KSP().create(comm=V.comm) + ksp.setOperators(A=Msub, P=Msub) + ksp.setType(PETSc.KSP.Type.CG) + ksp.setNormType(PETSc.KSP.NormType.NATURAL) + ksp.pc.setType(PETSc.PC.Type.JACOBI) + ksp.setTolerances(rtol=1E-3, atol=0.0E0, max_it=8) + ksp.setComputeEigenvalues(True) + ksp.setUp() + + x = Msub.createVecRight() + b = Msub.createVecLeft() + x.set(0) + b.setRandom() + ksp.solve(b, x) + ew = numpy.real(ksp.computeEigenvalues()) + ksp.destroy() + x.destroy() + b.destroy() + dscale = (max(ew) + min(ew))/2 + dscale = sum(ew) / len(ew) + scale = dscale if dscale == dscale else 1 + with coef.dat.vec as diag: + diag.scale(scale) + + coefficients = {"beta": coefs[0], "alpha": coefs[1]} + assembly_callables = [scale_coefficients] + self._coefficient_cache[key] = (coefficients, assembly_callables) + return self._coefficient_cache[key] + + if key not in self._coefficient_cache: + if not block_diagonal or not V.shape: + tensor = firedrake.Function(Z) + coefficients = {"beta": tensor.sub(0), "alpha": tensor.sub(1)} + assembly_callables = [partial(assemble, mixed_form, tensor=tensor, diagonal=True, + form_compiler_parameters=form_compiler_parameters)] + else: + M = assemble(mixed_form, mat_type="matfree", + form_compiler_parameters=form_compiler_parameters) + coefficients = dict() + assembly_callables = [] + for iset, name in zip(Z.dof_dset.field_ises, ("beta", "alpha")): + sub = M.petscmat.createSubMatrix(iset, iset) + ctx = sub.getPythonContext() + coefficients[name] = ctx._block_diagonal + assembly_callables.append(ctx._assemble_block_diagonal) + + self._coefficient_cache[key] = (coefficients, assembly_callables) + return self._coefficient_cache[key] + + @PETSc.Log.EventDecorator("FDMRefTensor") + def assemble_reference_tensor(self, V): + ndim = V.mesh().topological_dimension() + value_size = V.value_size + formdegree = V.finat_element.formdegree + degree = V.finat_element.degree + try: + degree = max(degree) + except TypeError: + pass + if formdegree == ndim: + degree = degree + 1 + is_interior, is_facet = is_restricted(V.finat_element) + key = (degree, ndim, formdegree, V.value_size, is_interior, is_facet) + cache = self._reference_tensor_cache + if key not in cache: + full_key = (degree, ndim, formdegree, V.value_size, 0, 0) + + if is_facet and full_key in cache: + result = cache[full_key] + noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.comm) + cache[key] = result.createSubMatrix(noperm, self.ises[1]) + noperm.destroy() + return cache[key] + + elements = sorted(get_base_elements(V.finat_element), key=lambda e: e.formdegree) + ref_el = elements[0].get_reference_element() + eq = FIAT.FDMQuadrature(ref_el, degree) + e0 = elements[0] if elements[0].formdegree == 0 else FIAT.FDMLagrange(ref_el, degree) + e1 = elements[-1] if elements[-1].formdegree == 1 else FIAT.FDMDiscontinuousLagrange(ref_el, degree-1) + if is_interior: + e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") + if hasattr(eq.dual, "rule"): + rule = eq.dual.rule + else: + rule = FIAT.quadrature.make_quadrature(ref_el, degree+1) + + pts = rule.get_points() + wts = rule.get_weights() + 
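+            # Tabulate the 1D reference elements at the quadrature points and form the
+            # interval moment matrices: A00 and A11 are mass-type moments, while A10,
+            # after solving against A11, expresses the derivative of the formdegree-0
+            # element e0 in the basis of the formdegree-1 element e1. These feed the
+            # Kronecker-product mass (Ihat) and differentiation (Dhat) reference
+            # tensors assembled below.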
+ phiq = eq.tabulate(0, pts) + phi1 = e1.tabulate(0, pts) + phi0 = e0.tabulate(1, pts) + + moments = lambda v, u: numpy.dot(numpy.multiply(v, wts), u.T) + A00 = moments(phiq[(0, )], phi0[(0, )]) + A11 = moments(phi1[(0, )], phi1[(0, )]) + A10 = moments(phi1[(0, )], phi0[(1, )]) + A10 = numpy.linalg.solve(A11, A10) + A11 = numpy.eye(A11.shape[0]) + + Ihat = mass_matrix(ndim, formdegree, A00, A11) + Dhat = diff_matrix(ndim, formdegree, A00, A11, A10) + result = block_mat([[Ihat], [Dhat]]) + Ihat.destroy() + Dhat.destroy() + + if value_size != 1: + eye = petsc_sparse(numpy.eye(value_size)) + temp = result + result = temp.kron(eye) + temp.destroy() + eye.destroy() + + if is_facet: + cache[full_key] = result + noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.comm) + result = result.createSubMatrix(noperm, self.ises[1]) + noperm.destroy() + + cache[key] = result + return cache[key] + + +def factor_interior_mat(A00): + # Assume that interior DOF list i0 is ordered such that A00 is block diagonal + # with blocks of increasing dimension + indptr, indices, data = A00.getValuesCSR() + degree = numpy.diff(indptr) + + # TODO handle non-symmetric case with LU, requires scipy + invchol = lambda X: numpy.linalg.inv(numpy.linalg.cholesky(X)) + nblocks = numpy.count_nonzero(degree == 1) + zlice = slice(0, nblocks) + numpy.sqrt(data[zlice], out=data[zlice]) + numpy.reciprocal(data[zlice], out=data[zlice]) + PETSc.Log.logFlops(2*nblocks) + for k in range(2, degree[-1]+1): + nblocks = numpy.count_nonzero(degree == k) + zlice = slice(zlice.stop, zlice.stop + k*nblocks) + data[zlice] = invchol(data[zlice].reshape((-1, k, k))).reshape((-1,)) + flops = ((k+1)**3 + 5*(k+1)-12)//3 + k**3 + PETSc.Log.logFlops(flops*nblocks) + + A00.setValuesCSR(indptr, indices, data) + A00.assemble() + + +@PETSc.Log.EventDecorator("FDMCondense") +def condense_element_mat(A, i0, i1, submats): + isrows = [i0, i0, i1, i1] + iscols = [i0, i1, i0, i1] + submats[:4] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:4] if submats[0] else None) + A00, A01, A10, A11 = submats[:4] + factor_interior_mat(A00) + submats[4] = A00.matMult(A01, result=submats[4]) + submats[5] = A10.matTransposeMult(A00, result=submats[5]) + submats[6] = submats[5].matMult(submats[4], result=submats[6]) + submats[6].aypx(-1.0, A11) + return submats[6] + + +@PETSc.Log.EventDecorator("FDMCondense") +def condense_element_pattern(A, i0, i1, submats): + isrows = [i0, i0, i1] + iscols = [i0, i1, i0] + submats[:3] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:3] if submats[0] else None) + A00, A01, A10 = submats[:3] + submats[4] = A10.matTransposeMult(A00, result=submats[4]) + submats[5] = A00.matMult(A01, result=submats[5]) + submats[6] = submats[4].matMult(submats[5], result=submats[6]) + submats[6].aypx(0.0, A) + return submats[6] + + +@PETSc.Log.EventDecorator("LoadCode") +def load_c_code(code, name, **kwargs): + from pyop2.compilation import load + from pyop2.utils import get_petsc_dir + cppargs = ["-I%s/include" % d for d in get_petsc_dir()] + ldargs = (["-L%s/lib" % d for d in get_petsc_dir()] + + ["-Wl,-rpath,%s/lib" % d for d in get_petsc_dir()] + + ["-lpetsc", "-lm"]) + funptr = load(code, "c", name, + cppargs=cppargs, ldargs=ldargs, + **kwargs) + + def get_pointer(obj): + if isinstance(obj, (PETSc.Mat, PETSc.Vec)): + return obj.handle + elif isinstance(obj, numpy.ndarray): + return obj.ctypes.data + return obj + + @PETSc.Log.EventDecorator(name) + def wrapper(*args): + return 
funptr(*list(map(get_pointer, args))) + return wrapper + + +def load_assemble_csr(comm, triu=False): + if triu: + name = "setSubMatCSR_SBAIJ" + select_cols = "icol < irow ? -1: icol" + else: + name = "setSubMatCSR_AIJ" + select_cols = "icol" + code = f""" +#include + +PetscErrorCode {name}(Mat A, + Mat B, + PetscInt *rindices, + PetscInt *cindices, + InsertMode addv) +{{ + PetscInt ncols, irow, icol; + PetscInt *cols, *indices; + PetscScalar *vals; + + PetscInt m, n; + PetscErrorCode ierr; + PetscFunctionBeginUser; + MatGetSize(B, &m, NULL); + + n = 0; + for (PetscInt i = 0; i < m; i++) {{ + ierr = MatGetRow(B, i, &ncols, NULL, NULL);CHKERRQ(ierr); + n = ncols > n ? ncols : n; + ierr = MatRestoreRow(B, i, &ncols, NULL, NULL);CHKERRQ(ierr); + }} + PetscMalloc1(n, &indices); + for (PetscInt i = 0; i < m; i++) {{ + ierr = MatGetRow(B, i, &ncols, &cols, &vals);CHKERRQ(ierr); + irow = rindices[i]; + for (PetscInt j = 0; j < ncols; j++) {{ + icol = cindices[cols[j]]; + indices[j] = {select_cols}; + }} + ierr = MatSetValues(A, 1, &irow, ncols, indices, vals, addv);CHKERRQ(ierr); + ierr = MatRestoreRow(B, i, &ncols, &cols, &vals);CHKERRQ(ierr); + }} + PetscFree(indices); + PetscFunctionReturn(0); +}} +""" + argtypes = [ctypes.c_voidp, ctypes.c_voidp, + ctypes.c_voidp, ctypes.c_voidp, ctypes.c_int] + return load_c_code(code, name, comm=comm, argtypes=argtypes, + restype=ctypes.c_int) + + +def petsc_sparse(A_numpy, rtol=1E-10): + Amax = max(A_numpy.min(), A_numpy.max(), key=abs) + atol = rtol*Amax + nnz = numpy.count_nonzero(abs(A_numpy) > atol, axis=1).astype(PETSc.IntType) + A = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=PETSc.COMM_SELF) + for row, Arow in enumerate(A_numpy): + cols = numpy.argwhere(abs(Arow) > atol).astype(PETSc.IntType).flat + A.setValues(row, cols, Arow[cols], PETSc.InsertMode.INSERT) + A.assemble() + return A + + +def block_mat(A_blocks): + if len(A_blocks) == 1: + if len(A_blocks[0]) == 1: + return A_blocks[0][0] + + nrows = sum([Arow[0].size[0] for Arow in A_blocks]) + ncols = sum([Aij.size[1] for Aij in A_blocks[0]]) + nnz = numpy.concatenate([sum([numpy.diff(Aij.getValuesCSR()[0]) for Aij in Arow]) for Arow in A_blocks]) + A = PETSc.Mat().createAIJ((nrows, ncols), nnz=(nnz, 0), comm=PETSc.COMM_SELF) + imode = PETSc.InsertMode.INSERT + insert_block = FDMPC.load_set_values() + rsizes = [sum([Ai[0].size[0] for Ai in A_blocks[:k]]) for k in range(len(A_blocks)+1)] + csizes = [sum([Aij.size[1] for Aij in A_blocks[0][:k]]) for k in range(len(A_blocks[0])+1)] + rows = [numpy.arange(*rsizes[i:i+2], dtype=PETSc.IntType) for i in range(len(A_blocks))] + cols = [numpy.arange(*csizes[j:j+2], dtype=PETSc.IntType) for j in range(len(A_blocks[0]))] + for Ai, irows in zip(A_blocks, rows): + for Aij, jcols in zip(Ai, cols): + insert_block(A, Aij, irows, jcols, imode) + + A.assemble() + return A + + +def is_restricted(finat_element): + is_interior = True + is_facet = True + tdim = finat_element.cell.get_spatial_dimension() + entity_dofs = finat_element.entity_dofs() + for edim in sorted(entity_dofs): + v = sum(list(entity_dofs[edim].values()), []) + if len(v): + try: + edim = sum(edim) + except TypeError: + pass + if edim == tdim: + is_facet = False + else: + is_interior = False + return is_interior, is_facet + + +def sort_interior_dofs(idofs, A): + Aii = A.createSubMatrix(idofs, idofs) + indptr, indices, _ = Aii.getValuesCSR() + n = idofs.getSize() + visit = numpy.zeros((n, ), dtype=bool) + perm = [] + degree = 0 + while not visit.all(): + degree += 1 + for i in range(n): + if 
not visit[i]: + neigh = indices[slice(*indptr[i:i+2])] + if len(neigh) == degree: + visit[neigh] = True + perm.extend(neigh) + + idofs.setIndices(idofs.getIndices()[perm]) + + +def kron3(A, B, C, scale=None): + temp = B.kron(C) + if scale is not None: + temp.scale(scale) + result = A.kron(temp) + temp.destroy() + return result + + +def mass_matrix(ndim, formdegree, B00, B11): + B00 = petsc_sparse(B00) + B11 = petsc_sparse(B11) + if ndim == 1: + B_blocks = [B11 if formdegree else B00] + elif ndim == 2: + if formdegree == 0: + B_blocks = [B00.kron(B00)] + elif formdegree == 1: + B_blocks = [B00.kron(B11), B11.kron(B00)] + else: + B_blocks = [B11.kron(B11)] + elif ndim == 3: + if formdegree == 0: + B_blocks = [kron3(B00, B00, B00)] + elif formdegree == 1: + B_blocks = [kron3(B00, B00, B11), kron3(B00, B11, B00), kron3(B11, B00, B00)] + elif formdegree == 2: + B_blocks = [kron3(B00, B11, B11), kron3(B11, B00, B11), kron3(B11, B11, B00)] + else: + B_blocks = [kron3(B11, B11, B11)] + + B00.destroy() + B11.destroy() + if len(B_blocks) == 1: + result = B_blocks[0] + else: + nrows = sum(Bk.size[0] for Bk in B_blocks) + ncols = sum(Bk.size[1] for Bk in B_blocks) + csr_block = [Bk.getValuesCSR() for Bk in B_blocks] + ishift = numpy.cumsum([0] + [csr[0][-1] for csr in csr_block]) + jshift = numpy.cumsum([0] + [Bk.size[1] for Bk in B_blocks]) + indptr = numpy.concatenate([csr[0][bool(shift):]+shift for csr, shift in zip(csr_block, ishift[:-1])]) + indices = numpy.concatenate([csr[1]+shift for csr, shift in zip(csr_block, jshift[:-1])]) + data = numpy.concatenate([csr[2] for csr in csr_block]) + result = PETSc.Mat().createAIJ((nrows, ncols), csr=(indptr, indices, data), comm=PETSc.COMM_SELF) + for B in B_blocks: + B.destroy() + return result + + +def diff_matrix(ndim, formdegree, A00, A11, A10): + if formdegree == ndim: + ncols = A10.shape[0]**ndim + A_zero = PETSc.Mat().createAIJ((1, ncols), nnz=(0, 0), comm=PETSc.COMM_SELF) + A_zero.assemble() + return A_zero + + A00 = petsc_sparse(A00) + A11 = petsc_sparse(A11) + A10 = petsc_sparse(A10) + if ndim == 1: + return A10 + elif ndim == 2: + if formdegree == 0: + A_blocks = [[A00.kron(A10)], [A10.kron(A00)]] + elif formdegree == 1: + A_blocks = [[A10.kron(A11), A11.kron(A10)]] + A_blocks[-1][-1].scale(-1) + elif ndim == 3: + if formdegree == 0: + A_blocks = [[kron3(A00, A00, A10)], [kron3(A00, A10, A00)], [kron3(A10, A00, A00)]] + elif formdegree == 1: + size = tuple(A11.getSize()[k] * A10.getSize()[k] * A00.getSize()[k] for k in range(2)) + A_zero = PETSc.Mat().createAIJ(size, nnz=(0, 0), comm=PETSc.COMM_SELF) + A_zero.assemble() + A_blocks = [[kron3(A00, A10, A11, scale=-1), kron3(A00, A11, A10), A_zero], + [kron3(A10, A00, A11, scale=-1), A_zero, kron3(A11, A00, A10)], + [A_zero, kron3(A10, A11, A00), kron3(A11, A10, A00, scale=-1)]] + elif formdegree == 2: + A_blocks = [[kron3(A10, A11, A11, scale=-1), kron3(A11, A10, A11), kron3(A11, A11, A10)]] + + A00.destroy() + A11.destroy() + A10.destroy() + result = block_mat(A_blocks) + for A_row in A_blocks: + for A in A_row: + A.destroy() + return result + + +def diff_prolongator(Vf, Vc, fbcs=[], cbcs=[]): + from tsfc.finatinterface import create_element + from firedrake.preconditioners.pmg import fiat_reference_prolongator + + ef = Vf.finat_element + ec = Vc.finat_element + if ef.formdegree - ec.formdegree != 1: + raise ValueError("Expecting Vf = d(Vc)") + + elements = list(set(get_base_elements(ec) + get_base_elements(ef))) + elements = sorted(elements, key=lambda e: e.formdegree) + e0, e1 = 
elements[::len(elements)-1] + + degree = e0.degree() + A11 = numpy.eye(degree, dtype=PETSc.RealType) + A00 = numpy.eye(degree+1, dtype=PETSc.RealType) + A10 = fiat_reference_prolongator(e1, e0, derivative=True) + + ndim = Vc.mesh().topological_dimension() + Dhat = diff_matrix(ndim, ec.formdegree, A00, A11, A10) + + scalar_element = lambda e: e._sub_element if isinstance(e, (ufl.TensorElement, ufl.VectorElement)) else e + fdofs = restricted_dofs(ef, create_element(unrestrict_element(scalar_element(Vf.ufl_element())))) + cdofs = restricted_dofs(ec, create_element(unrestrict_element(scalar_element(Vc.ufl_element())))) + fises = PETSc.IS().createGeneral(fdofs, comm=PETSc.COMM_SELF) + cises = PETSc.IS().createGeneral(cdofs, comm=PETSc.COMM_SELF) + temp = Dhat + Dhat = temp.createSubMatrix(fises, cises) + fises.destroy() + cises.destroy() + temp.destroy() + if Vf.value_size > 1: + temp = Dhat + eye = petsc_sparse(numpy.eye(Vf.value_size, dtype=PETSc.RealType)) + Dhat = temp.kron(eye) + temp.destroy() + eye.destroy() + + rmap = Vf.local_to_global_map(fbcs) + cmap = Vc.local_to_global_map(cbcs) + rlocal, nel = glonum_fun(Vf.cell_node_map(), bsize=Vf.value_size) + clocal, nel = glonum_fun(Vc.cell_node_map(), bsize=Vc.value_size) + + def cell_to_global(lgmap, cell_to_local, e, result=None): + result = cell_to_local(e, result=result) + return lgmap.apply(result, result=result) + + imode = PETSc.InsertMode.INSERT + update_Dmat = FDMPC.load_set_values() + + sizes = tuple(V.dof_dset.layout_vec.getSizes() for V in (Vf, Vc)) + block_size = Vf.dof_dset.layout_vec.getBlockSize() + preallocator = PETSc.Mat().create(comm=Vf.comm) + preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) + preallocator.setSizes(sizes) + preallocator.setUp() + + rindices = None + cindices = None + for e in range(nel): + rindices = cell_to_global(rmap, rlocal, e, result=rindices) + cindices = cell_to_global(cmap, clocal, e, result=cindices) + update_Dmat(preallocator, Dhat, rindices, cindices, imode) + + preallocator.assemble() + nnz = get_preallocation(preallocator, sizes[0][0]) + preallocator.destroy() + Dmat = PETSc.Mat().createAIJ(sizes, block_size, nnz=nnz, comm=Vf.comm) + Dmat.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) + + for e in range(nel): + rindices = cell_to_global(rmap, rlocal, e, result=rindices) + cindices = cell_to_global(cmap, clocal, e, result=cindices) + update_Dmat(Dmat, Dhat, rindices, cindices, imode) + + Dmat.assemble() + Dhat.destroy() + return Dmat + + +def unrestrict_element(ele): + if isinstance(ele, ufl.VectorElement): + return type(ele)(unrestrict_element(ele._sub_element), dim=ele.num_sub_elements()) + elif isinstance(ele, ufl.TensorElement): + return type(ele)(unrestrict_element(ele._sub_element), shape=ele._shape, symmetry=ele.symmetry()) + elif isinstance(ele, ufl.EnrichedElement): + return type(ele)(*list(dict.fromkeys(unrestrict_element(e) for e in ele._elements))) + elif isinstance(ele, ufl.TensorProductElement): + return type(ele)(*(unrestrict_element(e) for e in ele.sub_elements()), cell=ele.cell()) + elif isinstance(ele, ufl.MixedElement): + return type(ele)(*(unrestrict_element(e) for e in ele.sub_elements())) + elif isinstance(ele, ufl.WithMapping): + return type(ele)(unrestrict_element(ele.wrapee), ele.mapping()) + elif isinstance(ele, ufl.RestrictedElement): + return unrestrict_element(ele._element) + elif isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement)): + return type(ele)(unrestrict_element(ele._element)) + else: + return ele + + +def 
get_base_elements(e): + if isinstance(e, finat.EnrichedElement): + return sum(list(map(get_base_elements, e.elements)), []) + elif isinstance(e, finat.TensorProductElement): + return sum(list(map(get_base_elements, e.factors)), []) + elif isinstance(e, finat.cube.FlattenedDimensions): + return get_base_elements(e.product) + elif isinstance(e, (finat.HCurlElement, finat.HDivElement)): + return get_base_elements(e.wrappee) + elif isinstance(e, finat.finiteelementbase.FiniteElementBase): + return get_base_elements(e.fiat_equivalent) + elif isinstance(e, FIAT.RestrictedElement): + return get_base_elements(e._element) + return [e] + + +class PoissonFDMPC(FDMPC): + """ + A preconditioner for tensor-product elements that changes the shape + functions so that the H^1 Riesz map is sparse in the interior of a + Cartesian cell, and assembles a global sparse matrix on which other + preconditioners, such as `ASMStarPC`, can be applied. + + Here we assume that the volume integrals in the Jacobian can be expressed as: + + inner(grad(v), alpha(grad(u)))*dx + inner(v, beta(u))*dx + + where alpha and beta are linear functions (tensor contractions). + The sparse matrix is obtained by approximating alpha and beta by cell-wise + constants and discarding the coefficients in alpha that couple together + mixed derivatives and mixed components. + + For spaces that are not H^1-conforming, this preconditioner will use + the symmetric interior-penalty DG method. The penalty coefficient can be + provided in the application context, keyed on ``"eta"``. + """ + + _variant = "fdm_ipdg" + + def assemble_reference_tensor(self, V): from firedrake.preconditioners.pmg import get_line_elements try: - line_elements = get_line_elements(V) + line_elements, shifts = get_line_elements(V) except ValueError: raise ValueError("FDMPC does not support the element %s" % V.ufl_element()) + line_elements, = line_elements + self.axes_shifts, = shifts + degree = max(e.degree() for e in line_elements) - eta = float(appctx.get("eta", (degree+1)**2)) - quad_degree = 2*degree+1 + eta = float(self.appctx.get("eta", degree*(degree+1))) element = V.finat_element is_dg = element.entity_dofs() == element.entity_closure_dofs() @@ -213,83 +1140,59 @@ def assemble_fdm_op(self, V, J, bcs, appctx): Afdm[:0], Dfdm[:0], bdof[:0] = tuple(zip(fdm_setup_ipdg(e, eta))) if not (e.formdegree or is_dg): Dfdm[0] = None + return Afdm, Dfdm, bdof - # coefficients w.r.t. 
the reference values - coefficients, self.assembly_callables = self.assemble_coef(J, quad_degree) - # set arbitrary non-zero coefficients for preallocation - for coef in coefficients.values(): - with coef.dat.vec as cvec: - cvec.set(1.0E0) - - bcflags = get_weak_bc_flags(J) - - # preallocate by calling the assembly routine on a PREALLOCATOR Mat - sizes = (V.dof_dset.layout_vec.getSizes(),)*2 - block_size = V.dof_dset.layout_vec.getBlockSize() - prealloc = PETSc.Mat().create(comm=self.comm) - prealloc.setType(PETSc.Mat.Type.PREALLOCATOR) - prealloc.setSizes(sizes) - prealloc.setUp() - self.assemble_kron(prealloc, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags) - nnz = get_preallocation(prealloc, block_size * V.dof_dset.set.size) - Pmat = PETSc.Mat().createAIJ(sizes, block_size, nnz=nnz, comm=self.comm) - Pmat.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) - assemble_P = partial(self.assemble_kron, Pmat, V, bcs, eta, - coefficients, Afdm, Dfdm, bdof, bcflags) - prealloc.destroy() - return Pmat, assemble_P - - def assemble_kron(self, A, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags): + @PETSc.Log.EventDecorator("FDMSetValues") + def set_values(self, A, Vrow, Vcol, addv, triu=False): """ Assemble the stiffness matrix in the FDM basis using Kronecker products of interval matrices :arg A: the :class:`PETSc.Mat` to assemble - :arg V: the :class:`~.FunctionSpace` of the form arguments - :arg bcs: an iterable of :class:`~.DirichletBC` s - :arg eta: a ``float`` penalty parameter for the symmetric interior penalty method - :arg coefficients: a ``dict`` mapping strings to :class:`firedrake.function.Function` s with the form coefficients - :arg Afdm: the list with sparse interval matrices - :arg Dfdm: the list with normal derivatives matrices - :arg bcflags: the :class:`numpy.ndarray` with BC facet flags returned by ``get_weak_bc_flags`` + :arg Vrow: the :class:`firedrake.FunctionSpace` test space + :arg Vcol: the :class:`firedrake.FunctionSpace` trial space """ - from firedrake.preconditioners.pmg import get_axes_shift - Gq = coefficients.get("Gq") - Bq = coefficients.get("Bq") - Gq_facet = coefficients.get("Gq_facet") - PT_facet = coefficients.get("PT_facet") - - imode = PETSc.InsertMode.ADD_VALUES - lgmap = V.local_to_global_map(bcs) - + set_values_csr = self.load_set_values(triu=triu) + update_A = lambda A, Ae, rindices: set_values_csr(A, Ae, rindices, rindices, addv) + condense_element_mat = self.get_static_condensation.get(Vrow, lambda x: x) + condense_element_mat = lambda x: x + + get_rindices = self.cell_to_global[Vrow] + rtensor = self.reference_tensor_on_diag.get(Vrow, None) or self.assemble_reference_tensor(Vrow) + self.reference_tensor_on_diag[Vrow] = rtensor + Afdm, Dfdm, bdof = rtensor + + Gq = self.coefficients.get("alpha") + Bq = self.coefficients.get("beta") + bcflags = self.coefficients.get("bcflags") + Gq_facet = self.coefficients.get("Gq_facet") + PT_facet = self.coefficients.get("PT_facet") + + V = Vrow bsize = V.value_size ncomp = V.ufl_element().reference_value_size() sdim = (V.finat_element.space_dimension() * bsize) // ncomp # dimension of a single component ndim = V.ufl_domain().topological_dimension() - shift = get_axes_shift(V.finat_element) % ndim + shift = self.axes_shifts * bsize - index_cell, nel = glonum_fun(V.cell_node_map()) - index_coef, _ = glonum_fun(Gq.cell_node_map()) + index_coef, _ = glonum_fun((Gq or Bq).cell_node_map()) + index_bc, _ = glonum_fun(bcflags.cell_node_map()) flag2id = numpy.kron(numpy.eye(ndim, ndim, 
dtype=PETSc.IntType), [[1], [2]]) # pshape is the shape of the DOFs in the tensor product pshape = tuple(Ak[0].size[0] for Ak in Afdm) - if shift: - assert ncomp == ndim - pshape = [tuple(numpy.roll(pshape, -shift*k)) for k in range(ncomp)] + static_condensation = False + if sdim != numpy.prod(pshape): + static_condensation = True - if A.getType() != PETSc.Mat.Type.PREALLOCATOR: - A.zeroEntries() - for assemble_coef in self.assembly_callables: - assemble_coef() - - # insert the identity in the Dirichlet rows and columns - for row in V.dof_dset.lgmap.indices[lgmap.indices < 0]: - A.setValue(row, row, 1.0E0, imode) + if set(shift) != {0}: + assert ncomp == ndim + pshape = [tuple(numpy.roll(pshape, -shift[k])) for k in range(ncomp)] # assemble zero-th order term separately, including off-diagonals (mixed components) # I cannot do this for hdiv elements as off-diagonals are not sparse, this is because # the FDM eigenbases for GLL(N) and GLL(N-1) are not orthogonal to each other - use_diag_Bq = Bq is None or len(Bq.ufl_shape) != 2 + rindices = None + use_diag_Bq = Bq is None or len(Bq.ufl_shape) != 2 or static_condensation if not use_diag_Bq: bshape = Bq.ufl_shape # Be = Bhat kron ... kron Bhat @@ -299,67 +1202,86 @@ def assemble_kron(self, A, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags) aptr = numpy.arange(0, (bshape[0]+1)*bshape[1], bshape[1], dtype=PETSc.IntType) aidx = numpy.tile(numpy.arange(bshape[1], dtype=PETSc.IntType), bshape[0]) - for e in range(nel): + for e in range(self.nel): # Ae = Be kron Bq[e] adata = numpy.sum(Bq.dat.data_ro[index_coef(e)], axis=0) Ae = PETSc.Mat().createAIJWithArrays(bshape, (aptr, aidx, adata), comm=PETSc.COMM_SELF) Ae = Be.kron(Ae) - - ie = index_cell(e) - ie = numpy.repeat(ie*bsize, bsize) + numpy.tile(numpy.arange(bsize, dtype=ie.dtype), len(ie)) - rows = lgmap.apply(ie) - set_submat_csr(A, Ae, rows, imode) + rindices = get_rindices(e, result=rindices) + update_A(A, Ae, rindices) Ae.destroy() Be.destroy() Bq = None # assemble the second order term and the zero-th order term if any, - # discarding mixed derivatives and mixed components - for e in range(nel): - ie = numpy.reshape(index_cell(e), (ncomp//bsize, -1)) + # discarding mixed derivatives and mixed componentsget_weak_bc_flags(J) + mue = numpy.zeros((ncomp, ndim), dtype=PETSc.RealType) + bqe = numpy.zeros((ncomp,), dtype=PETSc.RealType) + + for e in range(self.nel): je = index_coef(e) - bce = bcflags[e] + bce = bcflags.dat.data_ro_with_halos[index_bc(e)] > 1E-8 + + rindices = get_rindices(e, result=rindices) + rows = numpy.reshape(rindices, (-1, bsize)) + rows = numpy.transpose(rows) + rows = numpy.reshape(rows, (ncomp, -1)) # get second order coefficient on this cell - mue = numpy.atleast_1d(numpy.sum(Gq.dat.data_ro[je], axis=0)) + if Gq is not None: + mue.flat[:] = numpy.sum(Gq.dat.data_ro[je], axis=0) + # get zero-th order coefficient on this cell if Bq is not None: - # get zero-th order coefficient on this cell - bqe = numpy.atleast_1d(numpy.sum(Bq.dat.data_ro[je], axis=0)) + bqe.flat[:] = numpy.sum(Bq.dat.data_ro[je], axis=0) for k in range(ncomp): # permutation of axes with respect to the first vector component - axes = numpy.roll(numpy.arange(ndim), -shift*k) + axes = numpy.roll(numpy.arange(ndim), -shift[k]) # for each component: compute the stiffness matrix Ae - muk = mue[k] if len(mue.shape) == 2 else mue bck = bce[:, k] if len(bce.shape) == 2 else bce fbc = numpy.dot(bck, flag2id) - # Ae = mue[k][0] Ahat + bqe[k] Bhat - Be = Afdm[axes[0]][0].copy() - Ae = 
Afdm[axes[0]][1+fbc[0]].copy() - Ae.scale(muk[0]) - if Bq is not None: - Ae.axpy(bqe[k], Be) - - if ndim > 1: - # Ae = Ae kron Bhat + mue[k][1] Bhat kron Ahat - Ae = Ae.kron(Afdm[axes[1]][0]) - Ae.axpy(muk[1], Be.kron(Afdm[axes[1]][1+fbc[1]])) - if ndim > 2: - # Ae = Ae kron Bhat + mue[k][2] Bhat kron Bhat kron Ahat - Be = Be.kron(Afdm[axes[1]][0]) - Ae = Ae.kron(Afdm[axes[2]][0]) - Ae.axpy(muk[2], Be.kron(Afdm[axes[2]][1+fbc[2]])) - - rows = lgmap.apply(ie[0]*bsize+k if bsize == ncomp else ie[k]) - set_submat_csr(A, Ae, rows, imode) + if Gq is not None: + # Ae = mue[k][0] Ahat + bqe[k] Bhat + Be = Afdm[axes[0]][0].copy() + Ae = Afdm[axes[0]][1+fbc[0]].copy() + Ae.scale(mue[k][0]) + if Bq is not None: + Ae.axpy(bqe[k], Be) + + if ndim > 1: + # Ae = Ae kron Bhat + mue[k][1] Bhat kron Ahat + Ae = Ae.kron(Afdm[axes[1]][0]) + if Gq is not None: + Ae.axpy(mue[k][1], Be.kron(Afdm[axes[1]][1+fbc[1]])) + + if ndim > 2: + # Ae = Ae kron Bhat + mue[k][2] Bhat kron Bhat kron Ahat + Be = Be.kron(Afdm[axes[1]][0]) + Ae = Ae.kron(Afdm[axes[2]][0]) + if Gq is not None: + Ae.axpy(mue[k][2], Be.kron(Afdm[axes[2]][1+fbc[2]])) + Be.destroy() + + elif Bq is not None: + Ae = Afdm[axes[0]][0] + for m in range(1, ndim): + Ae = Ae.kron(Afdm[axes[m]][0]) + Ae.scale(bqe[k]) + + Ae = condense_element_mat(Ae) + update_A(A, Ae, rows[k].astype(PETSc.IntType)) Ae.destroy() - Be.destroy() # assemble SIPG interior facet terms if the normal derivatives have been set up if any(Dk is not None for Dk in Dfdm): + if static_condensation: + raise NotImplementedError("Static condensation for SIPG not implemented") if ndim < V.ufl_domain().geometric_dimension(): raise NotImplementedError("SIPG on immersed meshes is not implemented") + eta = float(self.appctx.get("eta")) + + lgmap = self.lgmaps[V] index_facet, local_facet_data, nfacets = get_interior_facet_maps(V) index_coef, _, _ = get_interior_facet_maps(Gq_facet or Gq) rows = numpy.zeros((2, sdim), dtype=PETSc.IntType) @@ -382,7 +1304,7 @@ def assemble_kron(self, A, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags) Gfacet = numpy.sum(Gq.dat.data_ro_with_halos[je], axis=1) for k in range(ncomp): - axes = numpy.roll(numpy.arange(ndim), -shift*k) + axes = numpy.roll(numpy.arange(ndim), -shift[k]) Dfacet = Dfdm[axes[0]] if Dfacet is None: continue @@ -439,31 +1361,30 @@ def assemble_kron(self, A, V, bcs, eta, coefficients, Afdm, Dfdm, bdof, bcflags) rows[0] = pull_axis(icell[0][k0], pshape[k0], idir[0]) rows[1] = pull_axis(icell[1][k1], pshape[k1], idir[1]) - set_submat_csr(A, Ae, rows, imode) + update_A(A, Ae, rows) Ae.destroy() - A.assemble() - def assemble_coef(self, J, quad_deg, discard_mixed=True, cell_average=True): - """ - Return the coefficients of the Jacobian form arguments and their gradient with respect to the reference coordinates. 
- - :arg J: the Jacobian bilinear form - :arg quad_deg: the quadrature degree used for the coefficients - :arg discard_mixed: discard entries in second order coefficient with mixed derivatives and mixed components - :arg cell_average: to return the coefficients as DG_0 Functions - - :returns: a 2-tuple of - coefficients: a dictionary mapping strings to :class:`firedrake.function.Function` s with the coefficients of the form, - assembly_callables: a list of assembly callables for each coefficient of the form - """ + @PETSc.Log.EventDecorator("FDMCoefficients") + def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_average=True): from ufl import inner, diff from ufl.algorithms.ad import expand_derivatives + coefficients = {} assembly_callables = [] mesh = J.ufl_domain() tdim = mesh.topological_dimension() Finv = ufl.JacobianInverse(mesh) + + args_J = J.arguments() + V = args_J[-1].function_space() + degree = V.ufl_element().degree() + try: + degree = max(degree) + except TypeError: + pass + quad_deg = 2*degree+1 + quad_deg = (form_compiler_parameters or {}).get("degree", quad_deg) dx = firedrake.dx(degree=quad_deg) if cell_average: @@ -474,7 +1395,6 @@ def assemble_coef(self, J, quad_deg, discard_mixed=True, cell_average=True): degree = quad_deg # extract coefficients directly from the bilinear form - args_J = J.arguments() integrals_J = J.integrals_by_type("cell") mapping = args_J[0].ufl_element().mapping().lower() Piola = get_piola_tensor(mapping, mesh) @@ -517,11 +1437,12 @@ def assemble_coef(self, J, quad_deg, discard_mixed=True, cell_average=True): Qe = ufl.TensorElement(family, mesh.ufl_cell(), degree=degree, quad_scheme="default", shape=G.ufl_shape, symmetry=True) # assemble second order coefficient - Q = firedrake.FunctionSpace(mesh, Qe) - q = firedrake.TestFunction(Q) - Gq = firedrake.Function(Q) - coefficients["Gq"] = Gq - assembly_callables.append(partial(firedrake.assemble, inner(G, q)*dx, Gq)) + if not isinstance(alpha, ufl.constantvalue.Zero): + Q = firedrake.FunctionSpace(mesh, Qe) + q = firedrake.TestFunction(Q) + Gq = firedrake.Function(Q) + coefficients["alpha"] = Gq + assembly_callables.append(partial(firedrake.assemble, inner(G, q)*dx, Gq)) # assemble zero-th order coefficient if not isinstance(beta, ufl.constantvalue.Zero): @@ -535,7 +1456,7 @@ def assemble_coef(self, J, quad_deg, discard_mixed=True, cell_average=True): Q = firedrake.FunctionSpace(mesh, Qe) q = firedrake.TestFunction(Q) Bq = firedrake.Function(Q) - coefficients["Bq"] = Bq + coefficients["beta"] = Bq assembly_callables.append(partial(firedrake.assemble, inner(beta, q)*dx, Bq)) if Piola: @@ -565,6 +1486,45 @@ def assemble_coef(self, J, quad_deg, discard_mixed=True, cell_average=True): PT_facet = firedrake.Function(Q) coefficients["PT_facet"] = PT_facet assembly_callables.append(partial(firedrake.assemble, ((inner(q('+'), PT('+')) + inner(q('-'), PT('-')))/area)*dS_int, PT_facet)) + + # make DGT functions with BC flags + rvs = V.ufl_element().reference_value_shape() + cell = mesh.ufl_cell() + family = "CG" if cell.topological_dimension() == 1 else "DGT" + degree = 1 if cell.topological_dimension() == 1 else 0 + Qe = ufl.FiniteElement(family, cell=cell, degree=degree) + if rvs: + Qe = ufl.TensorElement(Qe, shape=rvs) + Q = firedrake.FunctionSpace(mesh, Qe) + q = firedrake.TestFunction(Q) + bcflags = firedrake.Function(Q) + + ref_args = [ufl.variable(t) for t in args_J] + replace_args = {t: s for t, s in zip(args_J, ref_args)} + + forms = [] + md = {"quadrature_degree": 0} + for it in 
J.integrals(): + itype = it.integral_type() + if itype.startswith("exterior_facet"): + beta = ufl.diff(ufl.diff(ufl.replace(it.integrand(), replace_args), ref_args[0]), ref_args[1]) + beta = expand_derivatives(beta) + if rvs: + beta = ufl.diag_vector(beta) + ds_ext = ufl.Measure(itype, domain=mesh, subdomain_id=it.subdomain_id(), metadata=md) + forms.append(ufl.inner(q, beta)*ds_ext) + + if len(forms): + form = sum(forms) + if len(form.arguments()) == 1: + assembly_callables.append(partial(firedrake.assemble, form, bcflags)) + coefficients["bcflags"] = bcflags + + # set arbitrary non-zero coefficients for preallocation + for coef in coefficients.values(): + with coef.dat.vec as cvec: + cvec.set(1.0E0) + self.coefficients = coefficients return coefficients, assembly_callables @@ -643,12 +1603,13 @@ def fdm_setup_ipdg(fdm_element, eta): Dfdm: the tabulation of the normal derivatives of the Dirichlet eigenfunctions. bdof: the indices of PointEvaluation dofs. """ - from FIAT.quadrature import GaussLegendreQuadratureLineRule - from FIAT.functional import PointEvaluation ref_el = fdm_element.get_reference_element() degree = fdm_element.degree() - rule = GaussLegendreQuadratureLineRule(ref_el, degree+1) - bdof = [k for k, f in enumerate(fdm_element.dual_basis()) if isinstance(f, PointEvaluation)] + if hasattr(fdm_element.dual, "rule"): + rule = fdm_element.dual.rule + else: + rule = FIAT.quadrature.make_quadrature(ref_el, degree+1) + bdof = [k for k, f in enumerate(fdm_element.dual_basis()) if isinstance(f, FIAT.functional.PointEvaluation)] phi = fdm_element.tabulate(1, rule.get_points()) Jhat = phi[(0, )] @@ -680,7 +1641,7 @@ def get_interior_facet_maps(V): """ Extrude V.interior_facet_node_map and V.ufl_domain().interior_facets.local_facet_dat - :arg V: a :class:`~.FunctionSpace` + :arg V: a :class:`FunctionSpace` :returns: the 3-tuple of facet_to_nodes_fun: maps interior facets to the nodes of the two cells sharing it, @@ -747,35 +1708,75 @@ def get_interior_facet_maps(V): return facet_to_nodes_fun, local_facet_data_fun, nfacets -@lru_cache(maxsize=10) -def glonum_fun(node_map): +@lru_cache(maxsize=20) +def glonum_fun(node_map, bsize=1): """ - Return a function that maps each topological entity to its nodes and the total number of entities. + Return a the local numbering given an non-extruded local map and the total number of entities. - :arg node_map: a :class:`pyop2.Map` mapping entities to their nodes, including ghost entities. + :arg node_map: a :class:`pyop2.Map` mapping entities to their local dofs, including ghost entities. 
- :returns: a 2-tuple with the map and the number of cells owned by this process + :returns: a 2-tuple with the map and the number of entities owned by this process """ nelv = node_map.values.shape[0] if node_map.offset is None: - return lambda e: node_map.values_with_halo[e], nelv + nel = nelv + + def glonum(e, result=None): + if result is None: + result = numpy.copy(node_map.values_with_halo[e]) + else: + numpy.copyto(result, node_map.values_with_halo[e]) + return result + else: layers = node_map.iterset.layers_array if layers.shape[0] == 1: nelz = layers[0, 1]-layers[0, 0]-1 nel = nelz*nelv - return lambda e: node_map.values_with_halo[e//nelz] + (e % nelz)*node_map.offset, nel + + def _glonum(node_map, nelz, e, result=None): + if result is None: + result = numpy.copy(node_map.values_with_halo[e // nelz]) + else: + numpy.copyto(result, node_map.values_with_halo[e // nelz]) + result += (e % nelz)*node_map.offset + return result + glonum = partial(_glonum, node_map, nelz) + else: nelz = layers[:, 1]-layers[:, 0]-1 nel = sum(nelz[:nelv]) to_base = numpy.repeat(numpy.arange(node_map.values_with_halo.shape[0], dtype=node_map.offset.dtype), nelz) to_layer = numpy.concatenate([numpy.arange(nz, dtype=node_map.offset.dtype) for nz in nelz]) - return lambda e: node_map.values_with_halo[to_base[e]] + to_layer[e]*node_map.offset, nel + + def _glonum(node_map, to_base, to_layer, e, result=None): + if result is None: + result = numpy.copy(node_map.values_with_halo[to_base[e]]) + else: + numpy.copyto(result, node_map.values_with_halo[to_base[e]]) + result += to_layer[e]*node_map.offset + return result + glonum = partial(_glonum, node_map, to_base, to_layer) + + if bsize == 1: + return glonum, nel + + ibase = numpy.arange(bsize, dtype=node_map.values.dtype) + + def vector_glonum(bsize, ibase, e, result=None): + index = None + if result is not None: + index = result[:, 0] + index = glonum(e, result=index) + index *= bsize + return numpy.add.outer(index, ibase, out=result) + + return partial(vector_glonum, bsize, ibase), nel def glonum(node_map): """ - Return an array with the nodes of each topological entity of a certain kind. + Return an array with the node map. :arg node_map: a :class:`pyop2.Map` mapping entities to their nodes, including ghost entities. 
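[Note on the hunk above] The `bsize` branch of `glonum_fun` expands the scalar cell-to-node map into per-component DOF indices. The following standalone numpy sketch (illustrative values only, not part of the patch) shows the same `numpy.add.outer` interleaving that `vector_glonum` performs:

    import numpy

    nodes = numpy.array([3, 7, 1])   # scalar node indices of one cell (made-up values)
    bsize = 2                        # components per node (block size)
    ibase = numpy.arange(bsize)
    dofs = numpy.add.outer(nodes * bsize, ibase)
    # dofs -> [[ 6,  7], [14, 15], [ 2,  3]]: each node contributes bsize consecutive DOFs
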
@@ -794,43 +1795,19 @@ def glonum(node_map): return numpy.repeat(node_map.values_with_halo, nelz, axis=0) + numpy.kron(to_layer.reshape((-1, 1)), node_map.offset) -def get_weak_bc_flags(J): - """ - Return flags indicating whether the zero-th order coefficient on each facet of every cell is non-zero - """ - from ufl.algorithms.ad import expand_derivatives - mesh = J.ufl_domain() - args_J = J.arguments() - V = args_J[0].function_space() - rvs = V.ufl_element().reference_value_shape() - cell = mesh.ufl_cell() - family = "CG" if cell.topological_dimension() == 1 else "DGT" - degree = 1 if cell.topological_dimension() == 1 else 0 - Qe = ufl.FiniteElement(family, cell=cell, degree=degree) - if rvs: - Qe = ufl.TensorElement(Qe, shape=rvs) - Q = firedrake.FunctionSpace(mesh, Qe) - q = firedrake.TestFunction(Q) - - ref_args = [ufl.variable(t) for t in args_J] - replace_args = {t: s for t, s in zip(args_J, ref_args)} - - forms = [] - md = {"quadrature_degree": 0} - for it in J.integrals(): - itype = it.integral_type() - if itype.startswith("exterior_facet"): - beta = ufl.diff(ufl.diff(ufl.replace(it.integrand(), replace_args), ref_args[0]), ref_args[1]) - beta = expand_derivatives(beta) - if rvs: - beta = ufl.diag_vector(beta) - ds_ext = ufl.Measure(itype, domain=mesh, subdomain_id=it.subdomain_id(), metadata=md) - forms.append(ufl.inner(q, beta)*ds_ext) - - tol = 1E-8 - if len(forms): - bq = firedrake.assemble(sum(forms)) - fbc = bq.dat.data_with_halos[glonum(Q.cell_node_map())] - return (abs(fbc) > tol).astype(PETSc.IntType) - else: - return numpy.zeros(glonum(Q.cell_node_map()).shape, dtype=PETSc.IntType) +def spy(A, comm=None): + import matplotlib.pyplot as plt + import scipy.sparse as sp + if comm is None: + comm = A.comm + nnz = A.getInfo()["nz_used"] + if A.getType().endswith("sbaij"): + A.setOption(PETSc.Mat.Option.GETROW_UPPERTRIANGULAR, True) + csr = tuple(reversed(A.getValuesCSR())) + if comm.rank == 0: + csr[0].fill(1) + scipy_mat = sp.csr_matrix(csr, shape=A.getSize()) + fig, axes = plt.subplots(nrows=1, ncols=1) + axes.spy(scipy_mat, marker=".", markersize=2) + plt.title("nnz(A) = %d" % nnz) + plt.show() diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index 34e6469396..825ee81c36 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -3,19 +3,19 @@ fdmstar = { - "mat_type": "matfree", - "ksp_type": "cg", - "ksp_atol": 0.0E0, - "ksp_rtol": 1.0E-8, - "ksp_norm_type": "unpreconditioned", - "ksp_monitor_true_residual": None, - "ksp_converged_reason": None, - "pc_type": "python", - "pc_python_type": "firedrake.P1PC", - "pmg_coarse_mat_type": "aij", - "pmg_mg_coarse": { - "ksp_type": "preonly", - "pc_type": "cholesky", + 'mat_type': 'matfree', + 'ksp_type': 'cg', + 'ksp_atol': 0.0E0, + 'ksp_rtol': 1.0E-8, + 'ksp_norm_type': 'unpreconditioned', + 'ksp_monitor_true_residual': None, + 'ksp_converged_reason': None, + 'pc_type': 'python', + 'pc_python_type': 'firedrake.P1PC', + 'pmg_mg_coarse': { + 'mat_type': 'aij', + 'ksp_type': 'preonly', + 'pc_type': 'cholesky', }, "pmg_mg_levels": { "ksp_type": "chebyshev", @@ -40,10 +40,10 @@ @pytest.fixture(params=[2, 3], - ids=["Rectangle", "Box"]) + ids=['Rectangle', 'Box']) def mesh(request): nx = 4 - distribution = {"overlap_type": (DistributedMeshOverlapType.VERTEX, 1)} + distribution = {'overlap_type': (DistributedMeshOverlapType.VERTEX, 1)} m = UnitSquareMesh(nx, nx, quadrilateral=True, distribution_parameters=distribution) if request.param == 3: m = ExtrudedMesh(m, nx) @@ -62,7 +62,7 @@ def 
expected(mesh): return [8, 8, 8] -@pytest.fixture(params=[None, "fdm"], ids=["spectral", "fdm"]) +@pytest.fixture(params=[None, 'fdm'], ids=['spectral', 'fdm']) def variant(request): return request.param @@ -71,7 +71,7 @@ def variant(request): def test_p_independence(mesh, expected, variant): nits = [] for p in range(3, 6): - e = FiniteElement("Lagrange", cell=mesh.ufl_cell(), degree=p, variant=variant) + e = FiniteElement('Lagrange', cell=mesh.ufl_cell(), degree=p, variant=variant) V = FunctionSpace(mesh, e) u = TrialFunction(V) v = TestFunction(V) @@ -86,9 +86,9 @@ def test_p_independence(mesh, expected, variant): a = inner(grad(v), grad(u))*dx L = inner(v, B)*dx - subs = ("on_boundary",) + subs = ('on_boundary',) if mesh.cell_set._extruded: - subs += ("top", "bottom") + subs += ('top', 'bottom') bcs = [DirichletBC(V, u_exact, sub) for sub in subs] uh = Function(V) @@ -104,7 +104,7 @@ def test_p_independence(mesh, expected, variant): def test_variable_coefficient(mesh): ndim = mesh.geometric_dimension() k = 4 - V = FunctionSpace(mesh, "Lagrange", k) + V = FunctionSpace(mesh, 'Lagrange', k) u = TrialFunction(V) v = TestFunction(V) x = SpatialCoordinate(mesh) @@ -119,9 +119,9 @@ def test_variable_coefficient(mesh): a = (inner(grad(v), dot(alpha, grad(u))) + inner(v, beta*u))*dx(degree=3*k+2) L = inner(v, Constant(1))*dx - subs = ("on_boundary",) + subs = ('on_boundary',) if mesh.cell_set._extruded: - subs += ("top", "bottom") + subs += ('top', 'bottom') bcs = [DirichletBC(V, zero(V.ufl_element().value_shape()), sub) for sub in subs] uh = Function(V) @@ -131,27 +131,27 @@ def test_variable_coefficient(mesh): assert solver.snes.ksp.getIterationNumber() <= 14 -@pytest.fixture(params=["cg", "dg", "rt"], - ids=["cg", "dg", "rt"]) +@pytest.fixture(params=['cg', 'dg', 'rt'], + ids=['cg', 'dg', 'rt']) def fs(request, mesh): degree = 3 ndim = mesh.topological_dimension() cell = mesh.ufl_cell() element = request.param - variant = None - if element == "rt": - family = "RTCF" if ndim == 2 else "NCF" + variant = 'fdm_ipdg' + if element == 'rt': + family = 'RTCF' if ndim == 2 else 'NCF' return FunctionSpace(mesh, FiniteElement(family, cell, degree=degree, variant=variant)) else: if ndim == 1: - family = "DG" if element == "dg" else "CG" + family = 'DG' if element == 'dg' else 'CG' else: - family = "DQ" if element == "dg" else "Q" + family = 'DQ' if element == 'dg' else 'Q' return VectorFunctionSpace(mesh, FiniteElement(family, cell, degree=degree, variant=variant), dim=5-ndim) @pytest.mark.skipcomplex -def test_direct_solver(fs): +def test_ipdg_direct_solver(fs): mesh = fs.mesh() x = SpatialCoordinate(mesh) ndim = mesh.geometric_dimension() @@ -187,31 +187,31 @@ def test_direct_solver(fs): if ndim > 1: subs += (3,) if extruded: - subs += ("top",) + subs += ('top',) bcs = [DirichletBC(fs, u_exact, sub) for sub in subs] dirichlet_ids = subs - if "on_boundary" in dirichlet_ids: + if 'on_boundary' in dirichlet_ids: neumann_ids = [] else: make_tuple = lambda s: s if type(s) == tuple else (s,) neumann_ids = list(set(mesh.exterior_facets.unique_markers) - set(sum([make_tuple(s) for s in subs if type(s) != str], ()))) if extruded: - if "top" not in dirichlet_ids: - neumann_ids.append("top") - if "bottom" not in dirichlet_ids: - neumann_ids.append("bottom") + if 'top' not in dirichlet_ids: + neumann_ids.append('top') + if 'bottom' not in dirichlet_ids: + neumann_ids.append('bottom') dxq = dx(degree=quad_degree, domain=mesh) if extruded: dS_int = dS_v(degree=quad_degree) + dS_h(degree=quad_degree) - ds_ext = 
{"on_boundary": ds_v(degree=quad_degree), "bottom": ds_b(degree=quad_degree), "top": ds_t(degree=quad_degree)} + ds_ext = {'on_boundary': ds_v(degree=quad_degree), 'bottom': ds_b(degree=quad_degree), 'top': ds_t(degree=quad_degree)} ds_Dir = [ds_ext.get(s) or ds_v(s, degree=quad_degree) for s in dirichlet_ids] ds_Neu = [ds_ext.get(s) or ds_v(s, degree=quad_degree) for s in neumann_ids] else: dS_int = dS(degree=quad_degree) - ds_ext = {"on_boundary": ds(degree=quad_degree)} + ds_ext = {'on_boundary': ds(degree=quad_degree)} ds_Dir = [ds_ext.get(s) or ds(s, degree=quad_degree) for s in dirichlet_ids] ds_Neu = [ds_ext.get(s) or ds(s, degree=quad_degree) for s in neumann_ids] @@ -238,20 +238,65 @@ def test_direct_solver(fs): problem = LinearVariationalProblem(a, L, uh, bcs=bcs) solver = LinearVariationalSolver(problem, solver_parameters={ - "mat_type": "matfree", - "ksp_type": "cg", - "ksp_atol": 0.0E0, - "ksp_rtol": 1.0E-8, - "ksp_max_it": 3, - "ksp_monitor": None, - "ksp_norm_type": "unpreconditioned", - "pc_type": "python", - "pc_python_type": "firedrake.FDMPC", - "fdm_pc_type": "cholesky", - "fdm_pc_factor_mat_solver_type": "mumps", - "fdm_pc_factor_mat_ordering_type": "nd", - }, appctx={"eta": eta, }) + 'mat_type': 'matfree', + 'ksp_type': 'cg', + 'ksp_atol': 0.0E0, + 'ksp_rtol': 1.0E-8, + 'ksp_max_it': 3, + 'ksp_monitor': None, + 'ksp_norm_type': 'unpreconditioned', + 'pc_type': 'python', + 'pc_python_type': 'firedrake.PoissonFDMPC', + 'fdm_pc_type': 'cholesky', + 'fdm_pc_factor_mat_solver_type': 'mumps', + 'fdm_pc_factor_mat_ordering_type': 'nd', + }, appctx={'eta': eta, }) solver.solve() assert solver.snes.ksp.getIterationNumber() == 1 - assert norm(u_exact-uh, "H1") < 1.0E-8 + assert norm(u_exact-uh, 'H1') < 1.0E-8 + + +@pytest.mark.skipcomplex +def test_static_condensation(mesh): + degree = 3 + quad_degree = 2*degree+1 + cell = mesh.ufl_cell() + e = FiniteElement('Lagrange', cell=cell, degree=degree, variant='fdm') + Z = FunctionSpace(mesh, MixedElement(*[RestrictedElement(e, d) for d in ("interior", "facet")])) + z = Function(Z) + u = sum(split(z)) + + f = Constant(1) + U = ((1/2)*inner(grad(u), grad(u)) - inner(u, f))*dx(degree=quad_degree) + F = derivative(U, z, TestFunction(Z)) + a = derivative(F, z, TrialFunction(Z)) + + subs = ['on_boundary'] + if mesh.cell_set._extruded: + subs += ['top', 'bottom'] + bcs = [DirichletBC(Z.sub(1), zero(), sub) for sub in subs] + + problem = LinearVariationalProblem(a, -F, z, bcs=bcs) + solver = LinearVariationalSolver(problem, solver_parameters={ + 'mat_type': 'matfree', + 'ksp_monitor': None, + 'ksp_type': 'preonly', + 'ksp_norm_type': 'unpreconditioned', + 'pc_type': 'python', + 'pc_python_type': 'firedrake.SCPC', + 'pc_sc_eliminate_fields': '0', + 'condensed_field': { + 'mat_type': 'matfree', + 'ksp_monitor': None, + 'ksp_type': 'preonly', + 'ksp_norm_type': 'unpreconditioned', + 'pc_type': 'python', + 'pc_python_type': 'firedrake.FDMPC', + 'fdm_pc_type': 'lu', + 'fdm_pc_mat_factor_solver_type': 'mumps' + } + }) + solver.solve() + residual = solver.snes.ksp.buildResidual() + assert residual.norm() < 1E-14 From 283fbc5f94ef33940c0c8962e60fba6d7aa8022e Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 7 Mar 2023 12:07:25 +0000 Subject: [PATCH 02/75] DROP BEFORE MERGE --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 04f377d84f..d1709a8e16 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,7 +48,7 
@@ jobs: - name: Build Firedrake run: | cd .. - ./firedrake/scripts/firedrake-install $COMPLEX --venv-name build --tinyasm --disable-ssh --minimal-petsc --slepc --documentation-dependencies --install thetis --install gusto --install icepack --install irksome --install femlium --no-package-manager || (cat firedrake-install.log && /bin/false) + ./firedrake/scripts/firedrake-install $COMPLEX --venv-name build --tinyasm --disable-ssh --minimal-petsc --slepc --documentation-dependencies --install thetis --install gusto --install icepack --install irksome --install femlium --no-package-manager --package-branch tsfc pbrubeck/fdm-discontinuous || (cat firedrake-install.log && /bin/false) - name: Install test dependencies run: | . ../build/bin/activate From aa38d80c8fa4140e5bc2642599f0d3d32f7ffdb1 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 7 Mar 2023 12:17:03 +0000 Subject: [PATCH 03/75] replace apostrophe with quotes on test --- tests/regression/test_fdm.py | 142 +++++++++++++++++------------------ 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index 825ee81c36..6934bf0a53 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -3,19 +3,19 @@ fdmstar = { - 'mat_type': 'matfree', - 'ksp_type': 'cg', - 'ksp_atol': 0.0E0, - 'ksp_rtol': 1.0E-8, - 'ksp_norm_type': 'unpreconditioned', - 'ksp_monitor_true_residual': None, - 'ksp_converged_reason': None, - 'pc_type': 'python', - 'pc_python_type': 'firedrake.P1PC', - 'pmg_mg_coarse': { - 'mat_type': 'aij', - 'ksp_type': 'preonly', - 'pc_type': 'cholesky', + "mat_type": "matfree", + "ksp_type": "cg", + "ksp_atol": 0.0E0, + "ksp_rtol": 1.0E-8, + "ksp_norm_type": "unpreconditioned", + "ksp_monitor_true_residual": None, + "ksp_converged_reason": None, + "pc_type": "python", + "pc_python_type": "firedrake.P1PC", + "pmg_mg_coarse": { + "mat_type": "aij", + "ksp_type": "preonly", + "pc_type": "cholesky", }, "pmg_mg_levels": { "ksp_type": "chebyshev", @@ -40,10 +40,10 @@ @pytest.fixture(params=[2, 3], - ids=['Rectangle', 'Box']) + ids=["Rectangle", "Box"]) def mesh(request): nx = 4 - distribution = {'overlap_type': (DistributedMeshOverlapType.VERTEX, 1)} + distribution = {"overlap_type": (DistributedMeshOverlapType.VERTEX, 1)} m = UnitSquareMesh(nx, nx, quadrilateral=True, distribution_parameters=distribution) if request.param == 3: m = ExtrudedMesh(m, nx) @@ -62,7 +62,7 @@ def expected(mesh): return [8, 8, 8] -@pytest.fixture(params=[None, 'fdm'], ids=['spectral', 'fdm']) +@pytest.fixture(params=[None, "fdm"], ids=["spectral", "fdm"]) def variant(request): return request.param @@ -71,7 +71,7 @@ def variant(request): def test_p_independence(mesh, expected, variant): nits = [] for p in range(3, 6): - e = FiniteElement('Lagrange', cell=mesh.ufl_cell(), degree=p, variant=variant) + e = FiniteElement("Lagrange", cell=mesh.ufl_cell(), degree=p, variant=variant) V = FunctionSpace(mesh, e) u = TrialFunction(V) v = TestFunction(V) @@ -86,9 +86,9 @@ def test_p_independence(mesh, expected, variant): a = inner(grad(v), grad(u))*dx L = inner(v, B)*dx - subs = ('on_boundary',) + subs = ("on_boundary",) if mesh.cell_set._extruded: - subs += ('top', 'bottom') + subs += ("top", "bottom") bcs = [DirichletBC(V, u_exact, sub) for sub in subs] uh = Function(V) @@ -104,7 +104,7 @@ def test_p_independence(mesh, expected, variant): def test_variable_coefficient(mesh): ndim = mesh.geometric_dimension() k = 4 - V = FunctionSpace(mesh, 'Lagrange', k) + V = FunctionSpace(mesh, 
"Lagrange", k) u = TrialFunction(V) v = TestFunction(V) x = SpatialCoordinate(mesh) @@ -119,9 +119,9 @@ def test_variable_coefficient(mesh): a = (inner(grad(v), dot(alpha, grad(u))) + inner(v, beta*u))*dx(degree=3*k+2) L = inner(v, Constant(1))*dx - subs = ('on_boundary',) + subs = ("on_boundary",) if mesh.cell_set._extruded: - subs += ('top', 'bottom') + subs += ("top", "bottom") bcs = [DirichletBC(V, zero(V.ufl_element().value_shape()), sub) for sub in subs] uh = Function(V) @@ -131,22 +131,22 @@ def test_variable_coefficient(mesh): assert solver.snes.ksp.getIterationNumber() <= 14 -@pytest.fixture(params=['cg', 'dg', 'rt'], - ids=['cg', 'dg', 'rt']) +@pytest.fixture(params=["cg", "dg", "rt"], + ids=["cg", "dg", "rt"]) def fs(request, mesh): degree = 3 ndim = mesh.topological_dimension() cell = mesh.ufl_cell() element = request.param - variant = 'fdm_ipdg' - if element == 'rt': - family = 'RTCF' if ndim == 2 else 'NCF' + variant = "fdm_ipdg" + if element == "rt": + family = "RTCF" if ndim == 2 else "NCF" return FunctionSpace(mesh, FiniteElement(family, cell, degree=degree, variant=variant)) else: if ndim == 1: - family = 'DG' if element == 'dg' else 'CG' + family = "DG" if element == "dg" else "CG" else: - family = 'DQ' if element == 'dg' else 'Q' + family = "DQ" if element == "dg" else "Q" return VectorFunctionSpace(mesh, FiniteElement(family, cell, degree=degree, variant=variant), dim=5-ndim) @@ -187,31 +187,31 @@ def test_ipdg_direct_solver(fs): if ndim > 1: subs += (3,) if extruded: - subs += ('top',) + subs += ("top",) bcs = [DirichletBC(fs, u_exact, sub) for sub in subs] dirichlet_ids = subs - if 'on_boundary' in dirichlet_ids: + if "on_boundary" in dirichlet_ids: neumann_ids = [] else: make_tuple = lambda s: s if type(s) == tuple else (s,) neumann_ids = list(set(mesh.exterior_facets.unique_markers) - set(sum([make_tuple(s) for s in subs if type(s) != str], ()))) if extruded: - if 'top' not in dirichlet_ids: - neumann_ids.append('top') - if 'bottom' not in dirichlet_ids: - neumann_ids.append('bottom') + if "top" not in dirichlet_ids: + neumann_ids.append("top") + if "bottom" not in dirichlet_ids: + neumann_ids.append("bottom") dxq = dx(degree=quad_degree, domain=mesh) if extruded: dS_int = dS_v(degree=quad_degree) + dS_h(degree=quad_degree) - ds_ext = {'on_boundary': ds_v(degree=quad_degree), 'bottom': ds_b(degree=quad_degree), 'top': ds_t(degree=quad_degree)} + ds_ext = {"on_boundary": ds_v(degree=quad_degree), "bottom": ds_b(degree=quad_degree), "top": ds_t(degree=quad_degree)} ds_Dir = [ds_ext.get(s) or ds_v(s, degree=quad_degree) for s in dirichlet_ids] ds_Neu = [ds_ext.get(s) or ds_v(s, degree=quad_degree) for s in neumann_ids] else: dS_int = dS(degree=quad_degree) - ds_ext = {'on_boundary': ds(degree=quad_degree)} + ds_ext = {"on_boundary": ds(degree=quad_degree)} ds_Dir = [ds_ext.get(s) or ds(s, degree=quad_degree) for s in dirichlet_ids] ds_Neu = [ds_ext.get(s) or ds(s, degree=quad_degree) for s in neumann_ids] @@ -221,7 +221,7 @@ def test_ipdg_direct_solver(fs): h = CellVolume(mesh)/FacetArea(mesh) penalty = eta/h - outer_jump = lambda w, n: outer(w('+'), n('+')) + outer(w('-'), n('-')) + outer_jump = lambda w, n: outer(w("+"), n("+")) + outer(w("-"), n("-")) num_flux = lambda w: alpha(avg(penalty/2) * outer_jump(w, n)) num_flux_b = lambda w: alpha((penalty/2) * outer(w, n)) @@ -238,23 +238,23 @@ def test_ipdg_direct_solver(fs): problem = LinearVariationalProblem(a, L, uh, bcs=bcs) solver = LinearVariationalSolver(problem, solver_parameters={ - 'mat_type': 'matfree', - 
'ksp_type': 'cg', - 'ksp_atol': 0.0E0, - 'ksp_rtol': 1.0E-8, - 'ksp_max_it': 3, - 'ksp_monitor': None, - 'ksp_norm_type': 'unpreconditioned', - 'pc_type': 'python', - 'pc_python_type': 'firedrake.PoissonFDMPC', - 'fdm_pc_type': 'cholesky', - 'fdm_pc_factor_mat_solver_type': 'mumps', - 'fdm_pc_factor_mat_ordering_type': 'nd', - }, appctx={'eta': eta, }) + "mat_type": "matfree", + "ksp_type": "cg", + "ksp_atol": 0.0E0, + "ksp_rtol": 1.0E-8, + "ksp_max_it": 3, + "ksp_monitor": None, + "ksp_norm_type": "unpreconditioned", + "pc_type": "python", + "pc_python_type": "firedrake.PoissonFDMPC", + "fdm_pc_type": "cholesky", + "fdm_pc_factor_mat_solver_type": "mumps", + "fdm_pc_factor_mat_ordering_type": "nd", + }, appctx={"eta": eta, }) solver.solve() assert solver.snes.ksp.getIterationNumber() == 1 - assert norm(u_exact-uh, 'H1') < 1.0E-8 + assert norm(u_exact-uh, "H1") < 1.0E-8 @pytest.mark.skipcomplex @@ -262,7 +262,7 @@ def test_static_condensation(mesh): degree = 3 quad_degree = 2*degree+1 cell = mesh.ufl_cell() - e = FiniteElement('Lagrange', cell=cell, degree=degree, variant='fdm') + e = FiniteElement("Lagrange", cell=cell, degree=degree, variant="fdm") Z = FunctionSpace(mesh, MixedElement(*[RestrictedElement(e, d) for d in ("interior", "facet")])) z = Function(Z) u = sum(split(z)) @@ -272,29 +272,29 @@ def test_static_condensation(mesh): F = derivative(U, z, TestFunction(Z)) a = derivative(F, z, TrialFunction(Z)) - subs = ['on_boundary'] + subs = ["on_boundary"] if mesh.cell_set._extruded: - subs += ['top', 'bottom'] + subs += ["top", "bottom"] bcs = [DirichletBC(Z.sub(1), zero(), sub) for sub in subs] problem = LinearVariationalProblem(a, -F, z, bcs=bcs) solver = LinearVariationalSolver(problem, solver_parameters={ - 'mat_type': 'matfree', - 'ksp_monitor': None, - 'ksp_type': 'preonly', - 'ksp_norm_type': 'unpreconditioned', - 'pc_type': 'python', - 'pc_python_type': 'firedrake.SCPC', - 'pc_sc_eliminate_fields': '0', - 'condensed_field': { - 'mat_type': 'matfree', - 'ksp_monitor': None, - 'ksp_type': 'preonly', - 'ksp_norm_type': 'unpreconditioned', - 'pc_type': 'python', - 'pc_python_type': 'firedrake.FDMPC', - 'fdm_pc_type': 'lu', - 'fdm_pc_mat_factor_solver_type': 'mumps' + "mat_type": "matfree", + "ksp_monitor": None, + "ksp_type": "preonly", + "ksp_norm_type": "unpreconditioned", + "pc_type": "python", + "pc_python_type": "firedrake.SCPC", + "pc_sc_eliminate_fields": "0", + "condensed_field": { + "mat_type": "matfree", + "ksp_monitor": None, + "ksp_type": "preonly", + "ksp_norm_type": "unpreconditioned", + "pc_type": "python", + "pc_python_type": "firedrake.FDMPC", + "fdm_pc_type": "lu", + "fdm_pc_mat_factor_solver_type": "mumps" } }) solver.solve() From 8554c497414b9dc9f345bee1788fe48eed898ab8 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 7 Mar 2023 16:40:46 +0000 Subject: [PATCH 04/75] fix tests, support more general TP elements in BLAS prolongation kernels --- firedrake/preconditioners/fdm.py | 17 +- firedrake/preconditioners/pmg.py | 922 +++++++++++++++++++++++-------- tests/regression/test_fdm.py | 1 + 3 files changed, 701 insertions(+), 239 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 4542178d7c..88be79b597 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -75,7 +75,6 @@ def initialize(self, pc): use_amat = options.getBool("pc_use_amat", True) pmat_type = options.getString("mat_type", PETSc.Mat.Type.AIJ) - diagonal_scale = options.getBool("diagonal_scale", False) appctx = 
self.get_appctx(pc) fcp = appctx.get("form_compiler_parameters") @@ -152,7 +151,7 @@ def interp_nullspace(I, nsp): fcp=fcp, options_prefix=options_prefix) # Assemble the FDM preconditioner with sparse local matrices - Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, fcp, appctx, pmat_type, diagonal_scale) + Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, fcp, appctx, pmat_type) self._assemble_P() Pmat.setNullSpace(Amat.getNullSpace()) Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) @@ -179,7 +178,7 @@ def interp_nullspace(I, nsp): fdmpc.setFromOptions() @PETSc.Log.EventDecorator("FDMPrealloc") - def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, appctx, pmat_type, diagonal_scale): + def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, appctx, pmat_type): """ Assemble the sparse preconditioner with cell-wise constant coefficients. @@ -309,8 +308,6 @@ def get_coeffs(e, result=None): else: Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=V.comm) - self.diag = None - @PETSc.Log.EventDecorator("FDMAssemble") def assemble_P(): for _assemble in assembly_callables: @@ -325,12 +322,6 @@ def assemble_P(): P.setValuesRCV(rows, rows, vals, addv) self.set_values(P, Vrow, Vcol, addv) Pmat.assemble() - if diagonal_scale: - diag = Pmat.getDiagonal(result=self.diag) - diag.sqrtabs() - diag.reciprocal() - Pmat.diagonalScale(L=diag, R=diag) - self.diag = diag return Pmat, assemble_P @@ -341,7 +332,7 @@ def update(self, pc): self._assemble_P() def apply(self, pc, x, y): - if hasattr(self, "_ctx_ref"): + if hasattr(self, "fdm_interp"): self.fdm_interp.multTranspose(x, self.work_vec_x) with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref): self.pc.apply(self.work_vec_x, self.work_vec_y) @@ -351,7 +342,7 @@ def apply(self, pc, x, y): self.pc.apply(x, y) def applyTranspose(self, pc, x, y): - if hasattr(self, "_ctx_ref"): + if hasattr(self, "fdm_interp"): self.fdm_interp.multTranspose(x, self.work_vec_y) with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref): self.pc.applyTranspose(self.work_vec_y, self.work_vec_x) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 88cc0f1b10..81791ebb77 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -7,7 +7,7 @@ get_function_space, set_function_space) from firedrake.solving_utils import _SNESContext from firedrake.tsfc_interface import extract_numbered_coefficients -from firedrake.utils import ScalarType_c, IntType_c +from firedrake.utils import ScalarType_c, IntType_c, cached_property from firedrake.petsc import PETSc import firedrake import ufl @@ -30,9 +30,9 @@ class PMGBase(PCSNESBase): or any other solver in firedrake may be applied to the coarse problem. Other PETSc options inspected by this class are: - - 'pmg_coarse_degree': polynomial degree of the coarse level - - 'pmg_coarse_mat_type': can be either 'aij' or 'matfree' - - 'pmg_coarse_form_compiler_mode': can be 'spectral' (default), 'vanilla', 'coffee', or 'tensor' + - 'pmg_mg_coarse_degree': polynomial degree of the coarse level + - 'pmg_mg_coarse_mat_type': can be either 'aij' or 'matfree' + - 'pmg_mg_coarse_form_compiler_mode': can be 'spectral' (default), 'vanilla', 'coffee', or 'tensor' - 'pmg_mg_levels_transfer_mat_type': can be either 'aij' or 'matfree' The p-coarsening is implemented in the `coarsen_element` routine. 
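[Note on the options listed above] These renamed knobs are read from the PETSc options database under the `pmg_` prefix, so they can be passed through `solver_parameters`. A minimal sketch is given below; the smoother and coarse-grid choices are illustrative placeholders, not settings prescribed by this patch:

    parameters = {
        "mat_type": "matfree",
        "ksp_type": "cg",
        "pc_type": "python",
        "pc_python_type": "firedrake.PMGPC",
        # coarse level: drop to degree 1 and assemble an AIJ matrix
        "pmg_mg_coarse_degree": 1,
        "pmg_mg_coarse_mat_type": "aij",
        "pmg_mg_coarse": {
            "ksp_type": "preonly",
            "pc_type": "cholesky",
        },
        # intermediate levels: matrix-free transfer with a simple smoother
        "pmg_mg_levels_transfer_mat_type": "matfree",
        "pmg_mg_levels": {
            "ksp_type": "chebyshev",
            "pc_type": "jacobi",
        },
    }
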
@@ -80,6 +80,10 @@ def initialize(self, pc): odm = pc.getDM() ctx = get_appctx(odm) + if ctx is None: + raise ValueError("No context found.") + if not isinstance(ctx, _SNESContext): + raise ValueError("Don't know how to get form from %r", ctx) test, trial = ctx.J.arguments() if test.function_space() != trial.function_space(): @@ -94,10 +98,10 @@ def initialize(self, pc): # Get the coarse degree from PETSc options fcp = ctx._problem.form_compiler_parameters mode = fcp.get("mode", "spectral") if fcp is not None else "spectral" - self.coarse_degree = opts.getInt("coarse_degree", default=1) - self.coarse_mat_type = opts.getString("coarse_mat_type", default=ctx.mat_type) - self.coarse_pmat_type = opts.getString("coarse_pmat_type", default=self.coarse_mat_type) - self.coarse_form_compiler_mode = opts.getString("coarse_form_compiler_mode", default=mode) + self.coarse_degree = opts.getInt("mg_coarse_degree", default=1) + self.coarse_mat_type = opts.getString("mg_coarse_mat_type", default=ctx.mat_type) + self.coarse_pmat_type = opts.getString("mg_coarse_pmat_type", default=self.coarse_mat_type) + self.coarse_form_compiler_mode = opts.getString("mg_coarse_form_compiler_mode", default=mode) # Construct a list with the elements we'll be using V = test.function_space() @@ -176,12 +180,15 @@ def coarsen(self, fdm, comm): fu = fproblem.u cu = firedrake.Function(cV) + is_linear = fu not in fctx.J.coefficients() + fdeg = PMGBase.max_degree(fV.ufl_element()) cdeg = PMGBase.max_degree(cV.ufl_element()) - fine_to_coarse_map = {fu: cu, - test: test.reconstruct(function_space=cV), + fine_to_coarse_map = {test: test.reconstruct(function_space=cV), trial: trial.reconstruct(function_space=cV)} + if not is_linear: + fine_to_coarse_map[fu] = cu def _coarsen_form(a): if isinstance(a, ufl.Form): @@ -195,6 +202,7 @@ def _coarsen_form(a): cJp = _coarsen_form(fctx.Jp) fcp = self.coarsen_quadrature(fproblem.form_compiler_parameters, fdeg, cdeg) cbcs = self.coarsen_bcs(fproblem.bcs, cV) + cF = self.coarsen_residual(cF, cJ, cu) # Coarsen the appctx: the user might want to provide solution-dependant expressions and forms cappctx = dict(fctx.appctx) @@ -227,7 +235,7 @@ def _coarsen_form(a): # Coarsen the problem and the _SNESContext cproblem = firedrake.NonlinearVariationalProblem(cF, cu, bcs=cbcs, J=cJ, Jp=cJp, form_compiler_parameters=fcp, - is_linear=fproblem.is_linear) + is_linear=is_linear) cctx = type(fctx)(cproblem, mat_type, pmat_type, appctx=cappctx, @@ -250,13 +258,16 @@ def _coarsen_form(a): cdm.setCreateInterpolation(self.create_interpolation) cdm.setCreateInjection(self.create_injection) - # injection of the initial state - def inject_state(mat): - with cu.dat.vec_wo as xc, fu.dat.vec_ro as xf: - mat.multTranspose(xf, xc) + interp_petscmat, _ = cdm.createInterpolation(fdm) + inject_petscmat = cdm.createInjection(fdm) + + if not is_linear: + # injection of the initial state + def inject_state(): + with cu.dat.vec_wo as xc, fu.dat.vec_ro as xf: + inject_petscmat.mult(xf, xc) - injection = self.create_injection(cdm, fdm) - add_hook(parent, setup=partial(inject_state, injection), call_setup=True) + add_hook(parent, setup=inject_state, call_setup=True) # coarsen the nullspace basis def coarsen_nullspace(coarse_V, mat, fine_nullspace): @@ -287,14 +298,13 @@ def coarsen_nullspace(coarse_V, mat, fine_nullspace): else: return fine_nullspace - I, _ = self.create_interpolation(cdm, fdm) ises = cV._ises - cctx._nullspace = coarsen_nullspace(cV, I, fctx._nullspace) + cctx._nullspace = coarsen_nullspace(cV, inject_petscmat, 
fctx._nullspace) cctx.set_nullspace(cctx._nullspace, ises, transpose=False, near=False) - cctx._nullspace_T = coarsen_nullspace(cV, I, fctx._nullspace_T) - cctx.set_nullspace(cctx._nullspace_T, ises, transpose=True, near=False) - cctx._near_nullspace = coarsen_nullspace(cV, injection, fctx._near_nullspace) + cctx._near_nullspace = coarsen_nullspace(cV, inject_petscmat, fctx._near_nullspace) cctx.set_nullspace(cctx._near_nullspace, ises, transpose=False, near=True) + cctx._nullspace_T = coarsen_nullspace(cV, interp_petscmat, fctx._nullspace_T) + cctx.set_nullspace(cctx._nullspace_T, ises, transpose=True, near=False) return cdm def coarsen_quadrature(self, metadata, fdeg, cdeg): @@ -315,25 +325,19 @@ def coarsen_bcs(self, fbcs, cV): for index in bc._indices: cV_ = cV_.sub(index) cbc_value = self.coarsen_bc_value(bc, cV_) - if type(bc) == firedrake.DirichletBC: - cbcs.append(firedrake.DirichletBC(cV_, cbc_value, - bc.sub_domain)) + if isinstance(bc, firedrake.DirichletBC): + cbcs.append(bc.reconstruct(V=cV, g=cbc_value)) else: raise NotImplementedError("Unsupported BC type, please get in touch if you need this") return cbcs @staticmethod @lru_cache(maxsize=20) - def create_transfer(cctx, fctx, mat_type, cbcs, fbcs, inject): + def create_transfer(cctx, fctx, mat_type, cbcs, fbcs): cbcs = cctx._problem.bcs if cbcs else [] fbcs = fctx._problem.bcs if fbcs else [] - if inject: - cV = cctx._problem.u - fV = fctx._problem.u - else: - cV = cctx.J.arguments()[0].function_space() - fV = fctx.J.arguments()[0].function_space() - + cV = cctx.J.arguments()[0].function_space() + fV = fctx.J.arguments()[0].function_space() if mat_type == "matfree": return prolongation_matrix_matfree(fV, cV, fbcs, cbcs) elif mat_type == "aij": @@ -344,13 +348,12 @@ def create_transfer(cctx, fctx, mat_type, cbcs, fbcs, inject): def create_interpolation(self, dmc, dmf): prefix = dmc.getOptionsPrefix() mat_type = PETSc.Options(prefix).getString("mg_levels_transfer_mat_type", default="matfree") - return self.create_transfer(get_appctx(dmc), get_appctx(dmf), mat_type, True, False, False), None + return self.create_transfer(get_appctx(dmc), get_appctx(dmf), mat_type, True, False), None def create_injection(self, dmc, dmf): prefix = dmc.getOptionsPrefix() mat_type = PETSc.Options(prefix).getString("mg_levels_transfer_mat_type", default="matfree") - I = self.create_transfer(get_appctx(dmf), get_appctx(dmc), mat_type, False, False, True) - return PETSc.Mat().createTranspose(I) + return self.create_transfer(get_appctx(dmf), get_appctx(dmc), mat_type, False, False) @staticmethod def max_degree(ele): @@ -392,7 +395,7 @@ def reconstruct_degree(ele, degree): if isinstance(ele, ufl.VectorElement): return type(ele)(PMGBase.reconstruct_degree(ele._sub_element, degree), dim=ele.num_sub_elements()) elif isinstance(ele, ufl.TensorElement): - return type(ele)(PMGBase.reconstruct_degree(ele._sub_element, degree), shape=ele.value_shape(), symmetry=ele.symmetry()) + return type(ele)(PMGBase.reconstruct_degree(ele._sub_element, degree), shape=ele._shape, symmetry=ele.symmetry()) elif isinstance(ele, ufl.EnrichedElement): shift = degree-PMGBase.max_degree(ele) return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e)+shift) for e in ele._elements)) @@ -404,8 +407,10 @@ def reconstruct_degree(ele, degree): return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e)+shift) for e in ele.sub_elements())) elif isinstance(ele, ufl.WithMapping): return type(ele)(PMGBase.reconstruct_degree(ele.wrapee, degree), ele.mapping()) - elif 
isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement, ufl.RestrictedElement)): + elif isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement)): return type(ele)(PMGBase.reconstruct_degree(ele._element, degree)) + elif isinstance(ele, ufl.RestrictedElement): + return type(ele)(PMGBase.reconstruct_degree(ele._element, degree), restriction_domain=ele._restriction_domain) else: return ele.reconstruct(degree=degree) @@ -431,8 +436,7 @@ def configure_pmg(self, pc, pdm): # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. opts = PETSc.Options(pc.getOptionsPrefix() + "pmg_") - if "mg_coarse_pc_mg_levels" not in opts: - opts["mg_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 + opts["mg_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 return ppc @@ -443,7 +447,10 @@ def applyTranspose(self, pc, x, y): return self.ppc.applyTranspose(x, y) def coarsen_bc_value(self, bc, cV): - return firedrake.zero(cV.shape) + return 0 + + def coarsen_residual(self, Fc, Jc, uc): + return ufl.action(Jc, uc) class PMGSNES(SNESBase, PMGBase): @@ -475,10 +482,8 @@ def configure_pmg(self, snes, pdm): # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. opts = PETSc.Options(snes.getOptionsPrefix() + "pfas_") - if "fas_coarse_pc_mg_levels" not in opts: - opts["fas_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 - if "fas_coarse_snes_fas_levels" not in opts: - opts["fas_coarse_snes_fas_levels"] = odm.getRefineLevel() + 1 + opts["fas_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 + opts["fas_coarse_snes_fas_levels"] = odm.getRefineLevel() + 1 return psnes @@ -499,6 +504,89 @@ def coarsen_bc_value(self, bc, cV): coarse.interpolate(bc._original_arg) return coarse + def coarsen_residual(self, Fc, Jc, uc): + return Fc + + +def load_c_code(code, name, argtypes, comm): + from pyop2.compilation import load + from pyop2.utils import get_petsc_dir + cppargs = ["-I%s/include" % d for d in get_petsc_dir()] + ldargs = (["-L%s/lib" % d for d in get_petsc_dir()] + + ["-Wl,-rpath,%s/lib" % d for d in get_petsc_dir()] + + ["-lpetsc", "-lm"]) + return load(code, "c", name, argtypes=argtypes, + cppargs=cppargs, ldargs=ldargs, + comm=comm) + + +def reference_moments(*args, **kwargs): + import ctypes + from tsfc import compile_form + quad_degree = 1+sum([PMGBase.max_degree(t.ufl_element()) for t in args]) + form = ufl.inner(*args)*ufl.dx(degree=quad_degree) + kernel, = compile_form(form, parameters=dict(mode="spectral"), + log=PETSc.Log.isActive(), **kwargs) + op2kernel = op2.Kernel(kernel.ast, kernel.name, + requires_zeroed_output_arguments=True, + flop_count=kernel.flop_count, + events=(kernel.event,)) + code = op2kernel.code.gencode().replace("static inline void", "void") + coords = None + mesh = form.ufl_domain() + if len(kernel.arguments) > 3-len(form.arguments()): + mesh_element = mesh.coordinates.function_space().finat_element + nodes = mesh_element.fiat_equivalent.dual.get_nodes() + points = [list(node.get_point_dict().keys())[0] for node in nodes] + coords = numpy.array(points, dtype=PETSc.ScalarType) + + argtypes = [ctypes.c_voidp]*len(kernel.arguments) + funptr = load_c_code(code, op2kernel.code.name, argtypes, mesh.comm) + + def _wrapper(*args): + args[0].fill(0.0E0) + _args = list(args) + if coords is not None: + _args.insert(1, coords) + return funptr(*[a.ctypes.data for a in _args]) + + return _wrapper + + +@lru_cache(maxsize=10) +def matfree_reference_prolongator(Vf, Vc): 
+ dimf = Vf.value_size * Vf.finat_element.space_dimension() + dimc = Vc.value_size * Vc.finat_element.space_dimension() + build_Afc = reference_moments(ufl.TestFunction(Vf), ufl.TrialFunction(Vc)) + apply_Aff = reference_moments(ufl.TestFunction(Vf), ufl.Coefficient(Vf)) + diag_Aff = reference_moments(ufl.TestFunction(Vf), ufl.TrialFunction(Vf), diagonal=True) + Ax = numpy.empty((dimf,), dtype=PETSc.ScalarType) + Dx = numpy.empty((dimf,), dtype=PETSc.ScalarType) + diagonal = numpy.empty((dimf,), dtype=PETSc.ScalarType) + result = numpy.empty((dimf, dimc), dtype=PETSc.ScalarType) + + def _afun(x): + nonlocal Ax, Dx, diagonal + numpy.multiply(x, diagonal, out=Dx) + apply_Aff(Ax, Dx) + numpy.multiply(Ax, diagonal, out=Ax) + return Ax + + if Vf.comm.rank == 0: + from scipy.sparse.linalg import cg, LinearOperator + build_Afc(result) + diag_Aff(diagonal) + numpy.sqrt(diagonal, out=diagonal) + numpy.reciprocal(diagonal, out=diagonal) + A = LinearOperator((dimf, dimf), _afun, dtype=result.dtype) + for k in range(dimc): + numpy.multiply(result[:, k], diagonal, out=result[:, k]) + result[:, k], _ = cg(A, result[:, k], tol=1E-12) + numpy.multiply(result[:, k], diagonal, out=result[:, k]) + + result = Vf.comm.bcast(result, root=0) + return result + def prolongation_transfer_kernel_action(Vf, expr): from tsfc import compile_expression_dual_evaluation @@ -515,12 +603,12 @@ def prolongation_transfer_kernel_action(Vf, expr): events=(kernel.event,)), coefficients -@lru_cache(maxsize=10) def expand_element(ele): """ Expand a FiniteElement as an EnrichedElement of TensorProductElements, discarding modifiers. """ - if ele.cell() == ufl.quadrilateral: + + if ele.cell().cellname().startswith("quadrilateral"): quadrilateral_tpc = ufl.TensorProductCell(ufl.interval, ufl.interval) return expand_element(ele.reconstruct(cell=quadrilateral_tpc)) elif ele.cell() == ufl.hexahedron: @@ -528,12 +616,14 @@ def expand_element(ele): return expand_element(ele.reconstruct(cell=hexahedron_tpc)) elif isinstance(ele, (ufl.TensorElement, ufl.VectorElement)): return expand_element(ele._sub_element) - elif isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement, ufl.RestrictedElement)): + elif isinstance(ele, ufl.MixedElement): + return type(ele)(*[expand_element(e) for e in ele.sub_elements()]) + elif isinstance(ele, ufl.RestrictedElement): + return type(ele)(expand_element(ele._element), restriction_domain=ele._restriction_domain) + elif isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement)): return expand_element(ele._element) elif isinstance(ele, ufl.WithMapping): return expand_element(ele.wrapee) - elif isinstance(ele, ufl.MixedElement): - return ufl.MixedElement(*[expand_element(e) for e in ele.sub_elements()]) elif isinstance(ele, ufl.EnrichedElement): terms = [] for e in ele._elements: @@ -542,6 +632,7 @@ def expand_element(ele): terms.extend(ee._elements) else: terms.append(ee) + cell, = set([t.cell() for t in terms]) return ufl.EnrichedElement(*terms) elif isinstance(ele, ufl.TensorProductElement): factors = [expand_element(e) for e in ele.sub_elements()] @@ -552,55 +643,188 @@ def expand_element(ele): f_factors = f.sub_elements() if isinstance(f, ufl.TensorProductElement) else (f,) new_terms.extend([t_factors + f_factors for t_factors in terms]) terms = new_terms + if len(terms) == 1: return ufl.TensorProductElement(*terms[0]) else: - return ufl.EnrichedElement(*[ufl.TensorProductElement(*k) for k in terms]) + terms = [ufl.TensorProductElement(*k) for k in terms] + cell, = set([t.cell() for 
t in terms]) + return ufl.EnrichedElement(*terms) else: return ele +def evaluate_dual(dual, element, key=None): + keys = set(tuple(phi.get_point_dict().keys()) for phi in dual) + pts = list(set(sum(keys, ()))) + if key is None: + key = (0, ) * len(pts[0]) + tab = element.tabulate(sum(key), pts)[key] + result = numpy.empty((len(dual), element.space_dimension()), dtype=tab.dtype) + zero = [(0.0, ())] + for k, phi in enumerate(dual): + wts = phi.get_point_dict() + wts = numpy.array([wts.get(pt, zero)[0][0] for pt in pts]) + result[k] = tab.dot(wts).T + return result + + +def compare_element(e1, e2): + if e1 is e2: + return True + if e1.space_dimension() != e2.space_dimension(): + return False + B = evaluate_dual(e1.dual_basis(), e2) + numpy.fill_diagonal(B, numpy.diagonal(B)-1.0) + return numpy.allclose(B, 0.0, rtol=1E-14, atol=1E-14) + + +def compare_dual(b1, b2): + p1 = b1.get_point_dict() + p2 = b2.get_point_dict() + if len(p1) != len(p2): + return False + + k1 = numpy.array(list(p1.keys())) + k2 = numpy.array(list(p2.keys())) + if not numpy.allclose(k1, k2, rtol=1E-16, atol=1E-16): + return False + + k1 = numpy.array([p1[k][0][0] for k in p1]) + k2 = numpy.array([p2[k][0][0] for k in p2]) + if not numpy.allclose(k1, k2, rtol=1E-16, atol=1E-16): + return False + return True + + +def compare_dual_basis(l1, l2): + if len(l1) != len(l2): + return False + for b1, b2 in zip(l1, l2): + if not compare_dual(b1, b2): + return False + return True + + +@lru_cache(maxsize=10) +@PETSc.Log.EventDecorator("GetLineElements") def get_line_elements(V): from FIAT.reference_element import LINE from tsfc.finatinterface import create_element ele = V.ufl_element() if isinstance(ele, ufl.MixedElement) and not isinstance(ele, (ufl.TensorElement, ufl.VectorElement)): raise ValueError("MixedElements are not decomposed into tensor products") - rvs = ele.reference_value_size() - ele = expand_element(ele) - if isinstance(ele, ufl.EnrichedElement): - ele = ele._elements[0] + ele = expand_element(ele) finat_ele = create_element(ele) - if rvs*finat_ele.space_dimension() != V.value_size*V.finat_element.space_dimension(): - raise ValueError("Failed to decompose %s into a single tensor product" % V.ufl_element()) - factors = finat_ele.factors if hasattr(finat_ele, "factors") else (finat_ele,) + if finat_ele.space_dimension() != V.finat_element.space_dimension(): + raise ValueError("Failed to decompose %s into tensor products" % V.ufl_element()) + + def cyclic_perm(a): + return [a[i:] + a[:i] for i in range(len(a))] + + permutations = [] line_elements = [] - for e in reversed(factors): - fiat_ele = e.fiat_equivalent - if fiat_ele.get_reference_element().shape != LINE: - raise ValueError("Expecting %s to be on the interval" % fiat_ele) - line_elements.append(fiat_ele) - return line_elements + axes_shifts = [] + + terms = finat_ele.elements if hasattr(finat_ele, "elements") else [finat_ele] + for term in terms: + factors = term.factors if hasattr(term, "factors") else (term,) + expansion = tuple(e.fiat_equivalent for e in reversed(factors)) + if not all([e.get_reference_element().shape == LINE for e in expansion]): + raise ValueError("Failed to decompose %s into line elements" % V.ufl_element()) + + shift = -1 + for k, perm in enumerate(permutations): + is_perm = all([e1.space_dimension() == e2.space_dimension() + for e1, e2 in zip(perm, expansion)]) + for e1, e2 in zip(perm, expansion): + if is_perm: + is_perm = compare_element(e1, e2) + + if is_perm: + shift = len(expansion) - k + axes_shifts[-1] = axes_shifts[-1] + (shift, ) 
+ break + + if shift == -1: + line_elements.append(expansion) + axes_shifts.append((0, )) + permutations = cyclic_perm(expansion) + + return line_elements, axes_shifts @lru_cache(maxsize=10) -def get_line_interpolator(felem, celem): - from FIAT import functional, make_quadrature +def fiat_reference_prolongator(felem, celem, derivative=False): + from FIAT.reference_element import flatten_reference_cube + + ref_el = flatten_reference_cube(felem.get_reference_element()) + tdim = ref_el.get_spatial_dimension() + if derivative and tdim > 1: + raise NotImplementedError("Derivative prolongator is only available on the interval") + ckey = (felem.formdegree,) if derivative else (0,)*tdim + fkey = (celem.formdegree,) if derivative else (0,)*tdim + fdual = felem.dual_basis() cdual = celem.dual_basis() - if len(fdual) == len(cdual): - if all(f.get_point_dict() == c.get_point_dict() for f, c in zip(fdual, cdual)): - return numpy.array([]) + if fkey == ckey and compare_dual_basis(fdual, cdual): + return numpy.array([]) + return evaluate_dual(fdual, celem, ckey) + - if all(isinstance(phi, functional.PointEvaluation) for phi in fdual): - pts = [list(phi.get_point_dict().keys())[0] for phi in fdual] - return celem.tabulate(0, pts)[(0,)] +@lru_cache(maxsize=10) +def finat_reference_prolongator(felem, celem): + from finat.quadrature import make_quadrature + from gem.interpreter import evaluate + + ref_el = felem.cell + ndim = ref_el.get_spatial_dimension() + degree = felem.degree + try: + degree = max(degree) + except TypeError: + pass + quad_degree = 2*degree+1 + + def _tabulate(e, ps, entity=None): + results = evaluate(e.basis_evaluation(0, ps, entity).values()) + return results[0].arr.reshape((len(ps.points), -1)) + + is_facet_element = True + entity_dofs = felem.entity_dofs() + for key in entity_dofs: + v = sum(list(entity_dofs[key].values()), []) + if len(v): + edim = sum(key) if type(key) == tuple else key + if edim == ndim: + is_facet_element = False + + if is_facet_element and degree > 5: + entities = [] + quadratures = [] + for key in ref_el.sub_entities: + edim = sum(key) if type(key) == tuple else key + if edim == ndim-1: + sub_entities = ref_el.sub_entities[key] + entities.extend([(key, f) for f in sub_entities]) + quadratures.extend([make_quadrature(ref_el.construct_subelement(key), quad_degree)]*len(sub_entities)) + + wts = numpy.concatenate([evaluate([q.weight_expression])[0].arr.reshape((-1,)) for q in quadratures]) + cphi = numpy.concatenate([_tabulate(celem, q.point_set, entity=e) for q, e in zip(quadratures, entities)]).T + fphi = numpy.concatenate([_tabulate(felem, q.point_set, entity=e) for q, e in zip(quadratures, entities)]).T else: - pts = make_quadrature(felem.get_reference_element(), - felem.space_dimension()).get_points() - return numpy.dot(celem.tabulate(0, pts)[(0,)], - numpy.linalg.inv(felem.tabulate(0, pts)[(0,)])) + quadrature = make_quadrature(ref_el, quad_degree) + wts = evaluate([quadrature.weight_expression])[0].arr.reshape((-1,)) + cphi = _tabulate(celem, quadrature.point_set).T + fphi = _tabulate(felem, quadrature.point_set).T + + numpy.sqrt(wts, out=wts) + numpy.multiply(fphi, wts, out=fphi) + numpy.multiply(cphi, wts, out=cphi) + cphi = cphi.reshape((celem.space_dimension(), -1)) + fphi = fphi.reshape((felem.space_dimension(), -1)) + return numpy.linalg.solve(fphi.dot(fphi.T), fphi.dot(cphi.T)) # Common kernel to compute y = kron(A3, kron(A2, A1)) * x @@ -609,7 +833,7 @@ def get_line_interpolator(felem, celem): #include #include -static inline void kronmxv(PetscBLASInt 
tflag, +static inline void kronmxv_inplace(PetscBLASInt tflag, PetscBLASInt mx, PetscBLASInt my, PetscBLASInt mz, PetscBLASInt nx, PetscBLASInt ny, PetscBLASInt nz, PetscBLASInt nel, PetscScalar *A1, PetscScalar *A2, PetscScalar *A3, @@ -675,42 +899,206 @@ def get_line_interpolator(felem, celem): *y = ptr[ires]; return; } -""" +static inline void kronmxv(PetscBLASInt tflag, + PetscBLASInt mx, PetscBLASInt my, PetscBLASInt mz, + PetscBLASInt nx, PetscBLASInt ny, PetscBLASInt nz, PetscBLASInt nel, + PetscScalar *A1, PetscScalar *A2, PetscScalar *A3, + PetscScalar *x, PetscScalar *y, PetscScalar *xwork, PetscScalar *ywork){ + + PetscScalar *ptr[2] = {xwork, ywork}; + + if(ptr[0] != x) + for(PetscBLASInt j=0; j 3: - raise ValueError("More than three factors are not supported") + if(ptr[1] != y) + for(PetscBLASInt j=0; j 3 or len(celems) > 3: + raise ValueError("The expansion is too complicated") + + shifts = fshifts + in_place = False + if len(felems) == len(celems): + in_place = all([(len(fs)*Vf.value_size == len(cs)*Vc.value_size) for fs, cs in zip(fshifts, cshifts)]) + psize = Vf.value_size + + if not in_place: + if len(celems) == 1: + psize = Vc.value_size + pelem = celems[0] + perm_name = "perm_%s" % t_in + celems = celems*len(felems) + elif len(felems) == 1: + shifts = cshifts + psize = Vf.value_size + pelem = felems[0] + perm_name = "perm_%s" % t_out + felems = felems*len(celems) + else: + raise ValueError("Cannot assign fine to coarse DOFs") + + for k in range(len(shifts)): + if Vc.value_size*len(shifts[k]) < Vf.value_size: + shifts[k] = shifts[k]*(Vf.value_size//Vc.value_size) + + perm = sum(shifts, tuple()) + perm_data = ", ".join(map(str, perm)) + operator_decl.append(f""" + PetscBLASInt {perm_name}[{len(perm)}] = {{ {perm_data} }}; + """) + + pshape = [e.space_dimension() for e in pelem] + pargs = ", ".join(map(str, pshape+[1]*(3-len(pshape)))) + pstride = psize * numpy.prod(pshape) + if shifts == fshifts: + prolong_code.append(f""" + for({IntType_c} j=1; j<{len(perm)}; j++) + permute_axis({perm_name}[j], {pargs}, {psize}, {t_in}, {t_in}+j*{pstride}); + """) + restrict_code.append(f""" + for({IntType_c} j=1; j<{len(perm)}; j++) + ipermute_axis({perm_name}[j], {pargs}, {psize}, {t_in}, {t_in}+j*{pstride}); + """) + + fskip = 0 + cskip = 0 + Jlen = 0 + Jmats = [] + fshapes = [] + cshapes = [] + has_code = False + for felem, celem, shift in zip(felems, celems, shifts): + if len(felem) != len(celem): + raise ValueError("Fine and coarse elements do not have the same number of factors") + if len(felem) > 3: + raise ValueError("More than three factors are not supported") + + # Declare array shapes to be used as literals inside the kernels + nscal = psize*len(shift) + fshape = [e.space_dimension() for e in felem] + cshape = [e.space_dimension() for e in celem] + fshapes.append((nscal,) + tuple(fshape)) + cshapes.append((nscal,) + tuple(cshape)) + + J = [fiat_reference_prolongator(fe, ce).T for fe, ce in zip(felem, celem)] + if any([Jk.size and numpy.isclose(Jk, 0.0E0).all() for Jk in J]): + prolong_code.append(f""" + for({IntType_c} i=0; i<{nscal*numpy.prod(fshape)}; i++) {t_out}[i+{fskip}] = 0.0E0; + """) + restrict_code.append(f""" + for({IntType_c} i=0; i<{nscal*numpy.prod(cshape)}; i++) {t_in}[i+{cskip}] = 0.0E0; + """) + else: + Jsize = numpy.cumsum([Jlen]+[Jk.size for Jk in J]) + Jptrs = ["%s+%d" % (mat_name, Jsize[k]) if J[k].size else "NULL" for k in range(len(J))] + Jmats.extend(J) + Jlen = Jsize[-1] + + # The Kronecker product routines assume 3D shapes, so in 1D and 2D we pass 
NULL instead of J + Jargs = ", ".join(Jptrs+["NULL"]*(3-len(Jptrs))) + fargs = ", ".join(map(str, fshape+[1]*(3-len(fshape)))) + cargs = ", ".join(map(str, cshape+[1]*(3-len(cshape)))) + if in_place: + prolong_code.append(f""" + kronmxv_inplace(0, {fargs}, {cargs}, {nscal}, {Jargs}, &{t_in}, &{t_out}); + """) + restrict_code.append(f""" + kronmxv_inplace(1, {cargs}, {fargs}, {nscal}, {Jargs}, &{t_out}, &{t_in}); + """) + elif shifts == fshifts: + if has_code and psize > 1: + raise ValueError("Single tensor product to many tensor products not implemented for vectors") + # Single tensor product to many + prolong_code.append(f""" + kronmxv(0, {fargs}, {cargs}, {nscal}, {Jargs}, {t_in}+{cskip}, {t_out}+{fskip}, {scratch}, {t_out}+{fskip}); + """) + restrict_code.append(f""" + kronmxv(1, {cargs}, {fargs}, {nscal}, {Jargs}, {t_out}+{fskip}, {t_in}+{cskip}, {t_out}+{fskip}, {scratch}); + """) + else: + # Many tensor products to single tensor product + if has_code: + raise ValueError("Many tensor products to single tensor product not implemented") + fskip = 0 + prolong_code.append(f""" + kronmxv(0, {fargs}, {cargs}, {nscal}, {Jargs}, {t_in}+{cskip}, {t_out}+{fskip}, {t_in}+{cskip}, {t_out}+{fskip}); + """) + restrict_code.append(f""" + kronmxv(1, {cargs}, {fargs}, {nscal}, {Jargs}, {t_out}+{fskip}, {t_in}+{cskip}, {t_out}+{fskip}, {t_in}+{cskip}); + """) + has_code = True + fskip += nscal*numpy.prod(fshape) + cskip += nscal*numpy.prod(cshape) # Pass the 1D interpolators as a hexadecimal string - J = [get_line_interpolator(fe, ce) for fe, ce in zip(felems, celems)] - Jdata = ", ".join(map(float.hex, chain(*[Jk.flat for Jk in J]))) - Jsize = numpy.cumsum([0]+[Jk.size for Jk in J]) - Jptrs = ["%s+%d" % (mat_name, Jsize[k]) if J[k].size else "NULL" for k in range(len(J))] - - # The Kronecker product routines assume 3D shapes, so in 1D and 2D we pass NULL instead of J - Jargs = ", ".join(Jptrs+["NULL"]*(3-len(Jptrs))) - fargs = ", ".join(map(str, fshape+[1]*(3-len(fshape)))) - cargs = ", ".join(map(str, cshape+[1]*(3-len(cshape)))) - operator_decl = f""" - PetscScalar {mat_name}[{Jsize[-1]}] = {{ {Jdata} }}; - """ - prolong_code = f""" - kronmxv(0, {fargs}, {cargs}, {nscal}, {Jargs}, &{t_in}, &{t_out}); - """ - restrict_code = f""" - kronmxv(1, {cargs}, {fargs}, {nscal}, {Jargs}, &{t_out}, &{t_in}); - """ + Jdata = ", ".join(map(float.hex, chain(*[Jk.flat for Jk in Jmats]))) + operator_decl.append(f""" + PetscScalar {mat_name}[{Jlen}] = {{ {Jdata} }}; + """) + + operator_decl = "".join(operator_decl) + prolong_code = "".join(prolong_code) + restrict_code = "".join(reversed(restrict_code)) + shapes = [tuple(map(max, zip(*fshapes))), tuple(map(max, zip(*cshapes)))] return operator_decl, prolong_code, restrict_code, shapes @@ -754,6 +1142,8 @@ def cache_generate_code(kernel, comm): def make_mapping_code(Q, fmapping, cmapping, t_in, t_out): + if fmapping == cmapping: + return None domain = Q.ufl_domain() A = get_piola_tensor(cmapping, domain, inverse=False) B = get_piola_tensor(fmapping, domain, inverse=True) @@ -795,28 +1185,20 @@ def make_mapping_code(Q, fmapping, cmapping, t_in, t_out): return coef_decl, prolong_code, restrict_code, mapping_code, coefficients -def get_axes_shift(ele): - """Return the form degree of a FInAT element after discarding modifiers""" - if hasattr(ele, "element"): - return get_axes_shift(ele.element) - else: - return ele.formdegree - - def make_permutation_code(V, vshape, pshape, t_in, t_out, array_name): - shift = get_axes_shift(V.finat_element) - tdim = 
V.mesh().topological_dimension() - if shift % tdim: + _, shifts = get_line_elements(V) + shift = shifts[0] + if shift != (0,): ndof = numpy.prod(vshape) permutation = numpy.reshape(numpy.arange(ndof), pshape) - axes = numpy.arange(tdim) + axes = numpy.arange(len(shift)) for k in range(permutation.shape[0]): - permutation[k] = numpy.reshape(numpy.transpose(permutation[k], axes=numpy.roll(axes, -shift*k)), pshape[1:]) + permutation[k] = numpy.reshape(numpy.transpose(permutation[k], axes=numpy.roll(axes, -shift[k])), pshape[1:]) nflip = 0 mapping = V.ufl_element().mapping().lower() if mapping == "contravariant piola": # flip the sign of the first component - nflip = ndof//tdim + nflip = ndof//len(shift) elif mapping == "covariant piola": # flip the order of reference components permutation = numpy.flip(permutation, axis=0) @@ -850,30 +1232,38 @@ def make_permutation_code(V, vshape, pshape, t_in, t_out, array_name): return decl, prolong, restrict +@PETSc.Log.EventDecorator("GetPermutedMap") def get_permuted_map(V): """ Return a PermutedMap with the same tensor product shape for every component of H(div) or H(curl) tensor product elements """ - shift = get_axes_shift(V.finat_element) - if shift % V.mesh().topological_dimension() == 0: + expansion, shifts = get_line_elements(V) + if {(0, )} == set(shifts): return V.cell_node_map() - elements = get_line_elements(V) - axes = numpy.arange(len(elements)) - pshape = [-1] + [e.space_dimension() for e in elements] - permutation = numpy.reshape(numpy.arange(V.finat_element.space_dimension()), pshape) - for k in range(permutation.shape[0]): - permutation[k] = numpy.reshape(numpy.transpose(permutation[k], axes=numpy.roll(axes, shift*k)), pshape[1:]) + istart = 0 + perm = [] + for factors, shift in zip(expansion, shifts): + axes = numpy.arange(len(factors)) + pshape = [len(shift)] + [e.space_dimension() for e in factors] + iend = istart + numpy.prod(pshape) + permutation = numpy.reshape(numpy.arange(istart, iend), pshape) + for k in range(permutation.shape[0]): + permutation[k] = numpy.reshape(numpy.transpose(permutation[k], axes=numpy.roll(axes, shift[k])), pshape[1:]) + perm.extend(permutation.flat) + istart = iend - permutation = numpy.reshape(permutation, (-1,)) - return PermutedMap(V.cell_node_map(), permutation) + return PermutedMap(V.cell_node_map(), perm) class StandaloneInterpolationMatrix(object): """ Interpolation matrix for a single standalone space. 
""" + + _cache_work = {} + def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): self.Vf_bcs = Vf_bcs self.Vc_bcs = Vc_bcs @@ -881,29 +1271,47 @@ def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): self.uf = Vf Vf = Vf.function_space() else: - self.uf = firedrake.Function(Vf) + self.uf = self._cache_work.get(Vf, firedrake.Function(Vf)) + self._cache_work[Vf] = self.uf if isinstance(Vc, firedrake.Function): self.uc = Vc Vc = Vc.function_space() else: - self.uc = firedrake.Function(Vc) - - self.weight = self.multiplicity(Vf) - with self.weight.dat.vec as w: + self.uc = self._cache_work.get(Vc, firedrake.Function(Vc)) + self._cache_work[Vc] = self.uc + self.Vf = Vf + self.Vc = Vc + + @cached_property + def _weight(self): + weight = firedrake.Function(self.Vf) + size = self.Vf.finat_element.space_dimension() * self.Vf.value_size + kernel_code = f""" + void weight(PetscScalar *restrict w){{ + for(PetscInt i=0; i<{size}; i++) w[i] += 1.0; + return; + }} + """ + kernel = op2.Kernel(kernel_code, "weight", requires_zeroed_output_arguments=True) + op2.par_loop(kernel, weight.cell_set, weight.dat(op2.INC, weight.cell_node_map())) + with weight.dat.vec as w: w.reciprocal() + return weight + @cached_property + def _kernels(self): try: - uf_map = get_permuted_map(Vf) - uc_map = get_permuted_map(Vc) - prolong_kernel, restrict_kernel, coefficients = self.make_blas_kernels(Vf, Vc) + uf_map = get_permuted_map(self.Vf) + uc_map = get_permuted_map(self.Vc) + prolong_kernel, restrict_kernel, coefficients = self.make_blas_kernels(self.Vf, self.Vc) prolong_args = [prolong_kernel, self.uf.cell_set, self.uf.dat(op2.INC, uf_map), self.uc.dat(op2.READ, uc_map), - self.weight.dat(op2.READ, uf_map)] + self._weight.dat(op2.READ, uf_map)] except ValueError: - uf_map = Vf.cell_node_map() - uc_map = Vc.cell_node_map() - prolong_kernel, restrict_kernel, coefficients = self.make_kernels(Vf, Vc) + uf_map = self.Vf.cell_node_map() + uc_map = self.Vc.cell_node_map() + prolong_kernel, restrict_kernel, coefficients = self.make_kernels(self.Vf, self.Vc) prolong_args = [prolong_kernel, self.uf.cell_set, self.uf.dat(op2.WRITE, uf_map), self.uc.dat(op2.READ, uc_map)] @@ -911,10 +1319,38 @@ def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): restrict_args = [restrict_kernel, self.uf.cell_set, self.uc.dat(op2.INC, uc_map), self.uf.dat(op2.READ, uf_map), - self.weight.dat(op2.READ, uf_map)] + self._weight.dat(op2.READ, uf_map)] coefficient_args = [c.dat(op2.READ, c.cell_node_map()) for c in coefficients] - self._prolong = partial(op2.par_loop, *prolong_args, *coefficient_args) - self._restrict = partial(op2.par_loop, *restrict_args, *coefficient_args) + prolong = partial(op2.par_loop, *prolong_args, *coefficient_args) + restrict = partial(op2.par_loop, *restrict_args, *coefficient_args) + return prolong, restrict + + def view(self, mat, viewer=None): + if viewer is None: + return + typ = viewer.getType() + if typ != PETSc.Viewer.Type.ASCII: + return + viewer.printfASCII("Firedrake matrix-free prolongator %s\n" % + type(self).__name__) + + def getInfo(self, mat, info=None): + from mpi4py import MPI + memory = self.uf.dat.nbytes + self.uc.dat.nbytes + if self._weight is not None: + memory += self._weight.dat.nbytes + if info is None: + info = PETSc.Mat.InfoType.GLOBAL_SUM + if info == PETSc.Mat.InfoType.LOCAL: + return {"memory": memory} + elif info == PETSc.Mat.InfoType.GLOBAL_SUM: + gmem = mat.comm.tompi4py().allreduce(memory, op=MPI.SUM) + return {"memory": gmem} + elif info == PETSc.Mat.InfoType.GLOBAL_MAX: + gmem = mat.comm.tompi4py().allreduce(memory, 
op=MPI.MAX) + return {"memory": gmem} + else: + raise ValueError("Unknown info type %s" % info) @staticmethod def make_blas_kernels(Vf, Vc): @@ -922,8 +1358,8 @@ def make_blas_kernels(Vf, Vc): Interpolation and restriction kernels between CG / DG tensor product spaces on quads and hexes. - Works by tabulating the coarse 1D Lagrange basis - functions as the (fdegree+1)-by-(cdegree+1) matrix Jhat, + Works by tabulating the coarse 1D basis functions + as the (fdegree+1)-by-(cdegree+1) matrix Jhat, and using the fact that the 2D / 3D tabulation is the tensor product J = kron(Jhat, kron(Jhat, Jhat)) """ @@ -936,43 +1372,47 @@ def make_blas_kernels(Vf, Vc): coefficients = [] mapping_code = "" coef_decl = "" + if fmapping == cmapping: # interpolate on each direction via Kroncker product - operator_decl, prolong_code, restrict_code, shapes = make_kron_code(Vf, Vc, "t0", "t1", "J0") + operator_decl, prolong_code, restrict_code, shapes = make_kron_code(Vf, Vc, "t0", "t1", "J0", "t2") else: decl = [""]*4 prolong = [""]*5 restrict = [""]*5 # get embedding element for Vf with identity mapping and collocated vector component DOFs try: - Q = Vf if fmapping == "identity" else firedrake.FunctionSpace(Vf.ufl_domain(), - felem.reconstruct(mapping="identity")) - mapping_output = make_mapping_code(Q, fmapping, cmapping, "t0", "t1") + qelem = felem + if qelem.mapping() != "identity": + qelem = qelem.reconstruct(mapping="identity") + Qf = Vf if qelem == felem else firedrake.FunctionSpace(Vf.ufl_domain(), qelem) + mapping_output = make_mapping_code(Qf, fmapping, cmapping, "t0", "t1") in_place_mapping = True except Exception: - Qe = ufl.FiniteElement("DQ", cell=felem.cell(), degree=PMGBase.max_degree(felem)) + qelem = ufl.FiniteElement("DQ", cell=felem.cell(), degree=PMGBase.max_degree(felem)) if felem.value_shape(): - Qe = ufl.TensorElement(Qe, shape=felem.value_shape(), symmetry=felem.symmetry()) - Q = firedrake.FunctionSpace(Vf.ufl_domain(), Qe) - mapping_output = make_mapping_code(Q, fmapping, cmapping, "t0", "t1") - - qshape = (Q.value_size, Q.finat_element.space_dimension()) - # interpolate to embedding fine space, permute to FInAT ordering, and apply the mapping - decl[0], prolong[0], restrict[0], shapes = make_kron_code(Q, Vc, "t0", "t1", "J0") - decl[1], restrict[1], prolong[1] = make_permutation_code(Vc, qshape, shapes[0], "t0", "t1", "perm0") - coef_decl, prolong[2], restrict[2], mapping_code, coefficients = mapping_output - - if not in_place_mapping: - # permute to Kronecker-friendly ordering and interpolate to fine space - decl[2], prolong[3], restrict[3] = make_permutation_code(Vf, qshape, shapes[0], "t1", "t0", "perm1") - decl[3], prolong[4], restrict[4], _shapes = make_kron_code(Vf, Q, "t0", "t1", "J1") - shapes.extend(_shapes) + qelem = ufl.TensorElement(qelem, shape=felem._shape, symmetry=felem.symmetry()) + Qf = firedrake.FunctionSpace(Vf.ufl_domain(), qelem) + mapping_output = make_mapping_code(Qf, fmapping, cmapping, "t0", "t1") + + qshape = (Qf.value_size, Qf.finat_element.space_dimension()) + # interpolate to embedding fine space + decl[0], prolong[0], restrict[0], shapes = make_kron_code(Qf, Vc, "t0", "t1", "J0", "t2") + + if mapping_output is not None: + # permute to FInAT ordering, and apply the mapping + decl[1], restrict[1], prolong[1] = make_permutation_code(Vc, qshape, shapes[0], "t0", "t1", "perm0") + coef_decl, prolong[2], restrict[2], mapping_code, coefficients = mapping_output + if not in_place_mapping: + # permute to Kronecker-friendly ordering and interpolate to fine space + 
decl[2], prolong[3], restrict[3] = make_permutation_code(Vf, qshape, shapes[0], "t1", "t0", "perm1") + decl[3], prolong[4], restrict[4], _shapes = make_kron_code(Vf, Qf, "t0", "t1", "J1", "t2") + shapes.extend(_shapes) operator_decl = "".join(decl) prolong_code = "".join(prolong) restrict_code = "".join(reversed(restrict)) - lwork = numpy.prod([max(*dims) for dims in zip(*shapes)]) # FInAT elements order the component DOFs related to the same node contiguously. # We transpose before and after the multiplication times J to have each component # stored contiguously as a scalar field, thus reducing the number of dgemm calls. @@ -982,6 +1422,10 @@ def make_blas_kernels(Vf, Vc): fshape = (Vf.value_size, Vf.finat_element.space_dimension()) cshape = (Vc.value_size, Vc.finat_element.space_dimension()) + + lwork = numpy.prod([max(*dims) for dims in zip(*shapes)]) + lwork = max(lwork, max(numpy.prod(fshape), numpy.prod(cshape))) + if cshape[0] == 1: coarse_read = f"""for({IntType_c} i=0; i<{numpy.prod(cshape)}; i++) t0[i] = x[i];""" coarse_write = f"""for({IntType_c} i=0; i<{numpy.prod(cshape)}; i++) x[i] += t0[i];""" @@ -1017,9 +1461,10 @@ def make_blas_kernels(Vf, Vc): void prolongation(PetscScalar *restrict y, const PetscScalar *restrict x, const PetscScalar *restrict w{coef_decl}){{ - PetscScalar work[2][{lwork}]; + PetscScalar work[3][{lwork}] = {{0.0E0}}; PetscScalar *t0 = work[0]; PetscScalar *t1 = work[1]; + PetscScalar *t2 = work[2]; {operator_decl} {coarse_read} {prolong_code} @@ -1029,9 +1474,10 @@ def make_blas_kernels(Vf, Vc): void restriction(PetscScalar *restrict x, const PetscScalar *restrict y, const PetscScalar *restrict w{coef_decl}){{ - PetscScalar work[2][{lwork}]; + PetscScalar work[3][{lwork}] = {{0.0E0}}; PetscScalar *t0 = work[0]; PetscScalar *t1 = work[1]; + PetscScalar *t2 = work[2]; {operator_decl} {fine_read} {restrict_code} @@ -1052,47 +1498,67 @@ def make_kernels(self, Vf, Vc): This is temporary while we wait for dual evaluation in FInAT. """ - prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, self.uc) - matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, firedrake.TestFunction(Vc)) - # The way we transpose the prolongation kernel is suboptimal. - # A local matrix is generated each time the kernel is executed. 
- element_kernel = loopy.generate_code_v2(matrix_kernel.code).device_code() - element_kernel = element_kernel.replace("void expression_kernel", "static void expression_kernel") - dimc = Vc.finat_element.space_dimension() * Vc.value_size - dimf = Vf.finat_element.space_dimension() * Vf.value_size - - coef_args = "".join([", c%d" % i for i in range(len(coefficients))]) - coef_decl = "".join([", const %s *restrict c%d" % (ScalarType_c, i) for i in range(len(coefficients))]) - restrict_code = f""" - {element_kernel} - - void restriction({ScalarType_c} *restrict Rc, const {ScalarType_c} *restrict Rf, const {ScalarType_c} *restrict w{coef_decl}) - {{ - {ScalarType_c} Afc[{dimf}*{dimc}] = {{0}}; - expression_kernel(Afc{coef_args}); - for ({IntType_c} i = 0; i < {dimf}; i++) - for ({IntType_c} j = 0; j < {dimc}; j++) - Rc[j] += Afc[i*{dimc} + j] * Rf[i] * w[i]; - }} - """ - restrict_kernel = op2.Kernel(restrict_code, "restriction", requires_zeroed_output_arguments=True) - return prolong_kernel, restrict_kernel, coefficients + try: + prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, self.uc) + matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, firedrake.TestFunction(Vc)) + # The way we transpose the prolongation kernel is suboptimal. + # A local matrix is generated each time the kernel is executed. + element_kernel = loopy.generate_code_v2(matrix_kernel.code).device_code() + element_kernel = element_kernel.replace("void expression_kernel", "static void expression_kernel") + coef_args = "".join([", c%d" % i for i in range(len(coefficients))]) + coef_decl = "".join([", const %s *restrict c%d" % (ScalarType_c, i) for i in range(len(coefficients))]) + dimc = Vc.finat_element.space_dimension() * Vc.value_size + dimf = Vf.finat_element.space_dimension() * Vf.value_size + restrict_code = f""" + {element_kernel} + + void restriction({ScalarType_c} *restrict Rc, const {ScalarType_c} *restrict Rf, const {ScalarType_c} *restrict w{coef_decl}) + {{ + {ScalarType_c} Afc[{dimf}*{dimc}] = {{0}}; + expression_kernel(Afc{coef_args}); + for ({IntType_c} i = 0; i < {dimf}; i++) + for ({IntType_c} j = 0; j < {dimc}; j++) + Rc[j] += Afc[i*{dimc} + j] * Rf[i] * w[i]; + }} + """ + restrict_kernel = op2.Kernel(restrict_code, "restriction", requires_zeroed_output_arguments=True) + except NotImplementedError: + if Vc.ufl_element().mapping() != Vf.ufl_element().mapping(): + raise NotImplementedError("Prolongation not supported from %s to %s" % (Vc.ufl_element(), Vf.ufl_element())) + if Vf.finat_element.space_dimension() < 400: + Jmat = finat_reference_prolongator(Vf.finat_element, Vc.finat_element) + else: + Jmat = matfree_reference_prolongator(Vf, Vc) + dimf, dimc = Jmat.shape + vsize = (Vc.value_size*Vc.finat_element.space_dimension())//dimc + Jdata = ", ".join(map(float.hex, Jmat.flat)) + kernel_code = f""" + void prolongation({ScalarType_c} *restrict uf, const {ScalarType_c} *restrict uc) + {{ + {ScalarType_c} Afc[{dimf}*{dimc}] = {{ {Jdata} }}; + for ({IntType_c} i = 0; i < {vsize}*{dimf}; i++) + uf[i] = 0.0E0; + + for ({IntType_c} i = 0; i < {dimf}; i++) + for ({IntType_c} j = 0; j < {dimc}; j++) + for ({IntType_c} k = 0; k < {vsize}; k++) + uf[i*{vsize}+k] += Afc[i*{dimc} + j] * uc[j*{vsize}+k]; + }} + + void restriction({ScalarType_c} *restrict Rc, const {ScalarType_c} *restrict Rf, const {ScalarType_c} *restrict w) + {{ + {ScalarType_c} Afc[{dimf}*{dimc}] = {{ {Jdata} }}; + for ({IntType_c} i = 0; i < {dimf}; i++) + for ({IntType_c} j = 0; j < {dimc}; j++) + for ({IntType_c} k = 0; k < 
{vsize}; k++) + Rc[j*{vsize}+k] += Afc[i*{dimc} + j] * Rf[i*{vsize}+k] * w[i*{vsize}+k]; + }} + """ + prolong_kernel = op2.Kernel(kernel_code, "prolongation", requires_zeroed_output_arguments=True) + restrict_kernel = op2.Kernel(kernel_code, "restriction", requires_zeroed_output_arguments=True) + coefficients = [] - @staticmethod - def multiplicity(V): - # Lawrence's magic code for calculating dof multiplicities - shapes = (V.finat_element.space_dimension(), - numpy.prod(V.shape)) - domain = "{[i,j]: 0 <= i < %d and 0 <= j < %d}" % shapes - instructions = """ - for i, j - w[i,j] = w[i,j] + 1 - end - """ - weight = firedrake.Function(V) - firedrake.par_loop((domain, instructions), firedrake.dx, - {"w": (weight, op2.INC)}, is_loopy_kernel=True) - return weight + return prolong_kernel, restrict_kernel, coefficients def multTranspose(self, mat, rf, rc): """ @@ -1105,7 +1571,7 @@ def multTranspose(self, mat, rf, rc): with self.uc.dat.vec_wo as uc: uc.set(0.0E0) - self._restrict() + self._kernels[1]() for bc in self.Vc_bcs: bc.zero(self.uc) @@ -1123,7 +1589,7 @@ def mult(self, mat, xc, xf, inc=False): with self.uf.dat.vec_wo as uf: uf.set(0.0E0) - self._prolong() + self._kernels[0]() for bc in self.Vf_bcs: bc.zero(self.uf) @@ -1146,25 +1612,29 @@ class MixedInterpolationMatrix(StandaloneInterpolationMatrix): """ Interpolation matrix for a mixed finite element space. """ - def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): - self.Vf_bcs = Vf_bcs - self.Vc_bcs = Vc_bcs - self.uf = Vf if isinstance(Vf, firedrake.Function) else firedrake.Function(Vf) - self.uc = Vc if isinstance(Vc, firedrake.Function) else firedrake.Function(Vc) + @cached_property + def _weight(self): + return None - self.standalones = [] + @cached_property + def _standalones(self): + standalones = [] for (i, (uf_sub, uc_sub)) in enumerate(zip(self.uf.subfunctions, self.uc.subfunctions)): - Vf_sub_bcs = [bc for bc in Vf_bcs if bc.function_space().index == i] - Vc_sub_bcs = [bc for bc in Vc_bcs if bc.function_space().index == i] + Vf_sub_bcs = [bc for bc in self.Vf_bcs if bc.function_space().index == i] + Vc_sub_bcs = [bc for bc in self.Vc_bcs if bc.function_space().index == i] standalone = StandaloneInterpolationMatrix(uf_sub, uc_sub, Vf_sub_bcs, Vc_sub_bcs) - self.standalones.append(standalone) + standalones.append(standalone) + return standalones - self._prolong = lambda: [standalone._prolong() for standalone in self.standalones] - self._restrict = lambda: [standalone._restrict() for standalone in self.standalones] + @cached_property + def _kernels(self): + prolong = lambda: [standalone._kernels[0]() for standalone in self._standalones] + restrict = lambda: [standalone._kernels[1]() for standalone in self._standalones] + return prolong, restrict def getNestSubMatrix(self, i, j): if i == j: - s = self.standalones[i] + s = self._standalones[i] sizes = (s.uf.dof_dset.layout_vec.getSizes(), s.uc.dof_dset.layout_vec.getSizes()) M_shll = PETSc.Mat().createPython(sizes, s, comm=s.uf._comm) M_shll.setUp() diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index 6934bf0a53..cefb464680 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -12,6 +12,7 @@ "ksp_converged_reason": None, "pc_type": "python", "pc_python_type": "firedrake.P1PC", + "pmg_coarse_mat_type": "aij", "pmg_mg_coarse": { "mat_type": "aij", "ksp_type": "preonly", From d0a7b01d197398ded05dc9fabdc05c538e180e1f Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 8 Mar 2023 10:50:19 +0000 Subject: [PATCH 05/75] pfas tests now 
passing, but need to create the transpose of injection --- firedrake/preconditioners/pmg.py | 46 ++++++++++++++++------------- tests/multigrid/test_p_multigrid.py | 23 ++++++++------- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 81791ebb77..bc533370c0 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -48,6 +48,7 @@ class PMGBase(PCSNESBase): """ _prefix = "pmg_" + _is_linear = False def coarsen_element(self, ele): """ @@ -95,13 +96,19 @@ def initialize(self, pc): pdm = PETSc.DMShell().create(comm=pc.comm) pdm.setOptionsPrefix(options_prefix) + self.ppc = self.configure_pmg(pc, pdm) + self.ppc.setFromOptions() + + print(self.ppc.getOptionsPrefix()) + copts = PETSc.Options(self.ppc.getOptionsPrefix()+self.ppc.getType()+"_coarse_") + # Get the coarse degree from PETSc options fcp = ctx._problem.form_compiler_parameters mode = fcp.get("mode", "spectral") if fcp is not None else "spectral" - self.coarse_degree = opts.getInt("mg_coarse_degree", default=1) - self.coarse_mat_type = opts.getString("mg_coarse_mat_type", default=ctx.mat_type) - self.coarse_pmat_type = opts.getString("mg_coarse_pmat_type", default=self.coarse_mat_type) - self.coarse_form_compiler_mode = opts.getString("mg_coarse_form_compiler_mode", default=mode) + self.coarse_degree = copts.getInt("degree", default=1) + self.coarse_mat_type = copts.getString("mat_type", default=ctx.mat_type) + self.coarse_pmat_type = copts.getString("pmat_type", default=self.coarse_mat_type) + self.coarse_form_compiler_mode = copts.getString("form_compiler_mode", default=mode) # Construct a list with the elements we'll be using V = test.function_space() @@ -125,8 +132,8 @@ def initialize(self, pc): pdm.setCreateInterpolation(self.create_interpolation) # We need this for p-FAS pdm.setCreateInjection(self.create_injection) - pdm.setSNESJacobian(_SNESContext.form_jacobian) pdm.setSNESFunction(_SNESContext.form_function) + pdm.setSNESJacobian(_SNESContext.form_jacobian) pdm.setKSPComputeOperators(_SNESContext.compute_operators) set_function_space(pdm, get_function_space(odm)) @@ -135,11 +142,9 @@ def initialize(self, pc): assert parent is not None add_hook(parent, setup=partial(push_parent, pdm, parent), teardown=partial(pop_parent, pdm, parent), call_setup=True) add_hook(parent, setup=partial(push_appctx, pdm, ctx), teardown=partial(pop_appctx, pdm, ctx), call_setup=True) - - self.ppc = self.configure_pmg(pc, pdm) - self.ppc.setFromOptions() self.ppc.setUp() + def update(self, pc): pass @@ -147,7 +152,8 @@ def view(self, pc, viewer=None): if viewer is None: viewer = PETSc.Viewer.STDOUT viewer.printfASCII("p-multigrid PC\n") - self.ppc.view(viewer) + if hasattr(self, "ppc"): + self.ppc.view(viewer) def destroy(self, pc): if hasattr(self, "ppc"): @@ -180,14 +186,12 @@ def coarsen(self, fdm, comm): fu = fproblem.u cu = firedrake.Function(cV) - is_linear = fu not in fctx.J.coefficients() - fdeg = PMGBase.max_degree(fV.ufl_element()) cdeg = PMGBase.max_degree(cV.ufl_element()) fine_to_coarse_map = {test: test.reconstruct(function_space=cV), trial: trial.reconstruct(function_space=cV)} - if not is_linear: + if not self._is_linear: fine_to_coarse_map[fu] = cu def _coarsen_form(a): @@ -235,7 +239,7 @@ def _coarsen_form(a): # Coarsen the problem and the _SNESContext cproblem = firedrake.NonlinearVariationalProblem(cF, cu, bcs=cbcs, J=cJ, Jp=cJp, form_compiler_parameters=fcp, - is_linear=is_linear) + is_linear=self._is_linear) cctx = 
type(fctx)(cproblem, mat_type, pmat_type, appctx=cappctx, @@ -261,11 +265,11 @@ def _coarsen_form(a): interp_petscmat, _ = cdm.createInterpolation(fdm) inject_petscmat = cdm.createInjection(fdm) - if not is_linear: + if not self._is_linear: # injection of the initial state def inject_state(): with cu.dat.vec_wo as xc, fu.dat.vec_ro as xf: - inject_petscmat.mult(xf, xc) + inject_petscmat.multTranspose(xf, xc) add_hook(parent, setup=inject_state, call_setup=True) @@ -326,7 +330,7 @@ def coarsen_bcs(self, fbcs, cV): cV_ = cV_.sub(index) cbc_value = self.coarsen_bc_value(bc, cV_) if isinstance(bc, firedrake.DirichletBC): - cbcs.append(bc.reconstruct(V=cV, g=cbc_value)) + cbcs.append(bc.reconstruct(V=cV_, g=cbc_value)) else: raise NotImplementedError("Unsupported BC type, please get in touch if you need this") return cbcs @@ -353,7 +357,8 @@ def create_interpolation(self, dmc, dmf): def create_injection(self, dmc, dmf): prefix = dmc.getOptionsPrefix() mat_type = PETSc.Options(prefix).getString("mg_levels_transfer_mat_type", default="matfree") - return self.create_transfer(get_appctx(dmf), get_appctx(dmc), mat_type, False, False) + I = self.create_transfer(get_appctx(dmf), get_appctx(dmc), mat_type, False, False) + return PETSc.Mat().createTranspose(I) @staticmethod def max_degree(ele): @@ -417,6 +422,7 @@ def reconstruct_degree(ele, degree): class PMGPC(PCBase, PMGBase): _prefix = "pmg_" + _is_linear = True def configure_pmg(self, pc, pdm): odm = pc.getDM() @@ -455,6 +461,7 @@ def coarsen_residual(self, Fc, Jc, uc): class PMGSNES(SNESBase, PMGBase): _prefix = "pfas_" + _is_linear = False def configure_pmg(self, snes, pdm): odm = snes.getDM() @@ -470,7 +477,6 @@ def configure_pmg(self, snes, pdm): psnes.setFunction(fun, f.duplicate(), args=args, kargs=kargs) pdm.setGlobalVector(f.duplicate()) - self.dummy = f.duplicate() psnes.setSolution(f.duplicate()) # PETSc unfortunately requires us to make an ugly hack. 
@@ -491,7 +497,7 @@ def step(self, snes, x, f, y): ctx = get_appctx(snes.dm) push_appctx(self.ppc.dm, ctx) x.copy(y) - self.ppc.solve(snes.vec_rhs or self.dummy, y) + self.ppc.solve(snes.vec_rhs or None, y) y.aypx(-1, x) snes.setConvergedReason(self.ppc.getConvergedReason()) pop_appctx(self.ppc.dm) @@ -1391,7 +1397,7 @@ def make_blas_kernels(Vf, Vc): except Exception: qelem = ufl.FiniteElement("DQ", cell=felem.cell(), degree=PMGBase.max_degree(felem)) if felem.value_shape(): - qelem = ufl.TensorElement(qelem, shape=felem._shape, symmetry=felem.symmetry()) + qelem = ufl.TensorElement(qelem, shape=felem.value_shape(), symmetry=felem.symmetry()) Qf = firedrake.FunctionSpace(Vf.ufl_domain(), qelem) mapping_output = make_mapping_code(Qf, fmapping, cmapping, "t0", "t1") diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index 17e4e33296..17f4ba790e 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -52,7 +52,7 @@ def test_prolongation_matrix_matfree(): if u != v: v.assign(0) P = prolongation_matrix_matfree(v, u).getPythonContext() - P._prolong() + P._kernels[0]() assert norm(v-expr, "L2") < tol @@ -240,7 +240,8 @@ def test_p_multigrid_mixed(mat_type): "ksp_max_it": 3, "pc_type": "jacobi"} - coarse = {"ksp_type": "richardson", + coarse = {"mat_type": "aij", + "ksp_type": "richardson", "ksp_max_it": 1, "ksp_norm_type": "unpreconditioned", "ksp_monitor": None, @@ -255,12 +256,12 @@ def test_p_multigrid_mixed(mat_type): "ksp_monitor_true_residual": None, "pc_type": "python", "pc_python_type": "firedrake.PMGPC", - # "mat_type": mat_type, # FIXME bug with mat-free jacobi on MixedFunctionSpace + "mat_type": mat_type, "pmg_pc_mg_type": "multiplicative", "pmg_mg_levels": relax, "pmg_mg_coarse": coarse} - basis = VectorSpaceBasis([assemble(TestFunction(Z.sub(1))*dx)]) + basis = VectorSpaceBasis([interpolate(Constant(1), Z.sub(1))]) basis.orthonormalize() nullspace = MixedVectorSpaceBasis(Z, [Z.sub(0), basis]) problem = NonlinearVariationalProblem(F, z, bcs) @@ -313,6 +314,7 @@ def test_p_fas_scalar(): atol = rtol * Fnorm coarse = { + "mat_type": "aij", "ksp_type": "preonly", "ksp_norm_type": None, "pc_type": "cholesky"} @@ -321,7 +323,6 @@ def test_p_fas_scalar(): "ksp_type": "chebyshev", "ksp_monitor_true_residual": None, "ksp_norm_type": "unpreconditioned", - "ksp_max_it": 3, "pc_type": "jacobi"} pmg = { @@ -340,7 +341,7 @@ def test_p_fas_scalar(): "pmg_mg_coarse": coarse} pfas = { - "mat_type": "aij", + "mat_type": mat_type, "snes_monitor": None, "snes_converged_reason": None, "snes_atol": atol, @@ -364,23 +365,23 @@ def test_p_fas_scalar(): @pytest.mark.skipcomplex def test_p_fas_nonlinear_scalar(): mat_type = "matfree" - N = 4 - dxq = dx(degree=3*N+2) # here we also test coarsening of quadrature degree + degree = 4 + dxq = dx(degree=3*degree+2) # here we also test coarsening of quadrature degree mesh = UnitSquareMesh(4, 4, quadrilateral=True) - V = FunctionSpace(mesh, "CG", N) + V = FunctionSpace(mesh, "CG", degree) u = Function(V) f = Constant(1) bcs = DirichletBC(V, 0, "on_boundary") # Regularized p-Laplacian p = 5 - eps = 1 + eps = Constant(1) y = eps + inner(grad(u), grad(u)) E = (1/p)*(y**(p/2))*dxq - inner(f, u)*dxq F = derivative(E, u, TestFunction(V)) - fcp = {"quadrature_degree": 3*N+2} + fcp = {"quadrature_degree": 3*degree+2} problem = NonlinearVariationalProblem(F, u, bcs, form_compiler_parameters=fcp) # Due to the convoluted nature of the nested iteration From 5c5d861ce43acf4252c7f42beab769e16f09dde2 Mon Sep 
17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 8 Mar 2023 15:31:43 +0000 Subject: [PATCH 06/75] fix tests --- firedrake/preconditioners/pmg.py | 28 +++++++++++----------------- tests/multigrid/test_p_multigrid.py | 2 +- tests/regression/test_fdm.py | 1 - 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index bc533370c0..c0378afd8f 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -48,7 +48,6 @@ class PMGBase(PCSNESBase): """ _prefix = "pmg_" - _is_linear = False def coarsen_element(self, ele): """ @@ -92,7 +91,6 @@ def initialize(self, pc): prefix = pc.getOptionsPrefix() options_prefix = prefix + self._prefix - opts = PETSc.Options(options_prefix) pdm = PETSc.DMShell().create(comm=pc.comm) pdm.setOptionsPrefix(options_prefix) @@ -144,7 +142,6 @@ def initialize(self, pc): add_hook(parent, setup=partial(push_appctx, pdm, ctx), teardown=partial(pop_appctx, pdm, ctx), call_setup=True) self.ppc.setUp() - def update(self, pc): pass @@ -189,10 +186,9 @@ def coarsen(self, fdm, comm): fdeg = PMGBase.max_degree(fV.ufl_element()) cdeg = PMGBase.max_degree(cV.ufl_element()) - fine_to_coarse_map = {test: test.reconstruct(function_space=cV), + fine_to_coarse_map = {fu: cu, + test: test.reconstruct(function_space=cV), trial: trial.reconstruct(function_space=cV)} - if not self._is_linear: - fine_to_coarse_map[fu] = cu def _coarsen_form(a): if isinstance(a, ufl.Form): @@ -238,8 +234,7 @@ def _coarsen_form(a): # Coarsen the problem and the _SNESContext cproblem = firedrake.NonlinearVariationalProblem(cF, cu, bcs=cbcs, J=cJ, Jp=cJp, - form_compiler_parameters=fcp, - is_linear=self._is_linear) + form_compiler_parameters=fcp) cctx = type(fctx)(cproblem, mat_type, pmat_type, appctx=cappctx, @@ -265,11 +260,11 @@ def _coarsen_form(a): interp_petscmat, _ = cdm.createInterpolation(fdm) inject_petscmat = cdm.createInjection(fdm) - if not self._is_linear: + if cu in cJ.coefficients(): # injection of the initial state def inject_state(): with cu.dat.vec_wo as xc, fu.dat.vec_ro as xf: - inject_petscmat.multTranspose(xf, xc) + inject_petscmat.mult(xf, xc) add_hook(parent, setup=inject_state, call_setup=True) @@ -303,9 +298,9 @@ def coarsen_nullspace(coarse_V, mat, fine_nullspace): return fine_nullspace ises = cV._ises - cctx._nullspace = coarsen_nullspace(cV, inject_petscmat, fctx._nullspace) + cctx._nullspace = coarsen_nullspace(cV, interp_petscmat, fctx._nullspace) cctx.set_nullspace(cctx._nullspace, ises, transpose=False, near=False) - cctx._near_nullspace = coarsen_nullspace(cV, inject_petscmat, fctx._near_nullspace) + cctx._near_nullspace = coarsen_nullspace(cV, interp_petscmat, fctx._near_nullspace) cctx.set_nullspace(cctx._near_nullspace, ises, transpose=False, near=True) cctx._nullspace_T = coarsen_nullspace(cV, interp_petscmat, fctx._nullspace_T) cctx.set_nullspace(cctx._nullspace_T, ises, transpose=True, near=False) @@ -352,13 +347,14 @@ def create_transfer(cctx, fctx, mat_type, cbcs, fbcs): def create_interpolation(self, dmc, dmf): prefix = dmc.getOptionsPrefix() mat_type = PETSc.Options(prefix).getString("mg_levels_transfer_mat_type", default="matfree") - return self.create_transfer(get_appctx(dmc), get_appctx(dmf), mat_type, True, False), None + interpolate = self.create_transfer(get_appctx(dmc), get_appctx(dmf), mat_type, True, False) + rscale = interpolate.createVecRight() # only used as a workaround in the creation of coarse vecs + return interpolate, rscale def 
create_injection(self, dmc, dmf): prefix = dmc.getOptionsPrefix() mat_type = PETSc.Options(prefix).getString("mg_levels_transfer_mat_type", default="matfree") - I = self.create_transfer(get_appctx(dmf), get_appctx(dmc), mat_type, False, False) - return PETSc.Mat().createTranspose(I) + return self.create_transfer(get_appctx(dmf), get_appctx(dmc), mat_type, False, False) @staticmethod def max_degree(ele): @@ -422,7 +418,6 @@ def reconstruct_degree(ele, degree): class PMGPC(PCBase, PMGBase): _prefix = "pmg_" - _is_linear = True def configure_pmg(self, pc, pdm): odm = pc.getDM() @@ -461,7 +456,6 @@ def coarsen_residual(self, Fc, Jc, uc): class PMGSNES(SNESBase, PMGBase): _prefix = "pfas_" - _is_linear = False def configure_pmg(self, snes, pdm): odm = snes.getDM() diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index 17f4ba790e..89d7c94efb 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -261,7 +261,7 @@ def test_p_multigrid_mixed(mat_type): "pmg_mg_levels": relax, "pmg_mg_coarse": coarse} - basis = VectorSpaceBasis([interpolate(Constant(1), Z.sub(1))]) + basis = VectorSpaceBasis([assemble(TestFunction(Z.sub(1))*dx)]) basis.orthonormalize() nullspace = MixedVectorSpaceBasis(Z, [Z.sub(0), basis]) problem = NonlinearVariationalProblem(F, z, bcs) diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index cefb464680..6934bf0a53 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -12,7 +12,6 @@ "ksp_converged_reason": None, "pc_type": "python", "pc_python_type": "firedrake.P1PC", - "pmg_coarse_mat_type": "aij", "pmg_mg_coarse": { "mat_type": "aij", "ksp_type": "preonly", From d9a743ca098d8a72905178294b66727d3f0437da Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 8 Mar 2023 16:20:30 +0000 Subject: [PATCH 07/75] fix docs --- firedrake/preconditioners/fdm.py | 12 ++++++++---- firedrake/preconditioners/pmg.py | 5 +++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 88be79b597..1cd769ae27 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -182,10 +182,12 @@ def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, appctx, pmat_type """ Assemble the sparse preconditioner with cell-wise constant coefficients. - :arg V: the :class:`firedrake.FunctionSpace` of the form arguments + :arg V: the :class:`.FunctionSpace` of the form arguments :arg J: the Jacobian bilinear form :arg bcs: an iterable of boundary conditions on V + :arg form_compiler_parameters: parameters to assemble diagonal factors :arg appctx: the application context + :pmat_type: the preconditioner `PETSc.Mat.Type` :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and its assembly callable """ @@ -1139,8 +1141,10 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): Assemble the stiffness matrix in the FDM basis using Kronecker products of interval matrices :arg A: the :class:`PETSc.Mat` to assemble - :arg Vrow: the :class:`firedrake.FunctionSpace` test space - :arg Vcol: the :class:`firedrake.FunctionSpace` trial space + :arg Vrow: the :class:`.FunctionSpace` test space + :arg Vcol: the :class:`.FunctionSpace` trial space + :arg addv: a `PETSc.Mat.InsertMode` + :arg triu: are we assembling only the upper triangular part? 
""" set_values_csr = self.load_set_values(triu=triu) update_A = lambda A, Ae, rindices: set_values_csr(A, Ae, rindices, rindices, addv) @@ -1632,7 +1636,7 @@ def get_interior_facet_maps(V): """ Extrude V.interior_facet_node_map and V.ufl_domain().interior_facets.local_facet_dat - :arg V: a :class:`FunctionSpace` + :arg V: a :class:`.FunctionSpace` :returns: the 3-tuple of facet_to_nodes_fun: maps interior facets to the nodes of the two cells sharing it, diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index c0378afd8f..74120b81a2 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -234,7 +234,8 @@ def _coarsen_form(a): # Coarsen the problem and the _SNESContext cproblem = firedrake.NonlinearVariationalProblem(cF, cu, bcs=cbcs, J=cJ, Jp=cJp, - form_compiler_parameters=fcp) + form_compiler_parameters=fcp, + is_linear=fproblem.is_linear) cctx = type(fctx)(cproblem, mat_type, pmat_type, appctx=cappctx, @@ -471,7 +472,7 @@ def configure_pmg(self, snes, pdm): psnes.setFunction(fun, f.duplicate(), args=args, kargs=kargs) pdm.setGlobalVector(f.duplicate()) - psnes.setSolution(f.duplicate()) + psnes.setSolution(snes.getSolution()) # PETSc unfortunately requires us to make an ugly hack. # We would like to use GMG for the coarse solve, at least From 49a34f0cd805183c480f54ba037377b5434c0f7b Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 8 Mar 2023 17:21:33 +0000 Subject: [PATCH 08/75] add FDM tests for H(curl) and H(div) --- tests/regression/test_fdm.py | 52 +++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index 6934bf0a53..aa4946c24b 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -70,15 +70,14 @@ def variant(request): @pytest.mark.skipcomplex def test_p_independence(mesh, expected, variant): nits = [] - for p in range(3, 6): - e = FiniteElement("Lagrange", cell=mesh.ufl_cell(), degree=p, variant=variant) + for degree, nits in zip(range(3, 6), expected): + e = FiniteElement("Lagrange", cell=mesh.ufl_cell(), degree=degree, variant=variant) V = FunctionSpace(mesh, e) u = TrialFunction(V) v = TestFunction(V) - ndim = mesh.geometric_dimension() x = SpatialCoordinate(mesh) - x -= Constant([0.5]*ndim) + x -= Constant([0.5]*len(x)) u_exact = dot(x, x) f_exact = grad(u_exact) B = -div(f_exact) @@ -95,9 +94,50 @@ def test_p_independence(mesh, expected, variant): problem = LinearVariationalProblem(a, L, uh, bcs=bcs) solver = LinearVariationalSolver(problem, solver_parameters=fdmstar) solver.solve() - nits.append(solver.snes.ksp.getIterationNumber()) + assert solver.snes.ksp.getIterationNumber() <= nits assert norm(u_exact-uh, "H1") < 2.0E-7 - assert nits <= expected + + +def solve_riesz_map(V, d): + beta = Constant(1E-8) + subs = [(1, 3)] + + x = SpatialCoordinate(V.mesh()) + x -= Constant([0.5]*len(x)) + expr = x * exp(-10*dot(x, x)) + if V.mesh().extruded: + subs += ["top"] + + u_exact = Function(V) + u_exact.project(expr, solver_parameters={"mat_type": "matfree", "pc_type": "jacobi"}) + bcs = [DirichletBC(V, u_exact, sub) for sub in subs] + + uh = Function(V) + test = TestFunction(V) + trial = TrialFunction(V) + a = lambda v, u: inner(v, beta*u)*dx + inner(d(v), d(u))*dx + problem = LinearVariationalProblem(a(test, trial), a(test, u_exact), uh, bcs=bcs) + solver = LinearVariationalSolver(problem, solver_parameters=fdmstar) + solver.solve() + return solver.snes.ksp.getIterationNumber() + + 
+@pytest.mark.skipcomplex +def test_hcurl(mesh, expected): + family = "NCE" if mesh.topological_dimension() == 3 else "RTCE" + for degree, nits in zip(range(3, 6), expected): + element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") + V = FunctionSpace(mesh, element) + assert solve_riesz_map(V, curl) <= nits + + +@pytest.mark.skipcomplex +def test_hdiv(mesh, expected): + family = "NCF" if mesh.topological_dimension() == 3 else "RTCF" + for degree, nits in zip(range(3, 6), expected): + element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") + V = FunctionSpace(mesh, element) + assert solve_riesz_map(V, div) <= nits @pytest.mark.skipcomplex From 73976775b4d2888c434b2b38e38b7f24fb4dc319 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 9 Mar 2023 09:53:42 +0000 Subject: [PATCH 09/75] fix serendipity p1pc test --- tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py | 2 +- tests/regression/test_fdm.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py b/tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py index f82acdf62e..b9eb6e830a 100644 --- a/tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py +++ b/tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py @@ -10,12 +10,12 @@ def run_poisson(): "ksp_monitor": None, "pc_type": "python", "pc_python_type": "firedrake.P1PC", - "pmg_coarse_degree": coarse_deg, "pmg_mg_levels": { "ksp_type": "chebyshev", "ksp_max_it": 2, "pc_type": "jacobi"}, "pmg_mg_coarse": { + "degree": coarse_deg, "ksp_type": "preonly", "pc_type": "lu", "pc_factor_mat_solver_type": "mumps" diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index aa4946c24b..125881797d 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -101,12 +101,12 @@ def test_p_independence(mesh, expected, variant): def solve_riesz_map(V, d): beta = Constant(1E-8) subs = [(1, 3)] + if V.mesh().extruded: + subs += ["top"] x = SpatialCoordinate(V.mesh()) x -= Constant([0.5]*len(x)) expr = x * exp(-10*dot(x, x)) - if V.mesh().extruded: - subs += ["top"] u_exact = Function(V) u_exact.project(expr, solver_parameters={"mat_type": "matfree", "pc_type": "jacobi"}) From da2c6b50c33620c1e6162a7e4afaee68f9e8fda4 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 9 Mar 2023 10:04:54 +0000 Subject: [PATCH 10/75] remove print statement --- firedrake/preconditioners/pmg.py | 1 - 1 file changed, 1 deletion(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 74120b81a2..0e00b2008f 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -97,7 +97,6 @@ def initialize(self, pc): self.ppc = self.configure_pmg(pc, pdm) self.ppc.setFromOptions() - print(self.ppc.getOptionsPrefix()) copts = PETSc.Options(self.ppc.getOptionsPrefix()+self.ppc.getType()+"_coarse_") # Get the coarse degree from PETSc options From 8b2a415bd91f4d04880c88fd819113c02578f4c8 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 9 Mar 2023 15:31:03 +0000 Subject: [PATCH 11/75] fix typo in ValueError --- firedrake/preconditioners/gtmg.py | 6 +++--- firedrake/preconditioners/patch.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/firedrake/preconditioners/gtmg.py b/firedrake/preconditioners/gtmg.py index 35f1f1e570..d11cc548c1 100644 --- a/firedrake/preconditioners/gtmg.py +++ b/firedrake/preconditioners/gtmg.py @@ -29,7 +29,7 @@ def 
initialize(self, pc): if ctx is None: raise ValueError("No context found.") if not isinstance(ctx, _SNESContext): - raise ValueError("Don't know how to get form from %r", ctx) + raise ValueError("Don't know how to get form from %r" % ctx) prefix = pc.getOptionsPrefix() options_prefix = prefix + self._prefix @@ -41,7 +41,7 @@ def initialize(self, pc): if ictx is None: raise ValueError("No context found on matrix") if not isinstance(ictx, ImplicitMatrixContext): - raise ValueError("Don't know how to get form from %r", ictx) + raise ValueError("Don't know how to get form from %r" % ictx) fine_operator = ictx.a fine_bcs = ictx.row_bcs @@ -70,7 +70,7 @@ def initialize(self, pc): fine_petscmat.setTransposeNullSpace(fine_transpose_nullspace) # Handle the coarse operator - coarse_options_prefix = options_prefix + "mg_coarse" + coarse_options_prefix = options_prefix + "mg_coarse_" coarse_mat_type = opts.getString(coarse_options_prefix + "mat_type", parameters["default_matrix_type"]) diff --git a/firedrake/preconditioners/patch.py b/firedrake/preconditioners/patch.py index 2a20f2e2f8..8c1919aecd 100644 --- a/firedrake/preconditioners/patch.py +++ b/firedrake/preconditioners/patch.py @@ -747,14 +747,14 @@ def initialize(self, obj): if ctx is None: raise ValueError("No context found on form") if not isinstance(ctx, _SNESContext): - raise ValueError("Don't know how to get form from %r", ctx) + raise ValueError("Don't know how to get form from %r" % ctx) if P.getType() == "python": ictx = P.getPythonContext() if ictx is None: raise ValueError("No context found on matrix") if not isinstance(ictx, ImplicitMatrixContext): - raise ValueError("Don't know how to get form from %r", ictx) + raise ValueError("Don't know how to get form from %r" % ictx) J = ictx.a bcs = ictx.row_bcs if bcs != ictx.col_bcs: From 15034350791ebd9c4bcd13b92a4bbbc1e6a50180 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 9 Mar 2023 17:12:31 +0000 Subject: [PATCH 12/75] address review comments --- firedrake/preconditioners/fdm.py | 52 ++------------ firedrake/preconditioners/pmg.py | 22 ++++-- tests/regression/test_fdm.py | 116 ++++++++++++------------------- 3 files changed, 68 insertions(+), 122 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 1cd769ae27..9c02a5c1f0 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -387,9 +387,9 @@ def RtAP(R, A, P, result=None): if Vrow == Vcol: get_cindices = lambda e, result=None: result update_A = lambda Ae, rindices, cindices: set_values_csr(A, Ae, rindices, rindices, addv) - rtensor = self.reference_tensor_on_diag.get(Vrow, None) or self.assemble_reference_tensor(Vrow) + rtensor = self.reference_tensor_on_diag.get(Vrow) or self.assemble_reference_tensor(Vrow) assemble_element_mat = lambda De, result=None: De.PtAP(rtensor, result=result) - condense_element_mat = self.get_static_condensation.get(Vrow, None) + condense_element_mat = self.get_static_condensation.get(Vrow) else: get_cindices = self.cell_to_global[Vcol] update_A = lambda Ae, rindices, cindices: set_values_csr(A, Ae, rindices, cindices, addv) @@ -543,48 +543,6 @@ def assemble_coef(self, J, form_compiler_parameters): key = (mixed_form.signature(), mesh) block_diagonal = True - if key not in self._coefficient_cache and False: - M = assemble(mixed_form, mat_type="matfree", - form_compiler_parameters=form_compiler_parameters) - - coefs = [] - mats = [] - for iset in Z.dof_dset.field_ises: - Msub = M.petscmat.createSubMatrix(iset, iset) - 
coefs.append(Msub.getPythonContext()._diagonal) - mats.append(Msub) - - def scale_coefficients(): - for Msub, coef in zip(mats, coefs): - ksp = PETSc.KSP().create(comm=V.comm) - ksp.setOperators(A=Msub, P=Msub) - ksp.setType(PETSc.KSP.Type.CG) - ksp.setNormType(PETSc.KSP.NormType.NATURAL) - ksp.pc.setType(PETSc.PC.Type.JACOBI) - ksp.setTolerances(rtol=1E-3, atol=0.0E0, max_it=8) - ksp.setComputeEigenvalues(True) - ksp.setUp() - - x = Msub.createVecRight() - b = Msub.createVecLeft() - x.set(0) - b.setRandom() - ksp.solve(b, x) - ew = numpy.real(ksp.computeEigenvalues()) - ksp.destroy() - x.destroy() - b.destroy() - dscale = (max(ew) + min(ew))/2 - dscale = sum(ew) / len(ew) - scale = dscale if dscale == dscale else 1 - with coef.dat.vec as diag: - diag.scale(scale) - - coefficients = {"beta": coefs[0], "alpha": coefs[1]} - assembly_callables = [scale_coefficients] - self._coefficient_cache[key] = (coefficients, assembly_callables) - return self._coefficient_cache[key] - if key not in self._coefficient_cache: if not block_diagonal or not V.shape: tensor = firedrake.Function(Z) @@ -621,8 +579,7 @@ def assemble_reference_tensor(self, V): key = (degree, ndim, formdegree, V.value_size, is_interior, is_facet) cache = self._reference_tensor_cache if key not in cache: - full_key = (degree, ndim, formdegree, V.value_size, 0, 0) - + full_key = (degree, ndim, formdegree, V.value_size, False, False) if is_facet and full_key in cache: result = cache[full_key] noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.comm) @@ -1148,11 +1105,10 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): """ set_values_csr = self.load_set_values(triu=triu) update_A = lambda A, Ae, rindices: set_values_csr(A, Ae, rindices, rindices, addv) - condense_element_mat = self.get_static_condensation.get(Vrow, lambda x: x) condense_element_mat = lambda x: x get_rindices = self.cell_to_global[Vrow] - rtensor = self.reference_tensor_on_diag.get(Vrow, None) or self.assemble_reference_tensor(Vrow) + rtensor = self.reference_tensor_on_diag.get(Vrow) or self.assemble_reference_tensor(Vrow) self.reference_tensor_on_diag[Vrow] = rtensor Afdm, Dfdm, bdof = rtensor diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 0e00b2008f..56341599a6 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -83,7 +83,7 @@ def initialize(self, pc): if ctx is None: raise ValueError("No context found.") if not isinstance(ctx, _SNESContext): - raise ValueError("Don't know how to get form from %r", ctx) + raise ValueError("Don't know how to get form from %r" % ctx) test, trial = ctx.J.arguments() if test.function_space() != trial.function_space(): @@ -521,6 +521,14 @@ def load_c_code(code, name, argtypes, comm): def reference_moments(*args, **kwargs): + """ + Return a python function that computes the L2 inner product of the + arguments in the reference cell. + + :arg test: the test `ufl.Argument` + :arg trial: the trial `ufl.Argument` or `ufl.Coefficient` + :kwarg diagonal: are we assembling the diagonal of the bilinear form? + """ import ctypes from tsfc import compile_form quad_degree = 1+sum([PMGBase.max_degree(t.ufl_element()) for t in args]) @@ -555,6 +563,9 @@ def _wrapper(*args): @lru_cache(maxsize=10) def matfree_reference_prolongator(Vf, Vc): + """ + Return the prolongation from Vc to Vf on the reference element. 
+ """ dimf = Vf.value_size * Vf.finat_element.space_dimension() dimc = Vc.value_size * Vc.finat_element.space_dimension() build_Afc = reference_moments(ufl.TestFunction(Vf), ufl.TrialFunction(Vc)) @@ -607,8 +618,8 @@ def expand_element(ele): """ Expand a FiniteElement as an EnrichedElement of TensorProductElements, discarding modifiers. """ - if ele.cell().cellname().startswith("quadrilateral"): + # Handle immersed quadrilaterals quadrilateral_tpc = ufl.TensorProductCell(ufl.interval, ufl.interval) return expand_element(ele.reconstruct(cell=quadrilateral_tpc)) elif ele.cell() == ufl.hexahedron: @@ -967,6 +978,9 @@ def _tabulate(e, ps, entity=None): @PETSc.Log.EventDecorator("MakeKronCode") def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): + """ + Return interpolation and restriction sub-kernels between enriched tensor product elements + """ operator_decl = [] prolong_code = [] restrict_code = [] @@ -1271,13 +1285,13 @@ def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): self.uf = Vf Vf = Vf.function_space() else: - self.uf = self._cache_work.get(Vf, firedrake.Function(Vf)) + self.uf = self._cache_work.get(Vf) or firedrake.Function(Vf) self._cache_work[Vf] = self.uf if isinstance(Vc, firedrake.Function): self.uc = Vc Vc = Vc.function_space() else: - self.uc = self._cache_work.get(Vc, firedrake.Function(Vc)) + self.uc = self._cache_work.get(Vc) or firedrake.Function(Vc) self._cache_work[Vc] = self.uc self.Vf = Vf self.Vc = Vc diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index 125881797d..4699b129b5 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -7,9 +7,8 @@ "ksp_type": "cg", "ksp_atol": 0.0E0, "ksp_rtol": 1.0E-8, - "ksp_norm_type": "unpreconditioned", - "ksp_monitor_true_residual": None, - "ksp_converged_reason": None, + "ksp_norm_type": "natural", + "ksp_monitor": None, "pc_type": "python", "pc_python_type": "firedrake.P1PC", "pmg_mg_coarse": { @@ -39,6 +38,34 @@ } +def solve_riesz_map(V, d): + beta = Constant(1E-8) + subs = [(1, 3)] + if V.mesh().cell_set._extruded: + subs += ["top"] + + x = SpatialCoordinate(V.mesh()) + x -= Constant([0.5]*len(x)) + if V.ufl_element().value_shape() == (): + u_exact = exp(-10*dot(x, x)) + u_bc = u_exact + else: + u_exact = x * exp(-10*dot(x, x)) + u_bc = Function(V) + u_bc.project(u_exact, solver_parameters={"mat_type": "matfree", "pc_type": "jacobi"}) + + bcs = [DirichletBC(V, u_bc, sub) for sub in subs] + + uh = Function(V) + test = TestFunction(V) + trial = TrialFunction(V) + a = lambda v, u: inner(v, beta*u)*dx + inner(d(v), d(u))*dx + problem = LinearVariationalProblem(a(test, trial), a(test, u_exact), uh, bcs=bcs) + solver = LinearVariationalSolver(problem, solver_parameters=fdmstar) + solver.solve() + return solver.snes.ksp.getIterationNumber() + + @pytest.fixture(params=[2, 3], ids=["Rectangle", "Box"]) def mesh(request): @@ -54,90 +81,39 @@ def mesh(request): return m -@pytest.fixture -def expected(mesh): - if mesh.topological_dimension() == 2: - return [5, 5, 5] - elif mesh.topological_dimension() == 3: - return [8, 8, 8] - - @pytest.fixture(params=[None, "fdm"], ids=["spectral", "fdm"]) def variant(request): return request.param @pytest.mark.skipcomplex -def test_p_independence(mesh, expected, variant): - nits = [] - for degree, nits in zip(range(3, 6), expected): - e = FiniteElement("Lagrange", cell=mesh.ufl_cell(), degree=degree, variant=variant) - V = FunctionSpace(mesh, e) - u = TrialFunction(V) - v = TestFunction(V) - - x = SpatialCoordinate(mesh) - x -= Constant([0.5]*len(x)) - 
u_exact = dot(x, x) - f_exact = grad(u_exact) - B = -div(f_exact) - - a = inner(grad(v), grad(u))*dx - L = inner(v, B)*dx - - subs = ("on_boundary",) - if mesh.cell_set._extruded: - subs += ("top", "bottom") - bcs = [DirichletBC(V, u_exact, sub) for sub in subs] - - uh = Function(V) - problem = LinearVariationalProblem(a, L, uh, bcs=bcs) - solver = LinearVariationalSolver(problem, solver_parameters=fdmstar) - solver.solve() - assert solver.snes.ksp.getIterationNumber() <= nits - assert norm(u_exact-uh, "H1") < 2.0E-7 - - -def solve_riesz_map(V, d): - beta = Constant(1E-8) - subs = [(1, 3)] - if V.mesh().extruded: - subs += ["top"] - - x = SpatialCoordinate(V.mesh()) - x -= Constant([0.5]*len(x)) - expr = x * exp(-10*dot(x, x)) - - u_exact = Function(V) - u_exact.project(expr, solver_parameters={"mat_type": "matfree", "pc_type": "jacobi"}) - bcs = [DirichletBC(V, u_exact, sub) for sub in subs] - - uh = Function(V) - test = TestFunction(V) - trial = TrialFunction(V) - a = lambda v, u: inner(v, beta*u)*dx + inner(d(v), d(u))*dx - problem = LinearVariationalProblem(a(test, trial), a(test, u_exact), uh, bcs=bcs) - solver = LinearVariationalSolver(problem, solver_parameters=fdmstar) - solver.solve() - return solver.snes.ksp.getIterationNumber() +def test_p_independence_hgrad(mesh, variant): + family = "Lagrange" + expected = 9 if mesh.topological_dimension() == 3 else 5 + for degree in range(3, 6): + element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant=variant) + V = FunctionSpace(mesh, element) + assert solve_riesz_map(V, grad) <= expected @pytest.mark.skipcomplex -def test_hcurl(mesh, expected): +def test_p_independence_hcurl(mesh): family = "NCE" if mesh.topological_dimension() == 3 else "RTCE" - for degree, nits in zip(range(3, 6), expected): + expected = 6 if mesh.topological_dimension() == 3 else 3 + for degree in range(3, 6): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) - assert solve_riesz_map(V, curl) <= nits + assert solve_riesz_map(V, curl) <= expected @pytest.mark.skipcomplex -def test_hdiv(mesh, expected): +def test_p_independence_hdiv(mesh): family = "NCF" if mesh.topological_dimension() == 3 else "RTCF" - for degree, nits in zip(range(3, 6), expected): + expected = 2 + for degree in range(3, 6): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) - assert solve_riesz_map(V, div) <= nits + assert solve_riesz_map(V, div) <= expected @pytest.mark.skipcomplex @@ -148,7 +124,7 @@ def test_variable_coefficient(mesh): u = TrialFunction(V) v = TestFunction(V) x = SpatialCoordinate(mesh) - x -= Constant([0.5]*ndim) + x -= Constant([0.5]*len(x)) # variable coefficients alphas = [0.1+10*dot(x, x)]*ndim From 93088e6d38c526977819bb571b5d6b981acc7958 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 9 Mar 2023 18:19:39 +0000 Subject: [PATCH 13/75] add facetsplit tests --- tests/regression/test_fdm.py | 121 +++++++++++++++++------------------ 1 file changed, 57 insertions(+), 64 deletions(-) diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index 4699b129b5..63129c455b 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -1,29 +1,31 @@ import pytest from firedrake import * - -fdmstar = { +ksp = { "mat_type": "matfree", "ksp_type": "cg", "ksp_atol": 0.0E0, "ksp_rtol": 1.0E-8, "ksp_norm_type": "natural", "ksp_monitor": None, +} + +coarse = { + "mat_type": "aij", + "ksp_type": 
"preonly", + "pc_type": "cholesky", +} + +fdmstar = { "pc_type": "python", "pc_python_type": "firedrake.P1PC", - "pmg_mg_coarse": { - "mat_type": "aij", - "ksp_type": "preonly", - "pc_type": "cholesky", - }, + "pmg_mg_coarse": coarse, "pmg_mg_levels": { "ksp_type": "chebyshev", "ksp_norm_type": "none", "esteig_ksp_type": "cg", "esteig_ksp_norm_type": "natural", "ksp_chebyshev_esteig": "0.75,0.25,0.0,1.0", - "ksp_chebyshev_esteig_noisy": True, - "ksp_chebyshev_esteig_steps": 8, "pc_type": "python", "pc_python_type": "firedrake.FDMPC", "fdm": { @@ -31,12 +33,44 @@ "pc_python_type": "firedrake.ASMExtrudedStarPC", "pc_star_mat_ordering_type": "nd", "pc_star_sub_sub_pc_type": "cholesky", - "pc_star_sub_sub_pc_factor_mat_solver_type": "petsc", - "pc_star_sub_sub_pc_factor_mat_ordering_type": "natural", } } } +facetstar = { + "pc_type": "python", + "pc_python_type": "firedrake.FacetSplitPC", + "facet_pc_type": "python", + "facet_pc_python_type": "firedrake.FDMPC", + "facet_fdm_pc_use_amat": False, + "facet_fdm_pc_type": "fieldsplit", + "facet_fdm_pc_fieldsplit_type": "symmetric_multiplicative", + "facet_fdm_fieldsplit_0": { + "ksp_type": "preonly", + "pc_type": "icc", + }, + "facet_fdm_fieldsplit_1": { + "ksp_type": "preonly", + "pc_type": "python", + "pc_python_type": "firedrake.P1PC", + "pmg_mg_coarse": coarse, + "pmg_mg_levels": { + "ksp_type": "chebyshev", + "ksp_norm_type": "none", + "esteig_ksp_type": "cg", + "esteig_ksp_norm_type": "natural", + "ksp_chebyshev_esteig": "0.75,0.25,0.0,1.0", + "pc_type": "python", + "pc_python_type": "firedrake.ASMExtrudedStarPC", + "pc_star_mat_ordering_type": "nd", + "pc_star_sub_sub_pc_type": "cholesky", + } + } +} + +fdmstar.update(ksp) +facetstar.update(ksp) + def solve_riesz_map(V, d): beta = Constant(1E-8) @@ -61,9 +95,13 @@ def solve_riesz_map(V, d): trial = TrialFunction(V) a = lambda v, u: inner(v, beta*u)*dx + inner(d(v), d(u))*dx problem = LinearVariationalProblem(a(test, trial), a(test, u_exact), uh, bcs=bcs) - solver = LinearVariationalSolver(problem, solver_parameters=fdmstar) - solver.solve() - return solver.snes.ksp.getIterationNumber() + its = [] + for sparams in [fdmstar, facetstar]: + uh.assign(0) + solver = LinearVariationalSolver(problem, solver_parameters=sparams) + solver.solve() + its.append(solver.snes.ksp.getIterationNumber()) + return its @pytest.fixture(params=[2, 3], @@ -87,11 +125,11 @@ def variant(request): @pytest.mark.skipcomplex -def test_p_independence_hgrad(mesh, variant): +def test_p_independence_hgrad(mesh): family = "Lagrange" - expected = 9 if mesh.topological_dimension() == 3 else 5 + expected = [9, 9] if mesh.topological_dimension() == 3 else [5, 5] for degree in range(3, 6): - element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant=variant) + element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) assert solve_riesz_map(V, grad) <= expected @@ -99,7 +137,7 @@ def test_p_independence_hgrad(mesh, variant): @pytest.mark.skipcomplex def test_p_independence_hcurl(mesh): family = "NCE" if mesh.topological_dimension() == 3 else "RTCE" - expected = 6 if mesh.topological_dimension() == 3 else 3 + expected = [6, 6] if mesh.topological_dimension() == 3 else [3, 3] for degree in range(3, 6): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) @@ -109,7 +147,7 @@ def test_p_independence_hcurl(mesh): @pytest.mark.skipcomplex def test_p_independence_hdiv(mesh): family = "NCF" if 
mesh.topological_dimension() == 3 else "RTCF" - expected = 2 + expected = [2, 2] for degree in range(3, 6): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) @@ -271,48 +309,3 @@ def test_ipdg_direct_solver(fs): assert solver.snes.ksp.getIterationNumber() == 1 assert norm(u_exact-uh, "H1") < 1.0E-8 - - -@pytest.mark.skipcomplex -def test_static_condensation(mesh): - degree = 3 - quad_degree = 2*degree+1 - cell = mesh.ufl_cell() - e = FiniteElement("Lagrange", cell=cell, degree=degree, variant="fdm") - Z = FunctionSpace(mesh, MixedElement(*[RestrictedElement(e, d) for d in ("interior", "facet")])) - z = Function(Z) - u = sum(split(z)) - - f = Constant(1) - U = ((1/2)*inner(grad(u), grad(u)) - inner(u, f))*dx(degree=quad_degree) - F = derivative(U, z, TestFunction(Z)) - a = derivative(F, z, TrialFunction(Z)) - - subs = ["on_boundary"] - if mesh.cell_set._extruded: - subs += ["top", "bottom"] - bcs = [DirichletBC(Z.sub(1), zero(), sub) for sub in subs] - - problem = LinearVariationalProblem(a, -F, z, bcs=bcs) - solver = LinearVariationalSolver(problem, solver_parameters={ - "mat_type": "matfree", - "ksp_monitor": None, - "ksp_type": "preonly", - "ksp_norm_type": "unpreconditioned", - "pc_type": "python", - "pc_python_type": "firedrake.SCPC", - "pc_sc_eliminate_fields": "0", - "condensed_field": { - "mat_type": "matfree", - "ksp_monitor": None, - "ksp_type": "preonly", - "ksp_norm_type": "unpreconditioned", - "pc_type": "python", - "pc_python_type": "firedrake.FDMPC", - "fdm_pc_type": "lu", - "fdm_pc_mat_factor_solver_type": "mumps" - } - }) - solver.solve() - residual = solver.snes.ksp.buildResidual() - assert residual.norm() < 1E-14 From 4f95d7d781523f3efefd6e850d5d75985a0ceca1 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Fri, 10 Mar 2023 17:51:45 +0000 Subject: [PATCH 14/75] test statically-condensed star-relaxation --- firedrake/preconditioners/fdm.py | 113 +++++++++++----------- firedrake/preconditioners/pmg.py | 63 +++++++----- tests/multigrid/test_p_multigrid.py | 144 ++++++++++++++++++---------- tests/regression/test_fdm.py | 34 +++---- 4 files changed, 207 insertions(+), 147 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 9c02a5c1f0..8918bc56e3 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -36,11 +36,11 @@ class FDMPC(PCBase): Here we assume that the volume integrals in the Jacobian can be expressed as: - inner(d(v), alpha(d(u)))*dx + inner(v, beta(u))*dx + inner(d(v), alpha * d(u))*dx + inner(v, beta * u)*dx - where alpha and beta are linear functions (tensor contractions). - The sparse matrix is obtained by approximating (v, alpha u) and (v, beta u) as - diagonal mass matrices + where alpha and beta are possibly tensor-valued. The sparse matrix is + obtained by approximating (v, alpha * u) and (v, beta * u) as diagonal mass + matrices. 
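+
+    A minimal way to enable this preconditioner from the solver options is
+    sketched below. This is illustrative only: the particular option values
+    are assumptions rather than part of this patch, and any PETSc PC type may
+    be configured under the ``fdm_`` prefix to act on the assembled sparse
+    matrix:
+
+        solver_parameters = {
+            "mat_type": "matfree",
+            "ksp_type": "cg",
+            "pc_type": "python",
+            "pc_python_type": "firedrake.FDMPC",
+            "fdm_pc_type": "jacobi",
+        }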
""" _prefix = "fdm_" @@ -107,7 +107,12 @@ def initialize(self, pc): # Matrix-free assembly of the transformed Jacobian V_fdm = firedrake.FunctionSpace(V.mesh(), e_fdm) J_fdm = J(*[t.reconstruct(function_space=V_fdm) for t in J.arguments()], coefficients={}) - bcs_fdm = tuple(bc.reconstruct(V=V_fdm, g=0) for bc in bcs) + bcs_fdm = [] + for bc in bcs: + W = V_fdm + for index in bc._indices: + W = W.sub(index) + bcs_fdm.append(bc.reconstruct(V=W, g=0)) self.fdm_interp = prolongation_matrix_matfree(V, V_fdm, [], bcs_fdm) self.work_vec_x = Amat.createVecLeft() @@ -280,7 +285,6 @@ def get_coeffs(e, result=None): triu = on_diag and symmetric ptype = pmat_type if on_diag else PETSc.Mat.Type.AIJ sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) - # bsizes = tuple(Vsub.dof_dset.layout_vec.getBlockSize() for Vsub in (Vrow, Vcol)) preallocator = PETSc.Mat().create(comm=self.comm) preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) @@ -297,7 +301,6 @@ def get_coeffs(e, result=None): P = PETSc.Mat().create(comm=self.comm) P.setType(ptype) P.setSizes(sizes) - # P.setBlockSizes(*bsizes) P.setPreallocationNNZ((d_nnz, o_nnz)) P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) if ptype.endswith("sbaij"): @@ -483,10 +486,10 @@ def assemble_coef(self, J, form_compiler_parameters): splitter = ExtractSubBlock() J = splitter.split(J, argument_indices=(index, index)) - mesh = J.ufl_domain() - ndim = mesh.topological_dimension() args_J = J.arguments() e = args_J[0].ufl_element() + mesh = args_J[0].function_space().mesh() + tdim = mesh.topological_dimension() if isinstance(e, (ufl.VectorElement, ufl.TensorElement)): e = e._sub_element e = unrestrict_element(e) @@ -501,7 +504,7 @@ def assemble_coef(self, J, form_compiler_parameters): dku = ufl.div(u) if sobolev == ufl.HDiv else ufl.curl(u) eps = expand_derivatives(ufl.diff(ufl.replace(expand_derivatives(dku), {ufl.grad(u): du}), du)) if sobolev == ufl.HDiv: - map_grad = lambda p: ufl.outer(p, eps/ndim) + map_grad = lambda p: ufl.outer(p, eps/tdim) elif len(eps.ufl_shape) == 3: map_grad = lambda p: ufl.dot(p, eps/2) else: @@ -515,12 +518,12 @@ def assemble_coef(self, J, form_compiler_parameters): except TypeError: pass qdeg = degree - if formdegree == ndim: - qfam = "DG" if ndim == 1 else "DQ" + if formdegree == tdim: + qfam = "DG" if tdim == 1 else "DQ" qdeg = 0 elif formdegree == 0: - qfam = "DG" if ndim == 1 else "RTCE" if ndim == 2 else "NCE" - elif formdegree == 1 and ndim == 3: + qfam = "DG" if tdim == 1 else "RTCE" if tdim == 2 else "NCE" + elif formdegree == 1 and tdim == 3: qfam = "NCF" else: qfam = "DQ L2" @@ -565,7 +568,7 @@ def assemble_coef(self, J, form_compiler_parameters): @PETSc.Log.EventDecorator("FDMRefTensor") def assemble_reference_tensor(self, V): - ndim = V.mesh().topological_dimension() + tdim = V.mesh().topological_dimension() value_size = V.value_size formdegree = V.finat_element.formdegree degree = V.finat_element.degree @@ -573,13 +576,13 @@ def assemble_reference_tensor(self, V): degree = max(degree) except TypeError: pass - if formdegree == ndim: + if formdegree == tdim: degree = degree + 1 is_interior, is_facet = is_restricted(V.finat_element) - key = (degree, ndim, formdegree, V.value_size, is_interior, is_facet) + key = (degree, tdim, formdegree, V.value_size, is_interior, is_facet) cache = self._reference_tensor_cache if key not in cache: - full_key = (degree, ndim, formdegree, V.value_size, False, False) + full_key = (degree, tdim, formdegree, V.value_size, False, False) if is_facet and full_key in 
cache: result = cache[full_key] noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.comm) @@ -613,8 +616,8 @@ def assemble_reference_tensor(self, V): A10 = numpy.linalg.solve(A11, A10) A11 = numpy.eye(A11.shape[0]) - Ihat = mass_matrix(ndim, formdegree, A00, A11) - Dhat = diff_matrix(ndim, formdegree, A00, A11, A10) + Ihat = mass_matrix(tdim, formdegree, A00, A11) + Dhat = diff_matrix(tdim, formdegree, A00, A11, A10) result = block_mat([[Ihat], [Dhat]]) Ihat.destroy() Dhat.destroy() @@ -846,19 +849,19 @@ def kron3(A, B, C, scale=None): return result -def mass_matrix(ndim, formdegree, B00, B11): +def mass_matrix(tdim, formdegree, B00, B11): B00 = petsc_sparse(B00) B11 = petsc_sparse(B11) - if ndim == 1: + if tdim == 1: B_blocks = [B11 if formdegree else B00] - elif ndim == 2: + elif tdim == 2: if formdegree == 0: B_blocks = [B00.kron(B00)] elif formdegree == 1: B_blocks = [B00.kron(B11), B11.kron(B00)] else: B_blocks = [B11.kron(B11)] - elif ndim == 3: + elif tdim == 3: if formdegree == 0: B_blocks = [kron3(B00, B00, B00)] elif formdegree == 1: @@ -887,9 +890,9 @@ def mass_matrix(ndim, formdegree, B00, B11): return result -def diff_matrix(ndim, formdegree, A00, A11, A10): - if formdegree == ndim: - ncols = A10.shape[0]**ndim +def diff_matrix(tdim, formdegree, A00, A11, A10): + if formdegree == tdim: + ncols = A10.shape[0]**tdim A_zero = PETSc.Mat().createAIJ((1, ncols), nnz=(0, 0), comm=PETSc.COMM_SELF) A_zero.assemble() return A_zero @@ -897,15 +900,15 @@ def diff_matrix(ndim, formdegree, A00, A11, A10): A00 = petsc_sparse(A00) A11 = petsc_sparse(A11) A10 = petsc_sparse(A10) - if ndim == 1: + if tdim == 1: return A10 - elif ndim == 2: + elif tdim == 2: if formdegree == 0: A_blocks = [[A00.kron(A10)], [A10.kron(A00)]] elif formdegree == 1: A_blocks = [[A10.kron(A11), A11.kron(A10)]] A_blocks[-1][-1].scale(-1) - elif ndim == 3: + elif tdim == 3: if formdegree == 0: A_blocks = [[kron3(A00, A00, A10)], [kron3(A00, A10, A00)], [kron3(A10, A00, A00)]] elif formdegree == 1: @@ -946,8 +949,8 @@ def diff_prolongator(Vf, Vc, fbcs=[], cbcs=[]): A00 = numpy.eye(degree+1, dtype=PETSc.RealType) A10 = fiat_reference_prolongator(e1, e0, derivative=True) - ndim = Vc.mesh().topological_dimension() - Dhat = diff_matrix(ndim, ec.formdegree, A00, A11, A10) + tdim = Vc.mesh().topological_dimension() + Dhat = diff_matrix(tdim, ec.formdegree, A00, A11, A10) scalar_element = lambda e: e._sub_element if isinstance(e, (ufl.TensorElement, ufl.VectorElement)) else e fdofs = restricted_dofs(ef, create_element(unrestrict_element(scalar_element(Vf.ufl_element())))) @@ -1056,7 +1059,7 @@ class PoissonFDMPC(FDMPC): inner(grad(v), alpha(grad(u)))*dx + inner(v, beta(u))*dx - where alpha and beta are linear functions (tensor contractions). + where alpha and beta are possibly tensor-valued. The sparse matrix is obtained by approximating alpha and beta by cell-wise constants and discarding the coefficients in alpha that couple together mixed derivatives and mixed components. 
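The cell-wise constant approximation described in the docstring above reduces each component of the operator to sums of Kronecker products of 1D interval matrices. The standalone sketch below (not taken from this patch; it uses random SPD stand-ins for the 1D stiffness and mass matrices) illustrates the generalized eigenproblem behind an FDM-type basis: the transformed basis renders any 1D operator of the form c0*B + c1*A diagonal.

    # Illustration of the simultaneous diagonalization used by an FDM-type basis.
    import numpy
    from scipy.linalg import eigh

    rng = numpy.random.default_rng(0)
    n = 5
    A = rng.standard_normal((n, n))
    A = A @ A.T + n*numpy.eye(n)      # stand-in for the 1D stiffness matrix
    B = rng.standard_normal((n, n))
    B = B @ B.T + n*numpy.eye(n)      # stand-in for the 1D mass matrix

    lam, S = eigh(A, B)               # A @ S = B @ S @ diag(lam), S is B-orthonormal
    assert numpy.allclose(S.T @ B @ S, numpy.eye(n))
    assert numpy.allclose(S.T @ A @ S, numpy.diag(lam))
    # hence c0*B + c1*A is diagonal in the basis given by the columns of S.
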
@@ -1122,12 +1125,12 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): bsize = V.value_size ncomp = V.ufl_element().reference_value_size() sdim = (V.finat_element.space_dimension() * bsize) // ncomp # dimension of a single component - ndim = V.ufl_domain().topological_dimension() + tdim = V.mesh().topological_dimension() shift = self.axes_shifts * bsize index_coef, _ = glonum_fun((Gq or Bq).cell_node_map()) index_bc, _ = glonum_fun(bcflags.cell_node_map()) - flag2id = numpy.kron(numpy.eye(ndim, ndim, dtype=PETSc.IntType), [[1], [2]]) + flag2id = numpy.kron(numpy.eye(tdim, tdim, dtype=PETSc.IntType), [[1], [2]]) # pshape is the shape of the DOFs in the tensor product pshape = tuple(Ak[0].size[0] for Ak in Afdm) @@ -1136,7 +1139,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): static_condensation = True if set(shift) != {0}: - assert ncomp == ndim + assert ncomp == tdim pshape = [tuple(numpy.roll(pshape, -shift[k])) for k in range(ncomp)] # assemble zero-th order term separately, including off-diagonals (mixed components) @@ -1148,7 +1151,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): bshape = Bq.ufl_shape # Be = Bhat kron ... kron Bhat Be = Afdm[0][0].copy() - for k in range(1, ndim): + for k in range(1, tdim): Be = Be.kron(Afdm[k][0]) aptr = numpy.arange(0, (bshape[0]+1)*bshape[1], bshape[1], dtype=PETSc.IntType) @@ -1166,7 +1169,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): # assemble the second order term and the zero-th order term if any, # discarding mixed derivatives and mixed componentsget_weak_bc_flags(J) - mue = numpy.zeros((ncomp, ndim), dtype=PETSc.RealType) + mue = numpy.zeros((ncomp, tdim), dtype=PETSc.RealType) bqe = numpy.zeros((ncomp,), dtype=PETSc.RealType) for e in range(self.nel): @@ -1187,7 +1190,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): for k in range(ncomp): # permutation of axes with respect to the first vector component - axes = numpy.roll(numpy.arange(ndim), -shift[k]) + axes = numpy.roll(numpy.arange(tdim), -shift[k]) # for each component: compute the stiffness matrix Ae bck = bce[:, k] if len(bce.shape) == 2 else bce fbc = numpy.dot(bck, flag2id) @@ -1200,13 +1203,13 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): if Bq is not None: Ae.axpy(bqe[k], Be) - if ndim > 1: + if tdim > 1: # Ae = Ae kron Bhat + mue[k][1] Bhat kron Ahat Ae = Ae.kron(Afdm[axes[1]][0]) if Gq is not None: Ae.axpy(mue[k][1], Be.kron(Afdm[axes[1]][1+fbc[1]])) - if ndim > 2: + if tdim > 2: # Ae = Ae kron Bhat + mue[k][2] Bhat kron Bhat kron Ahat Be = Be.kron(Afdm[axes[1]][0]) Ae = Ae.kron(Afdm[axes[2]][0]) @@ -1216,7 +1219,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): elif Bq is not None: Ae = Afdm[axes[0]][0] - for m in range(1, ndim): + for m in range(1, tdim): Ae = Ae.kron(Afdm[axes[m]][0]) Ae.scale(bqe[k]) @@ -1228,7 +1231,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): if any(Dk is not None for Dk in Dfdm): if static_condensation: raise NotImplementedError("Static condensation for SIPG not implemented") - if ndim < V.ufl_domain().geometric_dimension(): + if tdim < V.mesh().geometric_dimension(): raise NotImplementedError("SIPG on immersed meshes is not implemented") eta = float(self.appctx.get("eta")) @@ -1246,8 +1249,8 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): if PT_facet: icell = numpy.reshape(lgmap.apply(ie), (2, ncomp, -1)) - iord0 = numpy.insert(numpy.delete(numpy.arange(ndim), idir[0]), 0, idir[0]) - iord1 = numpy.insert(numpy.delete(numpy.arange(ndim), idir[1]), 0, 
idir[1]) + iord0 = numpy.insert(numpy.delete(numpy.arange(tdim), idir[0]), 0, idir[0]) + iord1 = numpy.insert(numpy.delete(numpy.arange(tdim), idir[1]), 0, idir[1]) je = je[[0, 1], lfd] Pfacet = PT_facet.dat.data_ro_with_halos[je] Gfacet = Gq_facet.dat.data_ro_with_halos[je] @@ -1255,14 +1258,14 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): Gfacet = numpy.sum(Gq.dat.data_ro_with_halos[je], axis=1) for k in range(ncomp): - axes = numpy.roll(numpy.arange(ndim), -shift[k]) + axes = numpy.roll(numpy.arange(tdim), -shift[k]) Dfacet = Dfdm[axes[0]] if Dfacet is None: continue if PT_facet: - k0 = iord0[k] if shift != 1 else ndim-1-iord0[-k-1] - k1 = iord1[k] if shift != 1 else ndim-1-iord1[-k-1] + k0 = iord0[k] if shift != 1 else tdim-1-iord0[-k-1] + k1 = iord1[k] if shift != 1 else tdim-1-iord1[-k-1] Piola = Pfacet[[0, 1], [k0, k1]] mu = Gfacet[[0, 1], idir] else: @@ -1297,10 +1300,10 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): Adense[ii, j0:j1] -= smu[j] * Dfacet[:, jface % 2] Ae = numpy_to_petsc(Adense, dense_indices, diag=False) - if ndim > 1: + if tdim > 1: # assume that the mesh is oriented Ae = Ae.kron(Afdm[axes[1]][0]) - if ndim > 2: + if tdim > 2: Ae = Ae.kron(Afdm[axes[2]][0]) if bsize == ncomp: @@ -1323,12 +1326,12 @@ def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_av coefficients = {} assembly_callables = [] - mesh = J.ufl_domain() + args_J = J.arguments() + V = args_J[-1].function_space() + mesh = V.mesh() tdim = mesh.topological_dimension() Finv = ufl.JacobianInverse(mesh) - args_J = J.arguments() - V = args_J[-1].function_space() degree = V.ufl_element().degree() try: degree = max(degree) @@ -1590,7 +1593,7 @@ def fdm_setup_ipdg(fdm_element, eta): @lru_cache(maxsize=10) def get_interior_facet_maps(V): """ - Extrude V.interior_facet_node_map and V.ufl_domain().interior_facets.local_facet_dat + Extrude V.interior_facet_node_map and V.mesh().interior_facets.local_facet_dat :arg V: a :class:`.FunctionSpace` @@ -1599,7 +1602,7 @@ def get_interior_facet_maps(V): local_facet_data_fun: maps interior facets to the local facet numbering in the two cells sharing it, nfacets: the total number of interior facets owned by this process """ - mesh = V.ufl_domain() + mesh = V.mesh() intfacets = mesh.interior_facets facet_to_cells = intfacets.facet_cell_map.values local_facet_data = intfacets.local_facet_dat.data_ro diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 56341599a6..6511be38b5 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -606,7 +606,7 @@ def prolongation_transfer_kernel_action(Vf, expr): kernel = compile_expression_dual_evaluation(expr, to_element, Vf.ufl_element(), log=PETSc.Log.isActive()) coefficients = extract_numbered_coefficients(expr, kernel.coefficient_numbers) if kernel.needs_external_coords: - coefficients = [Vf.ufl_domain().coordinates] + coefficients + coefficients = [Vf.mesh().coordinates] + coefficients return op2.Kernel(kernel.ast, kernel.name, requires_zeroed_output_arguments=True, @@ -790,7 +790,7 @@ def finat_reference_prolongator(felem, celem): from gem.interpreter import evaluate ref_el = felem.cell - ndim = ref_el.get_spatial_dimension() + tdim = ref_el.get_spatial_dimension() degree = felem.degree try: degree = max(degree) @@ -804,11 +804,12 @@ def _tabulate(e, ps, entity=None): is_facet_element = True entity_dofs = felem.entity_dofs() - for key in entity_dofs: - v = sum(list(entity_dofs[key].values()), []) + for edim in 
sorted(entity_dofs): + v = sum(list(entity_dofs[edim].values()), []) if len(v): - edim = sum(key) if type(key) == tuple else key - if edim == ndim: + if type(edim) == tuple: + edim = sum(edim) + if edim == tdim: is_facet_element = False if is_facet_element and degree > 5: @@ -816,7 +817,7 @@ def _tabulate(e, ps, entity=None): quadratures = [] for key in ref_el.sub_entities: edim = sum(key) if type(key) == tuple else key - if edim == ndim-1: + if edim == tdim-1: sub_entities = ref_el.sub_entities[key] entities.extend([(key, f) for f in sub_entities]) quadratures.extend([make_quadrature(ref_el.construct_subelement(key), quad_degree)]*len(sub_entities)) @@ -1113,6 +1114,11 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): prolong_code = "".join(prolong_code) restrict_code = "".join(reversed(restrict_code)) shapes = [tuple(map(max, zip(*fshapes))), tuple(map(max, zip(*cshapes)))] + + if fskip > numpy.prod(shapes[0]): + shapes[0] = (fskip, 1, 1, 1) + if cskip > numpy.prod(shapes[1]): + shapes[1] = (cskip, 1, 1, 1) return operator_decl, prolong_code, restrict_code, shapes @@ -1158,9 +1164,8 @@ def cache_generate_code(kernel, comm): def make_mapping_code(Q, fmapping, cmapping, t_in, t_out): if fmapping == cmapping: return None - domain = Q.ufl_domain() - A = get_piola_tensor(cmapping, domain, inverse=False) - B = get_piola_tensor(fmapping, domain, inverse=True) + A = get_piola_tensor(cmapping, Q.mesh(), inverse=False) + B = get_piola_tensor(fmapping, Q.mesh(), inverse=True) tensor = A if B: tensor = ufl.dot(B, tensor) if tensor else B @@ -1285,14 +1290,16 @@ def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): self.uf = Vf Vf = Vf.function_space() else: - self.uf = self._cache_work.get(Vf) or firedrake.Function(Vf) - self._cache_work[Vf] = self.uf + if Vf not in self._cache_work: + self._cache_work[Vf] = firedrake.Function(Vf) + self.uf = self._cache_work[Vf] if isinstance(Vc, firedrake.Function): self.uc = Vc Vc = Vc.function_space() else: - self.uc = self._cache_work.get(Vc) or firedrake.Function(Vc) - self._cache_work[Vc] = self.uc + if Vc not in self._cache_work: + self._cache_work[Vc] = firedrake.Function(Vc) + self.uc = self._cache_work[Vc] self.Vf = Vf self.Vc = Vc @@ -1339,6 +1346,16 @@ def _kernels(self): restrict = partial(op2.par_loop, *restrict_args, *coefficient_args) return prolong, restrict + def _prolong(self): + with self.uf.dat.vec_wo as uf: + uf.set(0.0E0) + self._kernels[0]() + + def _restrict(self): + with self.uc.dat.vec_wo as uc: + uc.set(0.0E0) + self._kernels[1]() + def view(self, mat, viewer=None): if viewer is None: return @@ -1399,14 +1416,14 @@ def make_blas_kernels(Vf, Vc): qelem = felem if qelem.mapping() != "identity": qelem = qelem.reconstruct(mapping="identity") - Qf = Vf if qelem == felem else firedrake.FunctionSpace(Vf.ufl_domain(), qelem) + Qf = Vf if qelem == felem else firedrake.FunctionSpace(Vf.mesh(), qelem) mapping_output = make_mapping_code(Qf, fmapping, cmapping, "t0", "t1") in_place_mapping = True except Exception: qelem = ufl.FiniteElement("DQ", cell=felem.cell(), degree=PMGBase.max_degree(felem)) if felem.value_shape(): qelem = ufl.TensorElement(qelem, shape=felem.value_shape(), symmetry=felem.symmetry()) - Qf = firedrake.FunctionSpace(Vf.ufl_domain(), qelem) + Qf = firedrake.FunctionSpace(Vf.mesh(), qelem) mapping_output = make_mapping_code(Qf, fmapping, cmapping, "t0", "t1") qshape = (Qf.value_size, Qf.finat_element.space_dimension()) @@ -1583,9 +1600,7 @@ def multTranspose(self, mat, rf, rc): for bc in self.Vf_bcs: bc.zero(self.uf) - with 
self.uc.dat.vec_wo as uc: - uc.set(0.0E0) - self._kernels[1]() + self._restrict() for bc in self.Vc_bcs: bc.zero(self.uc) @@ -1601,9 +1616,7 @@ def mult(self, mat, xc, xf, inc=False): for bc in self.Vc_bcs: bc.zero(self.uc) - with self.uf.dat.vec_wo as uf: - uf.set(0.0E0) - self._kernels[0]() + self._prolong() for bc in self.Vf_bcs: bc.zero(self.uf) @@ -1642,8 +1655,8 @@ def _standalones(self): @cached_property def _kernels(self): - prolong = lambda: [standalone._kernels[0]() for standalone in self._standalones] - restrict = lambda: [standalone._kernels[1]() for standalone in self._standalones] + prolong = lambda: [s._prolong() for s in self._standalones] + restrict = lambda: [s._restrict() for s in self._standalones] return prolong, restrict def getNestSubMatrix(self, i, j): @@ -1667,7 +1680,7 @@ def prolongation_matrix_aij(Pk, P1, Pk_bcs=[], P1_bcs=[]): (Pk.cell_node_map(), P1.cell_node_map())) mat = op2.Mat(sp, PETSc.ScalarType) - mesh = Pk.ufl_domain() + mesh = Pk.mesh() fele = Pk.ufl_element() if isinstance(fele, ufl.MixedElement) and not isinstance(fele, (ufl.VectorElement, ufl.TensorElement)): diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index 89d7c94efb..ff6d51be54 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -2,58 +2,102 @@ from firedrake import * -def test_reconstruct_degree(): - meshes = [UnitSquareMesh(1, 1, quadrilateral=True)] - meshes.append(ExtrudedMesh(meshes[0], layers=1)) - for mesh in meshes: - ndim = mesh.topological_dimension() - elist = [] - for degree in [7, 2, 31]: - V = VectorFunctionSpace(mesh, "Q", degree) - Q = FunctionSpace(mesh, "DQ", degree-2) - Z = MixedFunctionSpace([V, Q]) - e = Z.ufl_element() - elist.append(e) - assert e == PMGPC.reconstruct_degree(elist[0], degree) - - elist = [] - for degree in [7, 2, 31]: - V = FunctionSpace(mesh, "NCF" if ndim == 3 else "RTCF", degree) - Q = FunctionSpace(mesh, "DQ", degree-1) - Z = MixedFunctionSpace([V, Q]) - e = Z.ufl_element() - elist.append(e) - assert e == PMGPC.reconstruct_degree(elist[0], degree) - - -def test_prolongation_matrix_matfree(): +@pytest.fixture(params=[2, 3], + ids=["Rectangle", "Box"]) +def tp_mesh(request): + nx = 4 + distribution = {"overlap_type": (DistributedMeshOverlapType.VERTEX, 1)} + m = UnitSquareMesh(nx, nx, quadrilateral=True, distribution_parameters=distribution) + if request.param == 3: + m = ExtrudedMesh(m, nx) + + x = SpatialCoordinate(m) + xnew = as_vector([acos(1-2*xj)/pi for xj in x]) + m.coordinates.interpolate(xnew) + return m + + +@pytest.fixture(params=[0, 1, 2], + ids=["H1", "HCurl", "HDiv"]) +def tp_family(tp_mesh, request): + tdim = tp_mesh.topological_dimension() + if tdim == 3: + families = ["Q", "NCE", "NCF"] + else: + families = ["Q", "RTCE", "RTCF"] + return families[request.param] + + +@pytest.fixture(params=[None, "fdm", "hierarchical"], ids=["spectral", "fdm", "hierarchical"]) +def variant(request): + return request.param + + +def test_reconstruct_degree(tp_mesh): + tdim = tp_mesh.topological_dimension() + elist = [] + for degree in [7, 2, 31]: + V = VectorFunctionSpace(tp_mesh, "Q", degree) + Q = FunctionSpace(tp_mesh, "DQ", degree-2) + Z = MixedFunctionSpace([V, Q]) + e = Z.ufl_element() + elist.append(e) + assert e == PMGPC.reconstruct_degree(elist[0], degree) + + elist = [] + for degree in [7, 2, 31]: + V = FunctionSpace(tp_mesh, "NCF" if tdim == 3 else "RTCF", degree) + Q = FunctionSpace(tp_mesh, "DQ", degree-1) + Z = MixedFunctionSpace([V, Q]) + e = Z.ufl_element() + 
elist.append(e) + assert e == PMGPC.reconstruct_degree(elist[0], degree) + + +def test_prolong_de_rham(tp_mesh): + from firedrake.preconditioners.pmg import prolongation_matrix_matfree + + tdim = tp_mesh.topological_dimension() + b = Constant(list(range(tdim))) + mat = diag(Constant([tdim+1]*tdim)) + Constant([[-1]*tdim]*tdim) + expr = dot(mat, SpatialCoordinate(tp_mesh)) + b + + cell = tp_mesh.ufl_cell() + elems = [VectorElement(FiniteElement("Q", cell=cell, degree=2)), + FiniteElement("NCE" if tdim == 3 else "RTCE", cell=cell, degree=2), + FiniteElement("NCF" if tdim == 3 else "RTCF", cell=cell, degree=2)] + fs = [FunctionSpace(tp_mesh, e) for e in elems] + us = [Function(V) for V in fs] + us[0].interpolate(expr) + for u in us: + for v in us: + if u != v: + P = prolongation_matrix_matfree(v, u).getPythonContext() + P._prolong() + assert norm(v-expr, "L2") < 1E-14 + + +def test_prolong_low_order_to_restricted(tp_mesh, tp_family, variant): from firedrake.preconditioners.pmg import prolongation_matrix_matfree - tol = 1E-14 - meshes = [UnitSquareMesh(3, 2, quadrilateral=True)] - meshes.append(ExtrudedMesh(meshes[0], layers=2)) - for mesh in meshes: - ndim = mesh.topological_dimension() - b = Constant(list(range(ndim))) - mat = diag(Constant([ndim+1]*ndim)) + Constant([[-1]*ndim]*ndim) - expr = dot(mat, SpatialCoordinate(mesh)) + b - - variant = None - cell = mesh.ufl_cell() - elems = [] - elems.append(VectorElement(FiniteElement("Q", cell=cell, degree=3, variant=variant))) - elems.append(FiniteElement("NCF" if ndim == 3 else "RTCF", cell=cell, degree=2, variant=variant)) - elems.append(FiniteElement("NCE" if ndim == 3 else "RTCE", cell=cell, degree=2, variant=variant)) - fs = [FunctionSpace(mesh, e) for e in elems] - us = [Function(V) for V in fs] - us[0].interpolate(expr) - for u in us: - for v in us: - if u != v: - v.assign(0) - P = prolongation_matrix_matfree(v, u).getPythonContext() - P._kernels[0]() - assert norm(v-expr, "L2") < tol + degree = 3 + cell = tp_mesh.ufl_cell() + element = FiniteElement(tp_family, cell=cell, degree=degree, variant=variant) + Vi = FunctionSpace(tp_mesh, RestrictedElement(element, restriction_domain="interior")) + Vf = FunctionSpace(tp_mesh, RestrictedElement(element, restriction_domain="facet")) + Vc = FunctionSpace(tp_mesh, tp_family, degree=1) + + ui = Function(Vi) + uf = Function(Vf) + uc = Function(Vc) + uc.dat.data[0::2] = 0.0 + uc.dat.data[1::2] = 1.0 + + for v in [ui, uf]: + P = prolongation_matrix_matfree(v, uc).getPythonContext() + P._prolong() + + assert norm(ui + uf - uc, "L2") < 2E-14 @pytest.fixture(params=["triangles", "quadrilaterals"], scope="module") diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index 63129c455b..e4217b5129 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -73,7 +73,7 @@ def solve_riesz_map(V, d): - beta = Constant(1E-8) + beta = Constant(1E-4) subs = [(1, 3)] if V.mesh().cell_set._extruded: subs += ["top"] @@ -137,7 +137,7 @@ def test_p_independence_hgrad(mesh): @pytest.mark.skipcomplex def test_p_independence_hcurl(mesh): family = "NCE" if mesh.topological_dimension() == 3 else "RTCE" - expected = [6, 6] if mesh.topological_dimension() == 3 else [3, 3] + expected = [8, 7] if mesh.topological_dimension() == 3 else [4, 4] for degree in range(3, 6): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) @@ -147,7 +147,7 @@ def test_p_independence_hcurl(mesh): @pytest.mark.skipcomplex def 
test_p_independence_hdiv(mesh): family = "NCF" if mesh.topological_dimension() == 3 else "RTCF" - expected = [2, 2] + expected = [3, 3] for degree in range(3, 6): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) @@ -156,7 +156,7 @@ def test_p_independence_hdiv(mesh): @pytest.mark.skipcomplex def test_variable_coefficient(mesh): - ndim = mesh.geometric_dimension() + gdim = mesh.geometric_dimension() k = 4 V = FunctionSpace(mesh, "Lagrange", k) u = TrialFunction(V) @@ -165,10 +165,10 @@ def test_variable_coefficient(mesh): x -= Constant([0.5]*len(x)) # variable coefficients - alphas = [0.1+10*dot(x, x)]*ndim + alphas = [0.1+10*dot(x, x)]*gdim alphas[0] = 1+10*exp(-dot(x, x)) alpha = diag(as_vector(alphas)) - beta = ((10*cos(3*pi*x[0]) + 20*sin(2*pi*x[1]))*cos(pi*x[ndim-1]))**2 + beta = ((10*cos(3*pi*x[0]) + 20*sin(2*pi*x[1]))*cos(pi*x[gdim-1]))**2 a = (inner(grad(v), dot(alpha, grad(u))) + inner(v, beta*u))*dx(degree=3*k+2) L = inner(v, Constant(1))*dx @@ -189,44 +189,44 @@ def test_variable_coefficient(mesh): ids=["cg", "dg", "rt"]) def fs(request, mesh): degree = 3 - ndim = mesh.topological_dimension() + tdim = mesh.topological_dimension() cell = mesh.ufl_cell() element = request.param variant = "fdm_ipdg" if element == "rt": - family = "RTCF" if ndim == 2 else "NCF" + family = "RTCF" if tdim == 2 else "NCF" return FunctionSpace(mesh, FiniteElement(family, cell, degree=degree, variant=variant)) else: - if ndim == 1: + if tdim == 1: family = "DG" if element == "dg" else "CG" else: family = "DQ" if element == "dg" else "Q" - return VectorFunctionSpace(mesh, FiniteElement(family, cell, degree=degree, variant=variant), dim=5-ndim) + return VectorFunctionSpace(mesh, FiniteElement(family, cell, degree=degree, variant=variant), dim=5-tdim) @pytest.mark.skipcomplex def test_ipdg_direct_solver(fs): mesh = fs.mesh() x = SpatialCoordinate(mesh) - ndim = mesh.geometric_dimension() + gdim = mesh.geometric_dimension() ncomp = fs.ufl_element().value_size() u_exact = dot(x, x) if ncomp: u_exact = as_vector([u_exact + Constant(k) for k in range(ncomp)]) - N = fs.ufl_element().degree() + degree = fs.ufl_element().degree() try: - N, = set(N) + degree, = set(degree) except TypeError: pass - quad_degree = 2*(N+1)-1 + quad_degree = 2*(degree+1)-1 uh = Function(fs) u = TrialFunction(fs) v = TestFunction(fs) # problem coefficients - A1 = diag(Constant(range(1, ndim+1))) + A1 = diag(Constant(range(1, gdim+1))) A2 = diag(Constant(range(1, ncomp+1))) alpha = lambda grad_u: dot(dot(A2, grad_u), A1) beta = diag(Constant(range(2, ncomp+2))) @@ -238,7 +238,7 @@ def test_ipdg_direct_solver(fs): extruded = mesh.cell_set._extruded subs = (1,) - if ndim > 1: + if gdim > 1: subs += (3,) if extruded: subs += ("top",) @@ -271,7 +271,7 @@ def test_ipdg_direct_solver(fs): ds_Dir = sum(ds_Dir, ds(tuple())) ds_Neu = sum(ds_Neu, ds(tuple())) - eta = Constant((N+1)**2) + eta = Constant((degree+1)**2) h = CellVolume(mesh)/FacetArea(mesh) penalty = eta/h From 9a548e51bb2d3b9c3d31ca1e8dc40411b42b049c Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 13 Mar 2023 18:22:05 +0000 Subject: [PATCH 15/75] attempt to write restricted NCE prolongator --- firedrake/preconditioners/fdm.py | 6 +- firedrake/preconditioners/pmg.py | 422 +++++++++------------------- tests/multigrid/test_p_multigrid.py | 50 ++-- tests/regression/test_fdm.py | 55 ++-- 4 files changed, 196 insertions(+), 337 deletions(-) diff --git a/firedrake/preconditioners/fdm.py 
b/firedrake/preconditioners/fdm.py index 8918bc56e3..a4dd248622 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1072,9 +1072,9 @@ class PoissonFDMPC(FDMPC): _variant = "fdm_ipdg" def assemble_reference_tensor(self, V): - from firedrake.preconditioners.pmg import get_line_elements + from firedrake.preconditioners.pmg import get_permutation_to_line_elements try: - line_elements, shifts = get_line_elements(V) + _, line_elements, shifts = get_permutation_to_line_elements(V) except ValueError: raise ValueError("FDMPC does not support the element %s" % V.ufl_element()) @@ -1602,6 +1602,8 @@ def get_interior_facet_maps(V): local_facet_data_fun: maps interior facets to the local facet numbering in the two cells sharing it, nfacets: the total number of interior facets owned by this process """ + if isinstance(V, firedrake.Function): + V = V.function_space() mesh = V.mesh() intfacets = mesh.interior_facets facet_to_cells = intfacets.facet_cell_map.values diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 6511be38b5..61ed8e06d9 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -72,13 +72,13 @@ def coarsen_form(self, form, fine_to_coarse_map): """ return ufl.replace(form, fine_to_coarse_map) - def initialize(self, pc): + def initialize(self, obj): # Make a new DM. # Hook up a (new) coarsen routine on that DM. # Make a new PC, of type MG. # Assign the DM to that PC. - odm = pc.getDM() + odm = obj.getDM() ctx = get_appctx(odm) if ctx is None: raise ValueError("No context found.") @@ -89,15 +89,15 @@ def initialize(self, pc): if test.function_space() != trial.function_space(): raise NotImplementedError("test and trial spaces must be the same") - prefix = pc.getOptionsPrefix() + prefix = obj.getOptionsPrefix() options_prefix = prefix + self._prefix - pdm = PETSc.DMShell().create(comm=pc.comm) + pdm = PETSc.DMShell().create(comm=obj.comm) pdm.setOptionsPrefix(options_prefix) - self.ppc = self.configure_pmg(pc, pdm) - self.ppc.setFromOptions() + ppc = self.configure_pmg(obj, pdm) + is_snes = isinstance(obj, PETSc.SNES) - copts = PETSc.Options(self.ppc.getOptionsPrefix()+self.ppc.getType()+"_coarse_") + copts = PETSc.Options(ppc.getOptionsPrefix()+ppc.getType()+"_coarse_") # Get the coarse degree from PETSc options fcp = ctx._problem.form_compiler_parameters @@ -126,12 +126,10 @@ def initialize(self, pc): # Now overwrite some routines on the DM pdm.setRefine(None) pdm.setCoarsen(self.coarsen) - pdm.setCreateInterpolation(self.create_interpolation) - # We need this for p-FAS - pdm.setCreateInjection(self.create_injection) - pdm.setSNESFunction(_SNESContext.form_function) - pdm.setSNESJacobian(_SNESContext.form_jacobian) - pdm.setKSPComputeOperators(_SNESContext.compute_operators) + if is_snes: + pdm.setSNESFunction(_SNESContext.form_function) + pdm.setSNESJacobian(_SNESContext.form_jacobian) + pdm.setKSPComputeOperators(_SNESContext.compute_operators) set_function_space(pdm, get_function_space(odm)) @@ -139,19 +137,23 @@ def initialize(self, pc): assert parent is not None add_hook(parent, setup=partial(push_parent, pdm, parent), teardown=partial(pop_parent, pdm, parent), call_setup=True) add_hook(parent, setup=partial(push_appctx, pdm, ctx), teardown=partial(pop_appctx, pdm, ctx), call_setup=True) - self.ppc.setUp() - def update(self, pc): - pass + ppc.incrementTabLevel(1, parent=obj) + ppc.setFromOptions() + ppc.setUp() + self.ppc = ppc + + def update(self, obj): + self.ppc.setUp() - def 
view(self, pc, viewer=None): + def view(self, obj, viewer=None): if viewer is None: viewer = PETSc.Viewer.STDOUT viewer.printfASCII("p-multigrid PC\n") if hasattr(self, "ppc"): - self.ppc.view(viewer) + self.ppc.view(viewer=viewer) - def destroy(self, pc): + def destroy(self, obj): if hasattr(self, "ppc"): self.ppc.destroy() @@ -385,11 +387,11 @@ def reconstruct_degree(ele, degree): By default, reconstructed EnrichedElements, TensorProductElements, and MixedElements will have the degree of the sub-elements shifted - by the same amount so that the maximum degree is N. - This is useful to coarsen spaces like NCF(N) x DQ(N-1). + by the same amount so that the maximum degree is `degree`. + This is useful to coarsen spaces like NCF(k) x DQ(k-1). :arg ele: a :class:`ufl.FiniteElement` to reconstruct, - :arg N: an integer degree. + :arg degree: an integer degree. :returns: the reconstructed element """ @@ -398,14 +400,14 @@ def reconstruct_degree(ele, degree): elif isinstance(ele, ufl.TensorElement): return type(ele)(PMGBase.reconstruct_degree(ele._sub_element, degree), shape=ele._shape, symmetry=ele.symmetry()) elif isinstance(ele, ufl.EnrichedElement): - shift = degree-PMGBase.max_degree(ele) - return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e)+shift) for e in ele._elements)) + shift = degree - PMGBase.max_degree(ele) + return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e) + shift) for e in ele._elements)) elif isinstance(ele, ufl.TensorProductElement): - shift = degree-PMGBase.max_degree(ele) - return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e)+shift) for e in ele.sub_elements()), cell=ele.cell()) + shift = degree - PMGBase.max_degree(ele) + return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e) + shift) for e in ele.sub_elements()), cell=ele.cell()) elif isinstance(ele, ufl.MixedElement): - shift = degree-PMGBase.max_degree(ele) - return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e)+shift) for e in ele.sub_elements())) + shift = degree - PMGBase.max_degree(ele) + return type(ele)(*(PMGBase.reconstruct_degree(e, PMGBase.max_degree(e) + shift) for e in ele.sub_elements())) elif isinstance(ele, ufl.WithMapping): return type(ele)(PMGBase.reconstruct_degree(ele.wrapee, degree), ele.mapping()) elif isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement)): @@ -426,7 +428,6 @@ def configure_pmg(self, pc, pdm): ppc.setType("mg") ppc.setOperators(*pc.getOperators()) ppc.setDM(pdm) - ppc.incrementTabLevel(1, parent=pc) # PETSc unfortunately requires us to make an ugly hack. 
# We would like to use GMG for the coarse solve, at least @@ -463,7 +464,8 @@ def configure_pmg(self, snes, pdm): psnes.setOptionsPrefix(snes.getOptionsPrefix() + "pfas_") psnes.setType("fas") psnes.setDM(pdm) - psnes.incrementTabLevel(1, parent=snes) + psnes.setTolerances(max_it=1) + psnes.setConvergenceTest("skip") (f, residual) = snes.getFunction() assert residual is not None @@ -508,97 +510,6 @@ def coarsen_residual(self, Fc, Jc, uc): return Fc -def load_c_code(code, name, argtypes, comm): - from pyop2.compilation import load - from pyop2.utils import get_petsc_dir - cppargs = ["-I%s/include" % d for d in get_petsc_dir()] - ldargs = (["-L%s/lib" % d for d in get_petsc_dir()] - + ["-Wl,-rpath,%s/lib" % d for d in get_petsc_dir()] - + ["-lpetsc", "-lm"]) - return load(code, "c", name, argtypes=argtypes, - cppargs=cppargs, ldargs=ldargs, - comm=comm) - - -def reference_moments(*args, **kwargs): - """ - Return a python function that computes the L2 inner product of the - arguments in the reference cell. - - :arg test: the test `ufl.Argument` - :arg trial: the trial `ufl.Argument` or `ufl.Coefficient` - :kwarg diagonal: are we assembling the diagonal of the bilinear form? - """ - import ctypes - from tsfc import compile_form - quad_degree = 1+sum([PMGBase.max_degree(t.ufl_element()) for t in args]) - form = ufl.inner(*args)*ufl.dx(degree=quad_degree) - kernel, = compile_form(form, parameters=dict(mode="spectral"), - log=PETSc.Log.isActive(), **kwargs) - op2kernel = op2.Kernel(kernel.ast, kernel.name, - requires_zeroed_output_arguments=True, - flop_count=kernel.flop_count, - events=(kernel.event,)) - code = op2kernel.code.gencode().replace("static inline void", "void") - coords = None - mesh = form.ufl_domain() - if len(kernel.arguments) > 3-len(form.arguments()): - mesh_element = mesh.coordinates.function_space().finat_element - nodes = mesh_element.fiat_equivalent.dual.get_nodes() - points = [list(node.get_point_dict().keys())[0] for node in nodes] - coords = numpy.array(points, dtype=PETSc.ScalarType) - - argtypes = [ctypes.c_voidp]*len(kernel.arguments) - funptr = load_c_code(code, op2kernel.code.name, argtypes, mesh.comm) - - def _wrapper(*args): - args[0].fill(0.0E0) - _args = list(args) - if coords is not None: - _args.insert(1, coords) - return funptr(*[a.ctypes.data for a in _args]) - - return _wrapper - - -@lru_cache(maxsize=10) -def matfree_reference_prolongator(Vf, Vc): - """ - Return the prolongation from Vc to Vf on the reference element. 
- """ - dimf = Vf.value_size * Vf.finat_element.space_dimension() - dimc = Vc.value_size * Vc.finat_element.space_dimension() - build_Afc = reference_moments(ufl.TestFunction(Vf), ufl.TrialFunction(Vc)) - apply_Aff = reference_moments(ufl.TestFunction(Vf), ufl.Coefficient(Vf)) - diag_Aff = reference_moments(ufl.TestFunction(Vf), ufl.TrialFunction(Vf), diagonal=True) - Ax = numpy.empty((dimf,), dtype=PETSc.ScalarType) - Dx = numpy.empty((dimf,), dtype=PETSc.ScalarType) - diagonal = numpy.empty((dimf,), dtype=PETSc.ScalarType) - result = numpy.empty((dimf, dimc), dtype=PETSc.ScalarType) - - def _afun(x): - nonlocal Ax, Dx, diagonal - numpy.multiply(x, diagonal, out=Dx) - apply_Aff(Ax, Dx) - numpy.multiply(Ax, diagonal, out=Ax) - return Ax - - if Vf.comm.rank == 0: - from scipy.sparse.linalg import cg, LinearOperator - build_Afc(result) - diag_Aff(diagonal) - numpy.sqrt(diagonal, out=diagonal) - numpy.reciprocal(diagonal, out=diagonal) - A = LinearOperator((dimf, dimf), _afun, dtype=result.dtype) - for k in range(dimc): - numpy.multiply(result[:, k], diagonal, out=result[:, k]) - result[:, k], _ = cg(A, result[:, k], tol=1E-12) - numpy.multiply(result[:, k], diagonal, out=result[:, k]) - - result = Vf.comm.bcast(result, root=0) - return result - - def prolongation_transfer_kernel_action(Vf, expr): from tsfc import compile_expression_dual_evaluation from tsfc.finatinterface import create_element @@ -719,7 +630,7 @@ def compare_dual_basis(l1, l2): @lru_cache(maxsize=10) @PETSc.Log.EventDecorator("GetLineElements") -def get_line_elements(V): +def get_permutation_to_line_elements(V): from FIAT.reference_element import LINE from tsfc.finatinterface import create_element ele = V.ufl_element() @@ -731,10 +642,6 @@ def get_line_elements(V): if finat_ele.space_dimension() != V.finat_element.space_dimension(): raise ValueError("Failed to decompose %s into tensor products" % V.ufl_element()) - def cyclic_perm(a): - return [a[i:] + a[:i] for i in range(len(a))] - - permutations = [] line_elements = [] axes_shifts = [] @@ -744,26 +651,48 @@ def cyclic_perm(a): expansion = tuple(e.fiat_equivalent for e in reversed(factors)) if not all([e.get_reference_element().shape == LINE for e in expansion]): raise ValueError("Failed to decompose %s into line elements" % V.ufl_element()) - - shift = -1 - for k, perm in enumerate(permutations): - is_perm = all([e1.space_dimension() == e2.space_dimension() - for e1, e2 in zip(perm, expansion)]) - for e1, e2 in zip(perm, expansion): - if is_perm: - is_perm = compare_element(e1, e2) - - if is_perm: - shift = len(expansion) - k - axes_shifts[-1] = axes_shifts[-1] + (shift, ) - break - - if shift == -1: - line_elements.append(expansion) - axes_shifts.append((0, )) - permutations = cyclic_perm(expansion) - - return line_elements, axes_shifts + line_elements.append(expansion) + + tp_shape = [tuple(e.space_dimension() for e in expansion) for expansion in line_elements] + sizes = list(map(numpy.prod, tp_shape)) + dof_ranges = numpy.cumsum([0] + sizes) + + dof_perm = [] + shifts = [] + + grouped = [False for e in line_elements] + nterms = len(line_elements) + unique_line_elements = [] + while not all(grouped): + istart = grouped.index(False) + expansion = line_elements[istart] + unique_line_elements.append(expansion) + axes_shifts = tuple() + + tdim = len(expansion) + permutations = [expansion[k:] + expansion[:k] for k in range(tdim)] + for i in range(istart, nterms): + ecur = line_elements[i] + if not grouped[i]: + for shift, perm in enumerate(permutations): + is_perm = 
all([e1.space_dimension() == e2.space_dimension() + for e1, e2 in zip(perm, ecur)]) + for e1, e2 in zip(perm, ecur): + if is_perm: + is_perm = compare_element(e1, e2) + + if is_perm: + axes_shifts += ((tdim - shift) % tdim,) + axes = numpy.arange(tdim) + dofs = numpy.arange(*dof_ranges[i:i+2], dtype=PETSc.IntType).reshape(tp_shape[istart]) + dofs = numpy.transpose(dofs, axes=numpy.roll(axes, -shift)) + dof_perm.extend(dofs.flat) + grouped[i] = True + break + + shifts.append(axes_shifts) + + return dof_perm, unique_line_elements, shifts @lru_cache(maxsize=10) @@ -784,61 +713,6 @@ def fiat_reference_prolongator(felem, celem, derivative=False): return evaluate_dual(fdual, celem, ckey) -@lru_cache(maxsize=10) -def finat_reference_prolongator(felem, celem): - from finat.quadrature import make_quadrature - from gem.interpreter import evaluate - - ref_el = felem.cell - tdim = ref_el.get_spatial_dimension() - degree = felem.degree - try: - degree = max(degree) - except TypeError: - pass - quad_degree = 2*degree+1 - - def _tabulate(e, ps, entity=None): - results = evaluate(e.basis_evaluation(0, ps, entity).values()) - return results[0].arr.reshape((len(ps.points), -1)) - - is_facet_element = True - entity_dofs = felem.entity_dofs() - for edim in sorted(entity_dofs): - v = sum(list(entity_dofs[edim].values()), []) - if len(v): - if type(edim) == tuple: - edim = sum(edim) - if edim == tdim: - is_facet_element = False - - if is_facet_element and degree > 5: - entities = [] - quadratures = [] - for key in ref_el.sub_entities: - edim = sum(key) if type(key) == tuple else key - if edim == tdim-1: - sub_entities = ref_el.sub_entities[key] - entities.extend([(key, f) for f in sub_entities]) - quadratures.extend([make_quadrature(ref_el.construct_subelement(key), quad_degree)]*len(sub_entities)) - - wts = numpy.concatenate([evaluate([q.weight_expression])[0].arr.reshape((-1,)) for q in quadratures]) - cphi = numpy.concatenate([_tabulate(celem, q.point_set, entity=e) for q, e in zip(quadratures, entities)]).T - fphi = numpy.concatenate([_tabulate(felem, q.point_set, entity=e) for q, e in zip(quadratures, entities)]).T - else: - quadrature = make_quadrature(ref_el, quad_degree) - wts = evaluate([quadrature.weight_expression])[0].arr.reshape((-1,)) - cphi = _tabulate(celem, quadrature.point_set).T - fphi = _tabulate(felem, quadrature.point_set).T - - numpy.sqrt(wts, out=wts) - numpy.multiply(fphi, wts, out=fphi) - numpy.multiply(cphi, wts, out=cphi) - cphi = cphi.reshape((celem.space_dimension(), -1)) - fphi = fphi.reshape((felem.space_dimension(), -1)) - return numpy.linalg.solve(fphi.dot(fphi.T), fphi.dot(cphi.T)) - - # Common kernel to compute y = kron(A3, kron(A2, A1)) * x # Vector and tensor field generalization from Deville, Fischer, and Mund section 8.3.1. 
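# A rough numpy transliteration of what this kernel computes (for exposition
# only; the storage convention shown here is an assumption and may not match
# the BLAS layout used below) would be
#     Y = numpy.einsum("il,jm,kn,lmnf->ijkf", A1, A2, A3, X)
# with X holding the n1*n2*n3 tensor-product coefficients of each of the f
# fields, i.e. the Kronecker product is applied one axis at a time instead of
# being formed explicitly (sum factorization).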
kronmxv_code = """ @@ -980,15 +854,13 @@ def _tabulate(e, ps, entity=None): @PETSc.Log.EventDecorator("MakeKronCode") def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): """ - Return interpolation and restriction sub-kernels between enriched tensor product elements + Return interpolation and restriction kernels between enriched tensor product elements """ operator_decl = [] prolong_code = [] restrict_code = [] - felems, fshifts = get_line_elements(Vf) - celems, cshifts = get_line_elements(Vc) - if len(felems) > 3 or len(celems) > 3: - raise ValueError("The expansion is too complicated") + _, felems, fshifts = get_permutation_to_line_elements(Vf) + _, celems, cshifts = get_permutation_to_line_elements(Vc) shifts = fshifts in_place = False @@ -1002,6 +874,7 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): pelem = celems[0] perm_name = "perm_%s" % t_in celems = celems*len(felems) + elif len(felems) == 1: shifts = cshifts psize = Vf.value_size @@ -1011,20 +884,34 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): else: raise ValueError("Cannot assign fine to coarse DOFs") - for k in range(len(shifts)): - if Vc.value_size*len(shifts[k]) < Vf.value_size: - shifts[k] = shifts[k]*(Vf.value_size//Vc.value_size) - - perm = sum(shifts, tuple()) - perm_data = ", ".join(map(str, perm)) - operator_decl.append(f""" - PetscBLASInt {perm_name}[{len(perm)}] = {{ {perm_data} }}; - """) - pshape = [e.space_dimension() for e in pelem] pargs = ", ".join(map(str, pshape+[1]*(3-len(pshape)))) pstride = psize * numpy.prod(pshape) - if shifts == fshifts: + + if set(cshifts) == set(fshifts): + psize *= len(cshifts[0]) + pstride *= len(cshifts[0]) + prolong_code.append(f""" + for({IntType_c} j=1; j<{len(fshifts)}; j++) + permute_axis(0, {pargs}, {psize}, {t_in}, {t_in}+j*{pstride}); + """) + restrict_code.append(f""" + for({IntType_c} j=1; j<{len(fshifts)}; j++) + ipermute_axis(0, {pargs}, {psize}, {t_in}, {t_in}+j*{pstride}); + """) + psize = 1 + + elif pelem == celems[0]: + for k in range(len(shifts)): + if Vc.value_size*len(shifts[k]) < Vf.value_size: + shifts[k] = shifts[k]*(Vf.value_size//Vc.value_size) + + perm = sum(shifts, tuple()) + perm_data = ", ".join(map(str, perm)) + operator_decl.append(f""" + PetscBLASInt {perm_name}[{len(perm)}] = {{ {perm_data} }}; + """) + prolong_code.append(f""" for({IntType_c} j=1; j<{len(perm)}; j++) permute_axis({perm_name}[j], {pargs}, {psize}, {t_in}, {t_in}+j*{pstride}); @@ -1205,7 +1092,7 @@ def make_mapping_code(Q, fmapping, cmapping, t_in, t_out): def make_permutation_code(V, vshape, pshape, t_in, t_out, array_name): - _, shifts = get_line_elements(V) + _, _, shifts = get_permutation_to_line_elements(V) shift = shifts[0] if shift != (0,): ndof = numpy.prod(vshape) @@ -1257,22 +1144,10 @@ def get_permuted_map(V): Return a PermutedMap with the same tensor product shape for every component of H(div) or H(curl) tensor product elements """ - expansion, shifts = get_line_elements(V) + + perm, _, shifts = get_permutation_to_line_elements(V) if {(0, )} == set(shifts): return V.cell_node_map() - - istart = 0 - perm = [] - for factors, shift in zip(expansion, shifts): - axes = numpy.arange(len(factors)) - pshape = [len(shift)] + [e.space_dimension() for e in factors] - iend = istart + numpy.prod(pshape) - permutation = numpy.reshape(numpy.arange(istart, iend), pshape) - for k in range(permutation.shape[0]): - permutation[k] = numpy.reshape(numpy.transpose(permutation[k], axes=numpy.roll(axes, shift[k])), pshape[1:]) - perm.extend(permutation.flat) 
- istart = iend - return PermutedMap(V.cell_node_map(), perm) @@ -1529,66 +1404,29 @@ def make_kernels(self, Vf, Vc): This is temporary while we wait for dual evaluation in FInAT. """ - try: - prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, self.uc) - matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, firedrake.TestFunction(Vc)) - # The way we transpose the prolongation kernel is suboptimal. - # A local matrix is generated each time the kernel is executed. - element_kernel = loopy.generate_code_v2(matrix_kernel.code).device_code() - element_kernel = element_kernel.replace("void expression_kernel", "static void expression_kernel") - coef_args = "".join([", c%d" % i for i in range(len(coefficients))]) - coef_decl = "".join([", const %s *restrict c%d" % (ScalarType_c, i) for i in range(len(coefficients))]) - dimc = Vc.finat_element.space_dimension() * Vc.value_size - dimf = Vf.finat_element.space_dimension() * Vf.value_size - restrict_code = f""" - {element_kernel} - - void restriction({ScalarType_c} *restrict Rc, const {ScalarType_c} *restrict Rf, const {ScalarType_c} *restrict w{coef_decl}) - {{ - {ScalarType_c} Afc[{dimf}*{dimc}] = {{0}}; - expression_kernel(Afc{coef_args}); - for ({IntType_c} i = 0; i < {dimf}; i++) - for ({IntType_c} j = 0; j < {dimc}; j++) - Rc[j] += Afc[i*{dimc} + j] * Rf[i] * w[i]; - }} - """ - restrict_kernel = op2.Kernel(restrict_code, "restriction", requires_zeroed_output_arguments=True) - except NotImplementedError: - if Vc.ufl_element().mapping() != Vf.ufl_element().mapping(): - raise NotImplementedError("Prolongation not supported from %s to %s" % (Vc.ufl_element(), Vf.ufl_element())) - if Vf.finat_element.space_dimension() < 400: - Jmat = finat_reference_prolongator(Vf.finat_element, Vc.finat_element) - else: - Jmat = matfree_reference_prolongator(Vf, Vc) - dimf, dimc = Jmat.shape - vsize = (Vc.value_size*Vc.finat_element.space_dimension())//dimc - Jdata = ", ".join(map(float.hex, Jmat.flat)) - kernel_code = f""" - void prolongation({ScalarType_c} *restrict uf, const {ScalarType_c} *restrict uc) - {{ - {ScalarType_c} Afc[{dimf}*{dimc}] = {{ {Jdata} }}; - for ({IntType_c} i = 0; i < {vsize}*{dimf}; i++) - uf[i] = 0.0E0; - - for ({IntType_c} i = 0; i < {dimf}; i++) - for ({IntType_c} j = 0; j < {dimc}; j++) - for ({IntType_c} k = 0; k < {vsize}; k++) - uf[i*{vsize}+k] += Afc[i*{dimc} + j] * uc[j*{vsize}+k]; - }} - - void restriction({ScalarType_c} *restrict Rc, const {ScalarType_c} *restrict Rf, const {ScalarType_c} *restrict w) - {{ - {ScalarType_c} Afc[{dimf}*{dimc}] = {{ {Jdata} }}; - for ({IntType_c} i = 0; i < {dimf}; i++) - for ({IntType_c} j = 0; j < {dimc}; j++) - for ({IntType_c} k = 0; k < {vsize}; k++) - Rc[j*{vsize}+k] += Afc[i*{dimc} + j] * Rf[i*{vsize}+k] * w[i*{vsize}+k]; - }} - """ - prolong_kernel = op2.Kernel(kernel_code, "prolongation", requires_zeroed_output_arguments=True) - restrict_kernel = op2.Kernel(kernel_code, "restriction", requires_zeroed_output_arguments=True) - coefficients = [] - + prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, self.uc) + matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, firedrake.TestFunction(Vc)) + # The way we transpose the prolongation kernel is suboptimal. + # A local matrix is generated each time the kernel is executed. 
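+        # (Expository note: the generated restriction kernel below effectively
+        # computes Rc += Afc^T * (w .* Rf), i.e. the transpose of the local
+        # prolongation matrix applied to the fine-grid residual Rf weighted
+        # entrywise by w.)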
+ element_kernel = loopy.generate_code_v2(matrix_kernel.code).device_code() + element_kernel = element_kernel.replace("void expression_kernel", "static void expression_kernel") + coef_args = "".join([", c%d" % i for i in range(len(coefficients))]) + coef_decl = "".join([", const %s *restrict c%d" % (ScalarType_c, i) for i in range(len(coefficients))]) + dimc = Vc.finat_element.space_dimension() * Vc.value_size + dimf = Vf.finat_element.space_dimension() * Vf.value_size + restrict_code = f""" + {element_kernel} + + void restriction({ScalarType_c} *restrict Rc, const {ScalarType_c} *restrict Rf, const {ScalarType_c} *restrict w{coef_decl}) + {{ + {ScalarType_c} Afc[{dimf}*{dimc}] = {{0}}; + expression_kernel(Afc{coef_args}); + for ({IntType_c} i = 0; i < {dimf}; i++) + for ({IntType_c} j = 0; j < {dimc}; j++) + Rc[j] += Afc[i*{dimc} + j] * Rf[i] * w[i]; + }} + """ + restrict_kernel = op2.Kernel(restrict_code, "restriction", requires_zeroed_output_arguments=True) return prolong_kernel, restrict_kernel, coefficients def multTranspose(self, mat, rf, rc): diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index ff6d51be54..fb6d882617 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -5,7 +5,7 @@ @pytest.fixture(params=[2, 3], ids=["Rectangle", "Box"]) def tp_mesh(request): - nx = 4 + nx = 1 distribution = {"overlap_type": (DistributedMeshOverlapType.VERTEX, 1)} m = UnitSquareMesh(nx, nx, quadrilateral=True, distribution_parameters=distribution) if request.param == 3: @@ -28,26 +28,32 @@ def tp_family(tp_mesh, request): return families[request.param] -@pytest.fixture(params=[None, "fdm", "hierarchical"], ids=["spectral", "fdm", "hierarchical"]) +@pytest.fixture(params=[None, "hierarchical", "fdm"], ids=["spectral", "hierarchical", "fdm"]) def variant(request): return request.param -def test_reconstruct_degree(tp_mesh): - tdim = tp_mesh.topological_dimension() - elist = [] - for degree in [7, 2, 31]: - V = VectorFunctionSpace(tp_mesh, "Q", degree) - Q = FunctionSpace(tp_mesh, "DQ", degree-2) - Z = MixedFunctionSpace([V, Q]) - e = Z.ufl_element() - elist.append(e) - assert e == PMGPC.reconstruct_degree(elist[0], degree) +@pytest.fixture(params=[0, 1], + ids=["CG-DG", "HDiv-DG"]) +def mixed_family(tp_mesh, request): + if request.param == 0: + Vfamily = "Q" + else: + tdim = tp_mesh.topological_dimension() + Vfamily = "NCF" if tdim == 3 else "RTCF" + Qfamily = "DQ" + return Vfamily, Qfamily + +def test_reconstruct_degree(tp_mesh, mixed_family): elist = [] + Vfamily, Qfamily = mixed_family for degree in [7, 2, 31]: - V = FunctionSpace(tp_mesh, "NCF" if tdim == 3 else "RTCF", degree) - Q = FunctionSpace(tp_mesh, "DQ", degree-1) + if Vfamily in ["NCF", "RTCF"]: + V = FunctionSpace(tp_mesh, Vfamily, degree) + else: + V = VectorFunctionSpace(tp_mesh, Vfamily, degree) + Q = FunctionSpace(tp_mesh, Qfamily, degree-2) Z = MixedFunctionSpace([V, Q]) e = Z.ufl_element() elist.append(e) @@ -80,7 +86,7 @@ def test_prolong_de_rham(tp_mesh): def test_prolong_low_order_to_restricted(tp_mesh, tp_family, variant): from firedrake.preconditioners.pmg import prolongation_matrix_matfree - degree = 3 + degree = 2 cell = tp_mesh.ufl_cell() element = FiniteElement(tp_family, cell=cell, degree=degree, variant=variant) Vi = FunctionSpace(tp_mesh, RestrictedElement(element, restriction_domain="interior")) @@ -90,14 +96,14 @@ def test_prolong_low_order_to_restricted(tp_mesh, tp_family, variant): ui = Function(Vi) uf = Function(Vf) uc = Function(Vc) - 
uc.dat.data[0::2] = 0.0 + uc.dat.data[0::2] = 2.0 uc.dat.data[1::2] = 1.0 for v in [ui, uf]: P = prolongation_matrix_matfree(v, uc).getPythonContext() P._prolong() - assert norm(ui + uf - uc, "L2") < 2E-14 + assert norm(ui + uf - uc, "L2") < 2E-14 @pytest.fixture(params=["triangles", "quadrilaterals"], scope="module") @@ -436,7 +442,7 @@ def test_p_fas_nonlinear_scalar(): rtol = 1E-8 atol = rtol * Fnorm - + rtol = 0.0 newton = { "mat_type": "aij", "snes_monitor": None, @@ -444,7 +450,7 @@ def test_p_fas_nonlinear_scalar(): "snes_type": "newtonls", "snes_max_it": 20, "snes_atol": atol, - "snes_rtol": 1E-50} + "snes_rtol": rtol} coarse = { "ksp_type": "preonly", @@ -459,7 +465,7 @@ def test_p_fas_nonlinear_scalar(): pmg = { "ksp_atol": atol*1E-1, - "ksp_rtol": 1E-50, + "ksp_rtol": rtol, "ksp_type": "cg", "ksp_converged_reason": None, "ksp_monitor_true_residual": None, @@ -479,7 +485,7 @@ def test_p_fas_nonlinear_scalar(): "snes_monitor": None, "snes_converged_reason": None, "snes_atol": atol, - "snes_rtol": 1E-50, + "snes_rtol": rtol, "snes_type": "python", "snes_python_type": "firedrake.PMGSNES", "pfas_snes_fas_type": "kaskade", @@ -503,7 +509,7 @@ def check_coarsen_quadrature(solver): Nq, = Nq Nl = p.u.ufl_element().degree() try: - Nl, = set(Nl) + Nl = max(Nl) except TypeError: pass assert Nq == 3*Nl+2 diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index e4217b5129..dc76294909 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -21,11 +21,12 @@ "pc_python_type": "firedrake.P1PC", "pmg_mg_coarse": coarse, "pmg_mg_levels": { + "ksp_max_it": 1, "ksp_type": "chebyshev", "ksp_norm_type": "none", "esteig_ksp_type": "cg", "esteig_ksp_norm_type": "natural", - "ksp_chebyshev_esteig": "0.75,0.25,0.0,1.0", + "ksp_chebyshev_esteig": "0.5,0.5,0.0,1.0", "pc_type": "python", "pc_python_type": "firedrake.FDMPC", "fdm": { @@ -55,11 +56,12 @@ "pc_python_type": "firedrake.P1PC", "pmg_mg_coarse": coarse, "pmg_mg_levels": { + "ksp_max_it": 1, "ksp_type": "chebyshev", "ksp_norm_type": "none", "esteig_ksp_type": "cg", "esteig_ksp_norm_type": "natural", - "ksp_chebyshev_esteig": "0.75,0.25,0.0,1.0", + "ksp_chebyshev_esteig": "0.5,0.5,0.0,1.0", "pc_type": "python", "pc_python_type": "firedrake.ASMExtrudedStarPC", "pc_star_mat_ordering_type": "nd", @@ -72,7 +74,7 @@ facetstar.update(ksp) -def solve_riesz_map(V, d): +def build_riesz_map(V, d): beta = Constant(1E-4) subs = [(1, 3)] if V.mesh().cell_set._extruded: @@ -84,7 +86,8 @@ def solve_riesz_map(V, d): u_exact = exp(-10*dot(x, x)) u_bc = u_exact else: - u_exact = x * exp(-10*dot(x, x)) + A = Constant([[-1.]*len(x)]*len(x)) + diag(Constant([len(x)]*len(x))) + u_exact = dot(A, x) * exp(-10*dot(x, x)) u_bc = Function(V) u_bc.project(u_exact, solver_parameters={"mat_type": "matfree", "pc_type": "jacobi"}) @@ -94,14 +97,14 @@ def solve_riesz_map(V, d): test = TestFunction(V) trial = TrialFunction(V) a = lambda v, u: inner(v, beta*u)*dx + inner(d(v), d(u))*dx - problem = LinearVariationalProblem(a(test, trial), a(test, u_exact), uh, bcs=bcs) - its = [] - for sparams in [fdmstar, facetstar]: - uh.assign(0) - solver = LinearVariationalSolver(problem, solver_parameters=sparams) - solver.solve() - its.append(solver.snes.ksp.getIterationNumber()) - return its + return LinearVariationalProblem(a(test, trial), a(test, u_exact), uh, bcs=bcs) + + +def solve_riesz_map(problem, solver_parameters): + problem.u.assign(0) + solver = LinearVariationalSolver(problem, solver_parameters=solver_parameters) + solver.solve() + return 
solver.snes.ksp.getIterationNumber() @pytest.fixture(params=[2, 3], @@ -125,33 +128,42 @@ def variant(request): @pytest.mark.skipcomplex -def test_p_independence_hgrad(mesh): +def test_p_independence_hgrad(mesh, variant): family = "Lagrange" - expected = [9, 9] if mesh.topological_dimension() == 3 else [5, 5] + expected = [16, 12] if mesh.topological_dimension() == 3 else [9, 7] + solvers = [fdmstar] if variant is None else [fdmstar, facetstar] for degree in range(3, 6): - element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") + element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant=variant) V = FunctionSpace(mesh, element) - assert solve_riesz_map(V, grad) <= expected + problem = build_riesz_map(V, grad) + for sp, max_it in zip(solvers, expected[:len(solvers)]): + assert solve_riesz_map(problem, sp) <= max_it @pytest.mark.skipcomplex def test_p_independence_hcurl(mesh): family = "NCE" if mesh.topological_dimension() == 3 else "RTCE" - expected = [8, 7] if mesh.topological_dimension() == 3 else [4, 4] + expected = [13, 10] if mesh.topological_dimension() == 3 else [6, 6] + solvers = [fdmstar, facetstar] for degree in range(3, 6): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) - assert solve_riesz_map(V, curl) <= expected + problem = build_riesz_map(V, curl) + for sp, max_it in zip(solvers, expected[:len(solvers)]): + assert solve_riesz_map(problem, sp) <= max_it @pytest.mark.skipcomplex def test_p_independence_hdiv(mesh): family = "NCF" if mesh.topological_dimension() == 3 else "RTCF" - expected = [3, 3] + expected = [6, 6] + solvers = [fdmstar, facetstar] for degree in range(3, 6): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) - assert solve_riesz_map(V, div) <= expected + problem = build_riesz_map(V, div) + for sp, max_it in zip(solvers, expected[:len(solvers)]): + assert solve_riesz_map(problem, sp) <= max_it @pytest.mark.skipcomplex @@ -182,7 +194,8 @@ def test_variable_coefficient(mesh): problem = LinearVariationalProblem(a, L, uh, bcs=bcs) solver = LinearVariationalSolver(problem, solver_parameters=fdmstar) solver.solve() - assert solver.snes.ksp.getIterationNumber() <= 14 + expected = 23 if gdim == 3 else 14 + assert solver.snes.ksp.getIterationNumber() <= expected @pytest.fixture(params=["cg", "dg", "rt"], From 8fab0081bd805a2192161cb9acf64154e698a674 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 14 Mar 2023 22:32:50 +0000 Subject: [PATCH 16/75] expand FInAT elements --- firedrake/preconditioners/pmg.py | 115 +++++++++++----------------- tests/multigrid/test_p_multigrid.py | 28 ++++++- 2 files changed, 73 insertions(+), 70 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 61ed8e06d9..b8638d1095 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -10,6 +10,7 @@ from firedrake.utils import ScalarType_c, IntType_c, cached_property from firedrake.petsc import PETSc import firedrake +import finat import ufl import loopy import numpy @@ -97,7 +98,7 @@ def initialize(self, obj): ppc = self.configure_pmg(obj, pdm) is_snes = isinstance(obj, PETSc.SNES) - copts = PETSc.Options(ppc.getOptionsPrefix()+ppc.getType()+"_coarse_") + copts = PETSc.Options(ppc.getOptionsPrefix() + ppc.getType() + "_coarse_") # Get the coarse degree from PETSc options fcp = ctx._problem.form_compiler_parameters @@ -311,7 +312,7 @@ def 
coarsen_nullspace(coarse_V, mat, fine_nullspace): def coarsen_quadrature(self, metadata, fdeg, cdeg): if isinstance(metadata, dict): # Coarsen the quadrature degree in a dictionary - # such that the ratio of quadrature nodes to interpolation nodes (qdeg+1)//(fdeg+1) is preserved + # preserving the ratio of quadrature nodes to interpolation nodes (qdeg+1)//(fdeg+1) qdeg = metadata.get("quadrature_degree", None) if qdeg is not None: cmd = dict(metadata) @@ -529,49 +530,26 @@ def expand_element(ele): """ Expand a FiniteElement as an EnrichedElement of TensorProductElements, discarding modifiers. """ - if ele.cell().cellname().startswith("quadrilateral"): - # Handle immersed quadrilaterals - quadrilateral_tpc = ufl.TensorProductCell(ufl.interval, ufl.interval) - return expand_element(ele.reconstruct(cell=quadrilateral_tpc)) - elif ele.cell() == ufl.hexahedron: - hexahedron_tpc = ufl.TensorProductCell(ufl.quadrilateral, ufl.interval) - return expand_element(ele.reconstruct(cell=hexahedron_tpc)) - elif isinstance(ele, (ufl.TensorElement, ufl.VectorElement)): - return expand_element(ele._sub_element) - elif isinstance(ele, ufl.MixedElement): - return type(ele)(*[expand_element(e) for e in ele.sub_elements()]) - elif isinstance(ele, ufl.RestrictedElement): - return type(ele)(expand_element(ele._element), restriction_domain=ele._restriction_domain) - elif isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement)): - return expand_element(ele._element) - elif isinstance(ele, ufl.WithMapping): - return expand_element(ele.wrapee) - elif isinstance(ele, ufl.EnrichedElement): - terms = [] - for e in ele._elements: - ee = expand_element(e) - if isinstance(ee, ufl.EnrichedElement): - terms.extend(ee._elements) - else: - terms.append(ee) - cell, = set([t.cell() for t in terms]) - return ufl.EnrichedElement(*terms) - elif isinstance(ele, ufl.TensorProductElement): - factors = [expand_element(e) for e in ele.sub_elements()] + if isinstance(ele, finat.FlattenedDimensions): + return expand_element(ele.product) + elif isinstance(ele, (finat.HDivElement, finat.HCurlElement)): + return expand_element(ele.wrappee) + elif isinstance(ele, finat.DiscontinuousElement): + return expand_element(ele.element) + elif isinstance(ele, finat.EnrichedElement): + terms = list(map(expand_element, ele.elements)) + return finat.EnrichedElement(terms) + elif isinstance(ele, finat.TensorProductElement): + factors = list(map(expand_element, ele.factors)) terms = [tuple()] for e in factors: new_terms = [] - for f in e._elements if isinstance(e, ufl.EnrichedElement) else [e]: - f_factors = f.sub_elements() if isinstance(f, ufl.TensorProductElement) else (f,) + for f in e.elements if isinstance(e, finat.EnrichedElement) else [e]: + f_factors = tuple(f.factors) if isinstance(f, finat.TensorProductElement) else (f,) new_terms.extend([t_factors + f_factors for t_factors in terms]) terms = new_terms - - if len(terms) == 1: - return ufl.TensorProductElement(*terms[0]) - else: - terms = [ufl.TensorProductElement(*k) for k in terms] - cell, = set([t.cell() for t in terms]) - return ufl.EnrichedElement(*terms) + terms = list(map(finat.TensorProductElement, terms)) + return finat.EnrichedElement(terms) else: return ele @@ -632,19 +610,15 @@ def compare_dual_basis(l1, l2): @PETSc.Log.EventDecorator("GetLineElements") def get_permutation_to_line_elements(V): from FIAT.reference_element import LINE - from tsfc.finatinterface import create_element ele = V.ufl_element() if isinstance(ele, ufl.MixedElement) and not isinstance(ele, 
(ufl.TensorElement, ufl.VectorElement)): raise ValueError("MixedElements are not decomposed into tensor products") - ele = expand_element(ele) - finat_ele = create_element(ele) + finat_ele = expand_element(V.finat_element) if finat_ele.space_dimension() != V.finat_element.space_dimension(): raise ValueError("Failed to decompose %s into tensor products" % V.ufl_element()) line_elements = [] - axes_shifts = [] - terms = finat_ele.elements if hasattr(finat_ele, "elements") else [finat_ele] for term in terms: factors = term.factors if hasattr(term, "factors") else (term,) @@ -658,40 +632,43 @@ def get_permutation_to_line_elements(V): dof_ranges = numpy.cumsum([0] + sizes) dof_perm = [] + unique_line_elements = [] shifts = [] - grouped = [False for e in line_elements] - nterms = len(line_elements) - unique_line_elements = [] - while not all(grouped): - istart = grouped.index(False) - expansion = line_elements[istart] - unique_line_elements.append(expansion) + visit = [False for e in line_elements] + while False in visit: + base = line_elements[visit.index(False)] + tdim = len(base) + pshape = tuple(e.space_dimension() for e in base) + unique_line_elements.append(base) + axes_shifts = tuple() + for shift in range(tdim): + if V.finat_element.formdegree != 2: + shift = (tdim - shift) % tdim - tdim = len(expansion) - permutations = [expansion[k:] + expansion[:k] for k in range(tdim)] - for i in range(istart, nterms): - ecur = line_elements[i] - if not grouped[i]: - for shift, perm in enumerate(permutations): + perm = base[shift:] + base[:shift] + for i, expansion in enumerate(line_elements): + if not visit[i]: is_perm = all([e1.space_dimension() == e2.space_dimension() - for e1, e2 in zip(perm, ecur)]) - for e1, e2 in zip(perm, ecur): + for e1, e2 in zip(perm, expansion)]) + for e1, e2 in zip(perm, expansion): if is_perm: is_perm = compare_element(e1, e2) if is_perm: - axes_shifts += ((tdim - shift) % tdim,) - axes = numpy.arange(tdim) - dofs = numpy.arange(*dof_ranges[i:i+2], dtype=PETSc.IntType).reshape(tp_shape[istart]) - dofs = numpy.transpose(dofs, axes=numpy.roll(axes, -shift)) - dof_perm.extend(dofs.flat) - grouped[i] = True + axes_shifts += ((tdim - shift) % tdim, ) + dofs = numpy.arange(*dof_ranges[i:i+2], dtype=PETSc.IntType).reshape(tp_shape[i]) + dofs = numpy.transpose(dofs, axes=numpy.roll(numpy.arange(tdim), shift)) + assert dofs.shape == pshape + dof_perm.append(dofs.flat) + visit[i] = True break shifts.append(axes_shifts) + dof_perm = numpy.concatenate(dof_perm) + dof_perm = numpy.argsort(dof_perm) return dof_perm, unique_line_elements, shifts @@ -1138,16 +1115,16 @@ def make_permutation_code(V, vshape, pshape, t_in, t_out, array_name): return decl, prolong, restrict -@PETSc.Log.EventDecorator("GetPermutedMap") def get_permuted_map(V): """ Return a PermutedMap with the same tensor product shape for every component of H(div) or H(curl) tensor product elements """ - perm, _, shifts = get_permutation_to_line_elements(V) - if {(0, )} == set(shifts): + perm, _, _ = get_permutation_to_line_elements(V) + if all(perm[:-1] < perm[1:]): return V.cell_node_map() + return PermutedMap(V.cell_node_map(), perm) diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index fb6d882617..37c56a37fa 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -96,13 +96,39 @@ def test_prolong_low_order_to_restricted(tp_mesh, tp_family, variant): ui = Function(Vi) uf = Function(Vf) uc = Function(Vc) - uc.dat.data[0::2] = 2.0 + 
uc.dat.data[0::2] = 0.0 uc.dat.data[1::2] = 1.0 + # import numpy + # from firedrake.preconditioners.pmg import get_permutation_to_line_elements + + # cperm, _, _ = get_permutation_to_line_elements(Vc) + # cnum = Vc.cell_node_map().values[0] + # uc.dat.data[cnum[cperm]] = numpy.arange(1, 1+len(uc.dat.data)) + + # fperm, _, _ = get_permutation_to_line_elements(Vf) + # fnum = Vf.cell_node_map().values[0] + + # print() + # # print("cperm", cperm) + # # print("fperm", fperm) + # print("inv(cperm)", numpy.argsort(cperm)) + # print("inv(fperm)", numpy.argsort(fperm)) + + # expr = Constant([0]*3) + # for row in numpy.eye(3): + # expr.assign(row) + # uf.project(expr, solver_parameters={"mat_type": "matfree"}) + # print(numpy.nonzero(numpy.rint(uf.dat.data[fnum]).astype(int))[0]) + + # uf.project(uc, solver_parameters={"mat_type": "matfree"}) + # print(numpy.rint(uf.dat.data[fnum[fperm]]).astype(int)) for v in [ui, uf]: P = prolongation_matrix_matfree(v, uc).getPythonContext() P._prolong() + # print(numpy.rint(uf.dat.data[fnum[fperm]]).astype(int)) + assert norm(ui + uf - uc, "L2") < 2E-14 From fc88b9a18fed0d6c572b251904193852f9f40d16 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 15 Mar 2023 11:02:45 +0000 Subject: [PATCH 17/75] fix BLAS kernels for FacetElement(NCE) --- firedrake/preconditioners/fdm.py | 2 +- firedrake/preconditioners/pmg.py | 62 ++++++++++++++--------------- tests/multigrid/test_p_multigrid.py | 31 ++------------- 3 files changed, 33 insertions(+), 62 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index a4dd248622..1aeaa8b3a3 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1074,7 +1074,7 @@ class PoissonFDMPC(FDMPC): def assemble_reference_tensor(self, V): from firedrake.preconditioners.pmg import get_permutation_to_line_elements try: - _, line_elements, shifts = get_permutation_to_line_elements(V) + _, line_elements, shifts = get_permutation_to_line_elements(V.finat_element) except ValueError: raise ValueError("FDMPC does not support the element %s" % V.ufl_element()) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index b8638d1095..6777b1ecd9 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -608,27 +608,24 @@ def compare_dual_basis(l1, l2): @lru_cache(maxsize=10) @PETSc.Log.EventDecorator("GetLineElements") -def get_permutation_to_line_elements(V): +def get_permutation_to_line_elements(finat_element): from FIAT.reference_element import LINE - ele = V.ufl_element() - if isinstance(ele, ufl.MixedElement) and not isinstance(ele, (ufl.TensorElement, ufl.VectorElement)): - raise ValueError("MixedElements are not decomposed into tensor products") - finat_ele = expand_element(V.finat_element) - if finat_ele.space_dimension() != V.finat_element.space_dimension(): - raise ValueError("Failed to decompose %s into tensor products" % V.ufl_element()) + expansion = expand_element(finat_element) + if expansion.space_dimension() != finat_element.space_dimension(): + raise ValueError("Failed to decompose %s into tensor products" % finat_element) line_elements = [] - terms = finat_ele.elements if hasattr(finat_ele, "elements") else [finat_ele] + terms = expansion.elements if hasattr(expansion, "elements") else [expansion] for term in terms: factors = term.factors if hasattr(term, "factors") else (term,) - expansion = tuple(e.fiat_equivalent for e in reversed(factors)) - if not all([e.get_reference_element().shape == LINE for e in 
expansion]): - raise ValueError("Failed to decompose %s into line elements" % V.ufl_element()) - line_elements.append(expansion) + fiat_factors = tuple(e.fiat_equivalent for e in reversed(factors)) + if not all([e.get_reference_element().shape == LINE for e in fiat_factors]): + raise ValueError("Failed to decompose %s into line elements" % fiat_factors) + line_elements.append(fiat_factors) - tp_shape = [tuple(e.space_dimension() for e in expansion) for expansion in line_elements] - sizes = list(map(numpy.prod, tp_shape)) + shapes = [tuple(e.space_dimension() for e in factors) for factors in line_elements] + sizes = list(map(numpy.prod, shapes)) dof_ranges = numpy.cumsum([0] + sizes) dof_perm = [] @@ -644,23 +641,23 @@ def get_permutation_to_line_elements(V): axes_shifts = tuple() for shift in range(tdim): - if V.finat_element.formdegree != 2: + if finat_element.formdegree != 2: shift = (tdim - shift) % tdim perm = base[shift:] + base[:shift] - for i, expansion in enumerate(line_elements): + for i, term in enumerate(line_elements): if not visit[i]: is_perm = all([e1.space_dimension() == e2.space_dimension() - for e1, e2 in zip(perm, expansion)]) - for e1, e2 in zip(perm, expansion): + for e1, e2 in zip(perm, term)]) + for e1, e2 in zip(perm, term): if is_perm: is_perm = compare_element(e1, e2) if is_perm: axes_shifts += ((tdim - shift) % tdim, ) - dofs = numpy.arange(*dof_ranges[i:i+2], dtype=PETSc.IntType).reshape(tp_shape[i]) - dofs = numpy.transpose(dofs, axes=numpy.roll(numpy.arange(tdim), shift)) - assert dofs.shape == pshape + dofs = numpy.arange(*dof_ranges[i:i+2], dtype=PETSc.IntType).reshape(pshape) + dofs = numpy.transpose(dofs, axes=numpy.roll(numpy.arange(tdim), -shift)) + assert dofs.shape == shapes[i] dof_perm.append(dofs.flat) visit[i] = True break @@ -668,7 +665,6 @@ def get_permutation_to_line_elements(V): shifts.append(axes_shifts) dof_perm = numpy.concatenate(dof_perm) - dof_perm = numpy.argsort(dof_perm) return dof_perm, unique_line_elements, shifts @@ -836,8 +832,8 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): operator_decl = [] prolong_code = [] restrict_code = [] - _, felems, fshifts = get_permutation_to_line_elements(Vf) - _, celems, cshifts = get_permutation_to_line_elements(Vc) + _, felems, fshifts = get_permutation_to_line_elements(Vf.finat_element) + _, celems, cshifts = get_permutation_to_line_elements(Vc.finat_element) shifts = fshifts in_place = False @@ -866,17 +862,17 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): pstride = psize * numpy.prod(pshape) if set(cshifts) == set(fshifts): - psize *= len(cshifts[0]) - pstride *= len(cshifts[0]) + csize = Vc.value_size * Vc.finat_element.space_dimension() prolong_code.append(f""" - for({IntType_c} j=1; j<{len(fshifts)}; j++) - permute_axis(0, {pargs}, {psize}, {t_in}, {t_in}+j*{pstride}); + for({IntType_c} i=1; i<{len(fshifts)}; i++) + for({IntType_c} j=0; j<{csize}; j++) + {t_in}[i*{csize} + j] = {t_in}[j]; """) restrict_code.append(f""" - for({IntType_c} j=1; j<{len(fshifts)}; j++) - ipermute_axis(0, {pargs}, {psize}, {t_in}, {t_in}+j*{pstride}); + for({IntType_c} i=1; i<{len(fshifts)}; i++) + for({IntType_c} j=0; j<{csize}; j++) + {t_in}[j] += {t_in}[i*{csize} + j]; """) - psize = 1 elif pelem == celems[0]: for k in range(len(shifts)): @@ -1069,7 +1065,7 @@ def make_mapping_code(Q, fmapping, cmapping, t_in, t_out): def make_permutation_code(V, vshape, pshape, t_in, t_out, array_name): - _, _, shifts = get_permutation_to_line_elements(V) + _, _, shifts = 
get_permutation_to_line_elements(V.finat_element) shift = shifts[0] if shift != (0,): ndof = numpy.prod(vshape) @@ -1121,7 +1117,7 @@ def get_permuted_map(V): every component of H(div) or H(curl) tensor product elements """ - perm, _, _ = get_permutation_to_line_elements(V) + perm, _, _ = get_permutation_to_line_elements(V.finat_element) if all(perm[:-1] < perm[1:]): return V.cell_node_map() diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index 37c56a37fa..f81b707fda 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -28,7 +28,8 @@ def tp_family(tp_mesh, request): return families[request.param] -@pytest.fixture(params=[None, "hierarchical", "fdm"], ids=["spectral", "hierarchical", "fdm"]) +@pytest.fixture(params=[None, "hierarchical", "fdm"], + ids=["spectral", "hierarchical", "fdm"]) def variant(request): return request.param @@ -86,7 +87,7 @@ def test_prolong_de_rham(tp_mesh): def test_prolong_low_order_to_restricted(tp_mesh, tp_family, variant): from firedrake.preconditioners.pmg import prolongation_matrix_matfree - degree = 2 + degree = 3 cell = tp_mesh.ufl_cell() element = FiniteElement(tp_family, cell=cell, degree=degree, variant=variant) Vi = FunctionSpace(tp_mesh, RestrictedElement(element, restriction_domain="interior")) @@ -98,37 +99,11 @@ def test_prolong_low_order_to_restricted(tp_mesh, tp_family, variant): uc = Function(Vc) uc.dat.data[0::2] = 0.0 uc.dat.data[1::2] = 1.0 - # import numpy - # from firedrake.preconditioners.pmg import get_permutation_to_line_elements - - # cperm, _, _ = get_permutation_to_line_elements(Vc) - # cnum = Vc.cell_node_map().values[0] - # uc.dat.data[cnum[cperm]] = numpy.arange(1, 1+len(uc.dat.data)) - - # fperm, _, _ = get_permutation_to_line_elements(Vf) - # fnum = Vf.cell_node_map().values[0] - - # print() - # # print("cperm", cperm) - # # print("fperm", fperm) - # print("inv(cperm)", numpy.argsort(cperm)) - # print("inv(fperm)", numpy.argsort(fperm)) - - # expr = Constant([0]*3) - # for row in numpy.eye(3): - # expr.assign(row) - # uf.project(expr, solver_parameters={"mat_type": "matfree"}) - # print(numpy.nonzero(numpy.rint(uf.dat.data[fnum]).astype(int))[0]) - - # uf.project(uc, solver_parameters={"mat_type": "matfree"}) - # print(numpy.rint(uf.dat.data[fnum[fperm]]).astype(int)) for v in [ui, uf]: P = prolongation_matrix_matfree(v, uc).getPythonContext() P._prolong() - # print(numpy.rint(uf.dat.data[fnum[fperm]]).astype(int)) - assert norm(ui + uf - uc, "L2") < 2E-14 From 8df4c25f58b48df4dced2122fceb195c003ad1c2 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 15 Mar 2023 12:18:36 +0000 Subject: [PATCH 18/75] clean up --- firedrake/preconditioners/pmg.py | 30 +++++++++++++---------------- tests/multigrid/test_p_multigrid.py | 2 +- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 6777b1ecd9..e626999b11 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -857,21 +857,17 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): else: raise ValueError("Cannot assign fine to coarse DOFs") - pshape = [e.space_dimension() for e in pelem] - pargs = ", ".join(map(str, pshape+[1]*(3-len(pshape)))) - pstride = psize * numpy.prod(pshape) - if set(cshifts) == set(fshifts): csize = Vc.value_size * Vc.finat_element.space_dimension() prolong_code.append(f""" - for({IntType_c} i=1; i<{len(fshifts)}; i++) - for({IntType_c} j=0; j<{csize}; j++) - 
{t_in}[i*{csize} + j] = {t_in}[j]; + for({IntType_c} j=1; j<{len(fshifts)}; j++) + for({IntType_c} i=0; i<{csize}; i++) + {t_in}[j*{csize} + i] = {t_in}[i]; """) restrict_code.append(f""" - for({IntType_c} i=1; i<{len(fshifts)}; i++) - for({IntType_c} j=0; j<{csize}; j++) - {t_in}[j] += {t_in}[i*{csize} + j]; + for({IntType_c} j=1; j<{len(fshifts)}; j++) + for({IntType_c} i=0; i<{csize}; i++) + {t_in}[i] += {t_in}[j*{csize} + i]; """) elif pelem == celems[0]: @@ -879,12 +875,15 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): if Vc.value_size*len(shifts[k]) < Vf.value_size: shifts[k] = shifts[k]*(Vf.value_size//Vc.value_size) + pshape = [e.space_dimension() for e in pelem] + pargs = ", ".join(map(str, pshape+[1]*(3-len(pshape)))) + pstride = psize * numpy.prod(pshape) + perm = sum(shifts, tuple()) perm_data = ", ".join(map(str, perm)) operator_decl.append(f""" PetscBLASInt {perm_name}[{len(perm)}] = {{ {perm_data} }}; """) - prolong_code.append(f""" for({IntType_c} j=1; j<{len(perm)}; j++) permute_axis({perm_name}[j], {pargs}, {psize}, {t_in}, {t_in}+j*{pstride}); @@ -1116,12 +1115,10 @@ def get_permuted_map(V): Return a PermutedMap with the same tensor product shape for every component of H(div) or H(curl) tensor product elements """ - - perm, _, _ = get_permutation_to_line_elements(V.finat_element) - if all(perm[:-1] < perm[1:]): + indices, _, _ = get_permutation_to_line_elements(V.finat_element) + if all(indices[:-1] < indices[1:]): return V.cell_node_map() - - return PermutedMap(V.cell_node_map(), perm) + return PermutedMap(V.cell_node_map(), indices) class StandaloneInterpolationMatrix(object): @@ -1546,5 +1543,4 @@ def prolongation_matrix_matfree(Vf, Vc, Vf_bcs=[], Vc_bcs=[]): sizes = (Vf.dof_dset.layout_vec.getSizes(), Vc.dof_dset.layout_vec.getSizes()) M_shll = PETSc.Mat().createPython(sizes, ctx, comm=Vf._comm) M_shll.setUp() - return M_shll diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index f81b707fda..212a06bae1 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -87,7 +87,7 @@ def test_prolong_de_rham(tp_mesh): def test_prolong_low_order_to_restricted(tp_mesh, tp_family, variant): from firedrake.preconditioners.pmg import prolongation_matrix_matfree - degree = 3 + degree = 5 cell = tp_mesh.ufl_cell() element = FiniteElement(tp_family, cell=cell, degree=degree, variant=variant) Vi = FunctionSpace(tp_mesh, RestrictedElement(element, restriction_domain="interior")) From f33456fff58d5dae7dfbbe0d6c0a692149c43058 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 15 Mar 2023 17:29:33 +0000 Subject: [PATCH 19/75] fix H(div) IPDG solver, use more elegant caching --- firedrake/preconditioners/fdm.py | 67 ++++++------------ firedrake/preconditioners/pmg.py | 101 +++++++++++++++------------- tests/multigrid/test_p_multigrid.py | 2 +- 3 files changed, 76 insertions(+), 94 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 1aeaa8b3a3..c315b9dbd5 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -53,12 +53,12 @@ class FDMPC(PCBase): @staticmethod def load_set_values(triu=False): - cache = FDMPC._c_code_cache key = triu - if key not in cache: - comm = PETSc.COMM_SELF - cache[key] = load_assemble_csr(comm, triu=triu) - return cache[key] + cache = FDMPC._c_code_cache + try: + return cache[key] + except KeyError: + return cache.setdefault(key, load_assemble_csr(PETSc.COMM_SELF, triu=triu)) 
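
Patch 19/75 ("use more elegant caching") swaps the earlier "if key not in cache" checks for a look-up in a class-level dict through try/except KeyError and dict.setdefault, as in load_set_values above; the same idiom reappears in assemble_coef, assemble_reference_tensor and PMGBase.create_transfer further down the series. A minimal, self-contained sketch of the pattern follows; Cached and expensive_build are illustrative stand-ins, not Firedrake API.

def expensive_build(key):
    # stand-in for e.g. load_assemble_csr(...) or a prolongation matrix constructor
    return object()

class Cached:
    _cache = {}

    @staticmethod
    def get(key):
        cache = Cached._cache
        try:
            return cache[key]
        except KeyError:
            # setdefault hands back the value already stored under key if one exists,
            # so repeated calls always return the same cached object
            return cache.setdefault(key, expensive_build(key))

assert Cached.get("fdm") is Cached.get("fdm")   # built once, then reused

The advantage of setdefault over plain assignment is that the first value stored under a key wins, so re-entrant or repeated calls agree on a single cached object.
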
@PETSc.Log.EventDecorator("FDMInit") def initialize(self, pc): @@ -545,8 +545,9 @@ def assemble_coef(self, J, form_compiler_parameters): key = (mixed_form.signature(), mesh) block_diagonal = True - - if key not in self._coefficient_cache: + try: + return self._coefficient_cache[key] + except KeyError: if not block_diagonal or not V.shape: tensor = firedrake.Function(Z) coefficients = {"beta": tensor.sub(0), "alpha": tensor.sub(1)} @@ -562,9 +563,7 @@ def assemble_coef(self, J, form_compiler_parameters): ctx = sub.getPythonContext() coefficients[name] = ctx._block_diagonal assembly_callables.append(ctx._assemble_block_diagonal) - - self._coefficient_cache[key] = (coefficients, assembly_callables) - return self._coefficient_cache[key] + return self._coefficient_cache.setdefault(key, (coefficients, assembly_callables)) @PETSc.Log.EventDecorator("FDMRefTensor") def assemble_reference_tensor(self, V): @@ -581,14 +580,16 @@ def assemble_reference_tensor(self, V): is_interior, is_facet = is_restricted(V.finat_element) key = (degree, tdim, formdegree, V.value_size, is_interior, is_facet) cache = self._reference_tensor_cache - if key not in cache: + try: + return cache[key] + except KeyError: full_key = (degree, tdim, formdegree, V.value_size, False, False) if is_facet and full_key in cache: result = cache[full_key] noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.comm) - cache[key] = result.createSubMatrix(noperm, self.ises[1]) + result = result.createSubMatrix(noperm, self.ises[1]) noperm.destroy() - return cache[key] + return cache.setdefault(key, result) elements = sorted(get_base_elements(V.finat_element), key=lambda e: e.formdegree) ref_el = elements[0].get_reference_element() @@ -635,8 +636,7 @@ def assemble_reference_tensor(self, V): result = result.createSubMatrix(noperm, self.ises[1]) noperm.destroy() - cache[key] = result - return cache[key] + return cache.setdefault(key, result) def factor_interior_mat(A00): @@ -1091,7 +1091,8 @@ def assemble_reference_tensor(self, V): bdof = [] # indices of point evaluation dofs for each direction for e in line_elements: Afdm[:0], Dfdm[:0], bdof[:0] = tuple(zip(fdm_setup_ipdg(e, eta))) - if not (e.formdegree or is_dg): + if not is_dg and e.degree() == degree: + # do not apply SIPG along continuous directions Dfdm[0] = None return Afdm, Dfdm, bdof @@ -1144,7 +1145,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): # assemble zero-th order term separately, including off-diagonals (mixed components) # I cannot do this for hdiv elements as off-diagonals are not sparse, this is because - # the FDM eigenbases for GLL(N) and GLL(N-1) are not orthogonal to each other + # the FDM eigenbases for CG(k) and DG(k-1) are not orthogonal to each other rindices = None use_diag_Bq = Bq is None or len(Bq.ufl_shape) != 2 or static_condensation if not use_diag_Bq: @@ -1500,15 +1501,6 @@ def pull_axis(x, pshape, idir): return numpy.reshape(numpy.moveaxis(numpy.reshape(x.copy(), pshape), idir, 0), x.shape) -def set_submat_csr(A_global, A_local, global_indices, imode): - """insert values from A_local to A_global on the diagonal block with indices global_indices""" - indptr, indices, data = A_local.getValuesCSR() - for i, row in enumerate(global_indices.flat): - i0 = indptr[i] - i1 = indptr[i+1] - A_global.setValues(row, global_indices.flat[indices[i0:i1]], data[i0:i1], imode) - - def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False): """ Create a SeqAIJ Mat from a dense matrix using the diagonal and a 
subset of rows and columns. @@ -1555,7 +1547,7 @@ def fdm_setup_ipdg(fdm_element, eta): Bhat, and bcs(Ahat) for every combination of either natural or weak Dirichlet BCs on each endpoint. Dfdm: the tabulation of the normal derivatives of the Dirichlet eigenfunctions. - bdof: the indices of PointEvaluation dofs. + bdof: the indices of the vertex degrees of freedom. """ ref_el = fdm_element.get_reference_element() degree = fdm_element.degree() @@ -1563,7 +1555,8 @@ def fdm_setup_ipdg(fdm_element, eta): rule = fdm_element.dual.rule else: rule = FIAT.quadrature.make_quadrature(ref_el, degree+1) - bdof = [k for k, f in enumerate(fdm_element.dual_basis()) if isinstance(f, FIAT.functional.PointEvaluation)] + edof = fdm_element.entity_dofs() + bdof = edof[0][0] + edof[0][1] phi = fdm_element.tabulate(1, rule.get_points()) Jhat = phi[(0, )] @@ -1749,21 +1742,3 @@ def glonum(node_map): nelz = layers[:, 1]-layers[:, 0]-1 to_layer = numpy.concatenate([numpy.arange(nz, dtype=node_map.offset.dtype) for nz in nelz]) return numpy.repeat(node_map.values_with_halo, nelz, axis=0) + numpy.kron(to_layer.reshape((-1, 1)), node_map.offset) - - -def spy(A, comm=None): - import matplotlib.pyplot as plt - import scipy.sparse as sp - if comm is None: - comm = A.comm - nnz = A.getInfo()["nz_used"] - if A.getType().endswith("sbaij"): - A.setOption(PETSc.Mat.Option.GETROW_UPPERTRIANGULAR, True) - csr = tuple(reversed(A.getValuesCSR())) - if comm.rank == 0: - csr[0].fill(1) - scipy_mat = sp.csr_matrix(csr, shape=A.getSize()) - fig, axes = plt.subplots(nrows=1, ncols=1) - axes.spy(scipy_mat, marker=".", markersize=2) - plt.title("nnz(A) = %d" % nnz) - plt.show() diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index e626999b11..bf3a462e0a 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -50,6 +50,8 @@ class PMGBase(PCSNESBase): _prefix = "pmg_" + _cache_transfer = {} + def coarsen_element(self, ele): """ Coarsen a given element to form the next problem down in the p-hierarchy. 
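
The hunk below rewrites PMGBase.create_transfer to fetch transfer operators from the class-level _cache_transfer dict introduced above, keyed on the fine and coarse spaces, both sets of boundary conditions, and the matrix type; the boundary-condition lists are converted to tuples because dict keys must be hashable, and mat_type is part of the key because the "matfree" and "aij" constructors return different operators. A small plain-Python sketch of the hashability point, with placeholder strings standing in for Firedrake function spaces and DirichletBC lists:

cache = {}
fbcs = []                                  # list of boundary conditions in the real code
key = ("Vf", "Vc", tuple(fbcs), "aij")     # hashable: tuples only
cache[key] = "prolongation operator"
try:
    cache[("Vf", "Vc", fbcs, "aij")] = "prolongation operator"
except TypeError as err:
    print(err)                             # lists are unhashable and cannot be used in dict keys
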
@@ -333,19 +335,22 @@ def coarsen_bcs(self, fbcs, cV): raise NotImplementedError("Unsupported BC type, please get in touch if you need this") return cbcs - @staticmethod - @lru_cache(maxsize=20) - def create_transfer(cctx, fctx, mat_type, cbcs, fbcs): - cbcs = cctx._problem.bcs if cbcs else [] - fbcs = fctx._problem.bcs if fbcs else [] + def create_transfer(self, cctx, fctx, mat_type, cbcs, fbcs): cV = cctx.J.arguments()[0].function_space() fV = fctx.J.arguments()[0].function_space() - if mat_type == "matfree": - return prolongation_matrix_matfree(fV, cV, fbcs, cbcs) - elif mat_type == "aij": - return prolongation_matrix_aij(fV, cV, fbcs, cbcs) - else: - raise ValueError("Unknown matrix type") + cbcs = tuple(cctx._problem.bcs) if cbcs else tuple() + fbcs = tuple(fctx._problem.bcs) if fbcs else tuple() + key = (fV, cV, cbcs, fbcs, mat_type) + try: + return self._cache_transfer[key] + except KeyError: + if mat_type == "matfree": + construct_mat = prolongation_matrix_matfree + elif mat_type == "aij": + construct_mat = prolongation_matrix_aij + else: + raise ValueError("Unknown matrix type") + return self._cache_transfer.setdefault(key, construct_mat(fV, cV, fbcs, cbcs)) def create_interpolation(self, dmc, dmf): prefix = dmc.getOptionsPrefix() @@ -592,18 +597,13 @@ def compare_dual(b1, b2): k1 = numpy.array([p1[k][0][0] for k in p1]) k2 = numpy.array([p2[k][0][0] for k in p2]) - if not numpy.allclose(k1, k2, rtol=1E-16, atol=1E-16): - return False - return True + return numpy.allclose(k1, k2, rtol=1E-16, atol=1E-16) def compare_dual_basis(l1, l2): if len(l1) != len(l2): return False - for b1, b2 in zip(l1, l2): - if not compare_dual(b1, b2): - return False - return True + return all(compare_dual(b1, b2) for b1, b2 in zip(l1, l2)) @lru_cache(maxsize=10) @@ -615,14 +615,26 @@ def get_permutation_to_line_elements(finat_element): if expansion.space_dimension() != finat_element.space_dimension(): raise ValueError("Failed to decompose %s into tensor products" % finat_element) + unique_factors = [] line_elements = [] terms = expansion.elements if hasattr(expansion, "elements") else [expansion] for term in terms: factors = term.factors if hasattr(term, "factors") else (term,) - fiat_factors = tuple(e.fiat_equivalent for e in reversed(factors)) - if not all([e.get_reference_element().shape == LINE for e in fiat_factors]): + fiat_factors = [e.fiat_equivalent for e in reversed(factors)] + if any(e.get_reference_element().shape != LINE for e in fiat_factors): raise ValueError("Failed to decompose %s into line elements" % fiat_factors) - line_elements.append(fiat_factors) + + # use the same FIAT element if it appears multiple times in the expansion + for i in range(len(fiat_factors)): + n = fiat_factors[i] + for f in unique_factors: + if compare_element(n, f): + n = f + break + if n is fiat_factors[i]: + unique_factors.append(n) + fiat_factors[i] = n + line_elements.append(tuple(fiat_factors)) shapes = [tuple(e.space_dimension() for e in factors) for factors in line_elements] sizes = list(map(numpy.prod, shapes)) @@ -647,11 +659,10 @@ def get_permutation_to_line_elements(finat_element): perm = base[shift:] + base[:shift] for i, term in enumerate(line_elements): if not visit[i]: - is_perm = all([e1.space_dimension() == e2.space_dimension() - for e1, e2 in zip(perm, term)]) - for e1, e2 in zip(perm, term): - if is_perm: - is_perm = compare_element(e1, e2) + is_perm = all(e1.space_dimension() == e2.space_dimension() + for e1, e2 in zip(perm, term)) + if is_perm: + is_perm = all(compare_element(e1, e2) for e1, 
e2 in zip(perm, term)) if is_perm: axes_shifts += ((tdim - shift) % tdim, ) @@ -660,7 +671,6 @@ def get_permutation_to_line_elements(finat_element): assert dofs.shape == shapes[i] dof_perm.append(dofs.flat) visit[i] = True - break shifts.append(axes_shifts) @@ -838,7 +848,7 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): shifts = fshifts in_place = False if len(felems) == len(celems): - in_place = all([(len(fs)*Vf.value_size == len(cs)*Vc.value_size) for fs, cs in zip(fshifts, cshifts)]) + in_place = all((len(fs)*Vf.value_size == len(cs)*Vc.value_size) for fs, cs in zip(fshifts, cshifts)) psize = Vf.value_size if not in_place: @@ -914,7 +924,7 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): cshapes.append((nscal,) + tuple(cshape)) J = [fiat_reference_prolongator(fe, ce).T for fe, ce in zip(felem, celem)] - if any([Jk.size and numpy.isclose(Jk, 0.0E0).all() for Jk in J]): + if any(Jk.size and numpy.isclose(Jk, 0.0E0).all() for Jk in J): prolong_code.append(f""" for({IntType_c} i=0; i<{nscal*numpy.prod(fshape)}; i++) {t_out}[i+{fskip}] = 0.0E0; """) @@ -1116,7 +1126,7 @@ def get_permuted_map(V): every component of H(div) or H(curl) tensor product elements """ indices, _, _ = get_permutation_to_line_elements(V.finat_element) - if all(indices[:-1] < indices[1:]): + if numpy.all(indices[:-1] < indices[1:]): return V.cell_node_map() return PermutedMap(V.cell_node_map(), indices) @@ -1129,24 +1139,21 @@ class StandaloneInterpolationMatrix(object): _cache_work = {} def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): + self.uf = self.work_function(Vf) + self.uc = self.work_function(Vc) + self.Vf = self.uf.function_space() + self.Vc = self.uc.function_space() self.Vf_bcs = Vf_bcs self.Vc_bcs = Vc_bcs - if isinstance(Vf, firedrake.Function): - self.uf = Vf - Vf = Vf.function_space() - else: - if Vf not in self._cache_work: - self._cache_work[Vf] = firedrake.Function(Vf) - self.uf = self._cache_work[Vf] - if isinstance(Vc, firedrake.Function): - self.uc = Vc - Vc = Vc.function_space() + + def work_function(self, V): + if isinstance(V, firedrake.Function): + return V else: - if Vc not in self._cache_work: - self._cache_work[Vc] = firedrake.Function(Vc) - self.uc = self._cache_work[Vc] - self.Vf = Vf - self.Vc = Vc + try: + return self._cache_work[V] + except KeyError: + return self._cache_work.setdefault(V, firedrake.Function(V)) @cached_property def _weight(self): @@ -1187,8 +1194,8 @@ def _kernels(self): self.uf.dat(op2.READ, uf_map), self._weight.dat(op2.READ, uf_map)] coefficient_args = [c.dat(op2.READ, c.cell_node_map()) for c in coefficients] - prolong = partial(op2.par_loop, *prolong_args, *coefficient_args) - restrict = partial(op2.par_loop, *restrict_args, *coefficient_args) + prolong = op2.ParLoop(*prolong_args, *coefficient_args) + restrict = op2.ParLoop(*restrict_args, *coefficient_args) return prolong, restrict def _prolong(self): diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index 212a06bae1..0b6c36f0b9 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -5,7 +5,7 @@ @pytest.fixture(params=[2, 3], ids=["Rectangle", "Box"]) def tp_mesh(request): - nx = 1 + nx = 4 distribution = {"overlap_type": (DistributedMeshOverlapType.VERTEX, 1)} m = UnitSquareMesh(nx, nx, quadrilateral=True, distribution_parameters=distribution) if request.param == 3: From 0f2e80892b6b579bfd1e72c1818a72b8ce76c143 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 16 Mar 2023 13:21:33 +0000 Subject: [PATCH 
20/75] add comments --- firedrake/preconditioners/fdm.py | 106 +++++++++++++++++----------- firedrake/preconditioners/pmg.py | 45 ++++++------ tests/multigrid/test_p_multigrid.py | 14 +++- tests/regression/test_fdm.py | 16 +++-- 4 files changed, 105 insertions(+), 76 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index c315b9dbd5..35aed9fed4 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -104,7 +104,7 @@ def initialize(self, pc): if element == e_fdm: V_fdm, J_fdm, bcs_fdm = (V, J, bcs) else: - # Matrix-free assembly of the transformed Jacobian + # Reconstruct forms with variant element V_fdm = firedrake.FunctionSpace(V.mesh(), e_fdm) J_fdm = J(*[t.reconstruct(function_space=V_fdm) for t in J.arguments()], coefficients={}) bcs_fdm = [] @@ -114,6 +114,7 @@ def initialize(self, pc): W = W.sub(index) bcs_fdm.append(bc.reconstruct(V=W, g=0)) + # Construct interpolation from original to variant spaces self.fdm_interp = prolongation_matrix_matfree(V, V_fdm, [], bcs_fdm) self.work_vec_x = Amat.createVecLeft() self.work_vec_y = Amat.createVecRight() @@ -156,7 +157,7 @@ def interp_nullspace(I, nsp): fcp=fcp, options_prefix=options_prefix) # Assemble the FDM preconditioner with sparse local matrices - Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, fcp, appctx, pmat_type) + Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, fcp, pmat_type) self._assemble_P() Pmat.setNullSpace(Amat.getNullSpace()) Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) @@ -183,15 +184,14 @@ def interp_nullspace(I, nsp): fdmpc.setFromOptions() @PETSc.Log.EventDecorator("FDMPrealloc") - def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, appctx, pmat_type): + def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, pmat_type): """ - Assemble the sparse preconditioner with cell-wise constant coefficients. + Assemble the sparse preconditioner from diagonal mass matrices. 
:arg V: the :class:`.FunctionSpace` of the form arguments :arg J: the Jacobian bilinear form :arg bcs: an iterable of boundary conditions on V :arg form_compiler_parameters: parameters to assemble diagonal factors - :arg appctx: the application context :pmat_type: the preconditioner `PETSc.Mat.Type` :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and its assembly callable @@ -232,15 +232,15 @@ def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, appctx, pmat_type i1 = PETSc.IS().createGeneral(dofs, comm=PETSc.COMM_SELF) self.get_static_condensation[V] = lambda Ae: condense_element_pattern(Ae, self.ises[0], i1, self.submats) - # dict of cell to global mappings for each function space - self.cell_to_global = dict() - self.lgmaps = dict() - @PETSc.Log.EventDecorator("FDMGetIndices") def cell_to_global(lgmap, cell_to_local, cell_index, result=None): + # Be careful not to create new arrays result = cell_to_local(cell_index, result=result) return lgmap.apply(result, result=result) + # Create data strctures needed for assembly + self.cell_to_global = dict() + self.lgmaps = dict() bc_rows = dict() for Vsub in V: lgmap = Vsub.local_to_global_map([bc.reconstruct(V=Vsub, g=0) for bc in bcs]) @@ -253,13 +253,13 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): bdofs = numpy.nonzero(lgmap.indices[:own] < 0)[0].astype(PETSc.IntType) bc_rows[Vsub] = Vsub.dof_dset.lgmap.apply(bdofs, result=bdofs) - # get coefficients on a given cell coefficients, assembly_callables = self.assemble_coef(J, form_compiler_parameters) coeffs = [coefficients.get(k) for k in ("beta", "alpha")] cmaps = [glonum_fun(ck.cell_node_map())[0] for ck in coeffs] @PETSc.Log.EventDecorator("FDMGetCoeffs") def get_coeffs(e, result=None): + # Get vector for betas and alphas on a cell vals = [] for k, (coeff, cmap) in enumerate(zip(coeffs, cmaps)): get_coeffs.indices[k] = cmap(e, result=get_coeffs.indices[k]) @@ -277,6 +277,7 @@ def get_coeffs(e, result=None): # Store only off-diagonal blocks with more columns than rows to save memory Vsort = sorted(V, key=lambda Vsub: Vsub.dim()) + # Loop over all pairs of subspaces for Vrow, Vcol in product(Vsort, Vsort): if symmetric and (Vcol, Vrow) in Pmats: P = PETSc.Mat().createTranspose(Pmats[Vcol, Vrow]) @@ -428,6 +429,7 @@ def update_De(data): De.setDiagonal(work_vec, addv=insert) return De + # Core assembly loop for e in range(self.nel): rindices = get_rindices(e, result=rindices) cindices = get_cindices(e, result=cindices) @@ -438,6 +440,7 @@ def update_De(data): work_vec.destroy() elif self.nel: + # Preallocation of the sparsity pattern if common_key not in self.work_mats: data = self.get_coeffs(0) data.fill(1.0E0) @@ -640,8 +643,11 @@ def assemble_reference_tensor(self, V): def factor_interior_mat(A00): - # Assume that interior DOF list i0 is ordered such that A00 is block diagonal - # with blocks of increasing dimension + """ + Used in static condensation. Take in A00 on a cell, return its Cholesky + factorisation. Assumes that interior DOF have been reordered to make A00 + block diagonal with blocks of increasing dimension. 
+ """ indptr, indices, data = A00.getValuesCSR() degree = numpy.diff(indptr) @@ -665,6 +671,7 @@ def factor_interior_mat(A00): @PETSc.Log.EventDecorator("FDMCondense") def condense_element_mat(A, i0, i1, submats): + # Return the Schur complement associated to indices in i1, condensing i0 out isrows = [i0, i0, i1, i1] iscols = [i0, i1, i0, i1] submats[:4] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:4] if submats[0] else None) @@ -679,6 +686,7 @@ def condense_element_mat(A, i0, i1, submats): @PETSc.Log.EventDecorator("FDMCondense") def condense_element_pattern(A, i0, i1, submats): + # Add zeroes on the statically condensed pattern so that you can run ICC(0) isrows = [i0, i0, i1] iscols = [i0, i1, i0] submats[:3] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:3] if submats[0] else None) @@ -716,6 +724,8 @@ def wrapper(*args): def load_assemble_csr(comm, triu=False): + # Insert one sparse matrix into another sparse matrix. + # Done in C for efficiency, since it loops over rows. if triu: name = "setSubMatCSR_SBAIJ" select_cols = "icol < irow ? -1: icol" @@ -767,11 +777,12 @@ def load_assemble_csr(comm, triu=False): restype=ctypes.c_int) -def petsc_sparse(A_numpy, rtol=1E-10): +def petsc_sparse(A_numpy, rtol=1E-10, comm=None): + # Convert dense numpy matrix into a sparse PETSc matrix Amax = max(A_numpy.min(), A_numpy.max(), key=abs) atol = rtol*Amax nnz = numpy.count_nonzero(abs(A_numpy) > atol, axis=1).astype(PETSc.IntType) - A = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=PETSc.COMM_SELF) + A = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) for row, Arow in enumerate(A_numpy): cols = numpy.argwhere(abs(Arow) > atol).astype(PETSc.IntType).flat A.setValues(row, cols, Arow[cols], PETSc.InsertMode.INSERT) @@ -780,29 +791,18 @@ def petsc_sparse(A_numpy, rtol=1E-10): def block_mat(A_blocks): + # Return a concrete Mat corresponding to a block matrix given as a list of lists if len(A_blocks) == 1: if len(A_blocks[0]) == 1: return A_blocks[0][0] - nrows = sum([Arow[0].size[0] for Arow in A_blocks]) - ncols = sum([Aij.size[1] for Aij in A_blocks[0]]) - nnz = numpy.concatenate([sum([numpy.diff(Aij.getValuesCSR()[0]) for Aij in Arow]) for Arow in A_blocks]) - A = PETSc.Mat().createAIJ((nrows, ncols), nnz=(nnz, 0), comm=PETSc.COMM_SELF) - imode = PETSc.InsertMode.INSERT - insert_block = FDMPC.load_set_values() - rsizes = [sum([Ai[0].size[0] for Ai in A_blocks[:k]]) for k in range(len(A_blocks)+1)] - csizes = [sum([Aij.size[1] for Aij in A_blocks[0][:k]]) for k in range(len(A_blocks[0])+1)] - rows = [numpy.arange(*rsizes[i:i+2], dtype=PETSc.IntType) for i in range(len(A_blocks))] - cols = [numpy.arange(*csizes[j:j+2], dtype=PETSc.IntType) for j in range(len(A_blocks[0]))] - for Ai, irows in zip(A_blocks, rows): - for Aij, jcols in zip(Ai, cols): - insert_block(A, Aij, irows, jcols, imode) - - A.assemble() - return A + nest = PETSc.Mat().createNest(A_blocks, comm=A_blocks[0][0].getComm()) + # A nest Mat would not allow us to take matrix-matrix products + return nest.convert(mat_type=A_blocks[0][0].getType()) def is_restricted(finat_element): + # Determine if an element is a restriction onto interior or facets is_interior = True is_facet = True tdim = finat_element.cell.get_spatial_dimension() @@ -822,6 +822,7 @@ def is_restricted(finat_element): def sort_interior_dofs(idofs, A): + # Permute `idofs` to have A[idofs, idofs] with contiguous 1x1, 2x2, 3x3, ... 
blocks Aii = A.createSubMatrix(idofs, idofs) indptr, indices, _ = Aii.getValuesCSR() n = idofs.getSize() @@ -836,8 +837,8 @@ def sort_interior_dofs(idofs, A): if len(neigh) == degree: visit[neigh] = True perm.extend(neigh) - idofs.setIndices(idofs.getIndices()[perm]) + Aii.destroy() def kron3(A, B, C, scale=None): @@ -849,9 +850,13 @@ def kron3(A, B, C, scale=None): return result -def mass_matrix(tdim, formdegree, B00, B11): - B00 = petsc_sparse(B00) - B11 = petsc_sparse(B11) +def mass_matrix(tdim, formdegree, B00, B11, comm=None): + # Construct mass matrix on reference cell from 1D mass matrices B00 and B11. + # It can be applied with either broken or conforming test and trial spaces. + if comm is None: + comm = PETSc.COMM_SELF + B00 = petsc_sparse(B00, comm=comm) + B11 = petsc_sparse(B11, comm=comm) if tdim == 1: B_blocks = [B11 if formdegree else B00] elif tdim == 2: @@ -871,8 +876,6 @@ def mass_matrix(tdim, formdegree, B00, B11): else: B_blocks = [kron3(B11, B11, B11)] - B00.destroy() - B11.destroy() if len(B_blocks) == 1: result = B_blocks[0] else: @@ -884,23 +887,35 @@ def mass_matrix(tdim, formdegree, B00, B11): indptr = numpy.concatenate([csr[0][bool(shift):]+shift for csr, shift in zip(csr_block, ishift[:-1])]) indices = numpy.concatenate([csr[1]+shift for csr, shift in zip(csr_block, jshift[:-1])]) data = numpy.concatenate([csr[2] for csr in csr_block]) - result = PETSc.Mat().createAIJ((nrows, ncols), csr=(indptr, indices, data), comm=PETSc.COMM_SELF) + result = PETSc.Mat().createAIJ((nrows, ncols), csr=(indptr, indices, data), comm=comm) for B in B_blocks: B.destroy() + if not (B00 is result): + B00.destroy() + if not (B11 is result): + B11.destroy() return result -def diff_matrix(tdim, formdegree, A00, A11, A10): +def diff_matrix(tdim, formdegree, A00, A11, A10, comm=None): + # Construct exterior derivative matrix on reference cell from 1D mass matrices A00 and A11, + # and exterior derivative moments A10. + # It can be applied with either broken or conforming test and trial spaces. + if comm is None: + comm = PETSc.COMM_SELF if formdegree == tdim: ncols = A10.shape[0]**tdim - A_zero = PETSc.Mat().createAIJ((1, ncols), nnz=(0, 0), comm=PETSc.COMM_SELF) + A_zero = PETSc.Mat().createAIJ((1, ncols), nnz=(0, 0), comm=comm) A_zero.assemble() return A_zero - A00 = petsc_sparse(A00) - A11 = petsc_sparse(A11) - A10 = petsc_sparse(A10) + A00 = petsc_sparse(A00, comm=comm) + A11 = petsc_sparse(A11, comm=comm) + A10 = petsc_sparse(A10, comm=comm) if tdim == 1: + A00.destroy() + A11.destroy() + return A10 elif tdim == 2: if formdegree == 0: @@ -913,7 +928,7 @@ def diff_matrix(tdim, formdegree, A00, A11, A10): A_blocks = [[kron3(A00, A00, A10)], [kron3(A00, A10, A00)], [kron3(A10, A00, A00)]] elif formdegree == 1: size = tuple(A11.getSize()[k] * A10.getSize()[k] * A00.getSize()[k] for k in range(2)) - A_zero = PETSc.Mat().createAIJ(size, nnz=(0, 0), comm=PETSc.COMM_SELF) + A_zero = PETSc.Mat().createAIJ(size, nnz=(0, 0), comm=comm) A_zero.assemble() A_blocks = [[kron3(A00, A10, A11, scale=-1), kron3(A00, A11, A10), A_zero], [kron3(A10, A00, A11, scale=-1), A_zero, kron3(A11, A00, A10)], @@ -932,6 +947,10 @@ def diff_matrix(tdim, formdegree, A00, A11, A10): def diff_prolongator(Vf, Vc, fbcs=[], cbcs=[]): + """ + Magic. Tabulate exterior derivative: Vc -> Vf as an explicit sparse matrix. + Works for any basis. These are the same matrices one needs for HypreAMS and friends. 
+ """ from tsfc.finatinterface import create_element from firedrake.preconditioners.pmg import fiat_reference_prolongator @@ -1012,6 +1031,7 @@ def cell_to_global(lgmap, cell_to_local, e, result=None): def unrestrict_element(ele): + # Get an element that might or might not be restricted and return the parent unrestricted element. if isinstance(ele, ufl.VectorElement): return type(ele)(unrestrict_element(ele._sub_element), dim=ele.num_sub_elements()) elif isinstance(ele, ufl.TensorElement): diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index bf3a462e0a..6757864e7a 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -78,8 +78,8 @@ def coarsen_form(self, form, fine_to_coarse_map): def initialize(self, obj): # Make a new DM. # Hook up a (new) coarsen routine on that DM. - # Make a new PC, of type MG. - # Assign the DM to that PC. + # Make a new PC, of type MG (or SNES of type FAS). + # Assign the DM to that PC (or SNES). odm = obj.getDM() ctx = get_appctx(odm) @@ -262,18 +262,17 @@ def _coarsen_form(a): cdm.setCreateInterpolation(self.create_interpolation) cdm.setCreateInjection(self.create_injection) - interp_petscmat, _ = cdm.createInterpolation(fdm) - inject_petscmat = cdm.createInjection(fdm) - if cu in cJ.coefficients(): - # injection of the initial state + # Only inject state if the coarse state is a dependency of the coarse Jacobian. + inject_petscmat = cdm.createInjection(fdm) + def inject_state(): with cu.dat.vec_wo as xc, fu.dat.vec_ro as xf: inject_petscmat.mult(xf, xc) add_hook(parent, setup=inject_state, call_setup=True) - # coarsen the nullspace basis + # Coarsen the nullspace basis def coarsen_nullspace(coarse_V, mat, fine_nullspace): if isinstance(fine_nullspace, MixedVectorSpaceBasis): if mat.type == 'python': @@ -302,13 +301,16 @@ def coarsen_nullspace(coarse_V, mat, fine_nullspace): else: return fine_nullspace - ises = cV._ises + if fctx._nullspace or fctx._near_nullspace or fctx._nullspace_T: + interp_petscmat, _ = cdm.createInterpolation(fdm) + else: + interp_petscmat = None cctx._nullspace = coarsen_nullspace(cV, interp_petscmat, fctx._nullspace) - cctx.set_nullspace(cctx._nullspace, ises, transpose=False, near=False) + cctx.set_nullspace(cctx._nullspace, cV._ises, transpose=False, near=False) cctx._near_nullspace = coarsen_nullspace(cV, interp_petscmat, fctx._near_nullspace) - cctx.set_nullspace(cctx._near_nullspace, ises, transpose=False, near=True) + cctx.set_nullspace(cctx._near_nullspace, cV._ises, transpose=False, near=True) cctx._nullspace_T = coarsen_nullspace(cV, interp_petscmat, fctx._nullspace_T) - cctx.set_nullspace(cctx._nullspace_T, ises, transpose=True, near=False) + cctx.set_nullspace(cctx._nullspace_T, cV._ises, transpose=True, near=False) return cdm def coarsen_quadrature(self, metadata, fdeg, cdeg): @@ -336,6 +338,7 @@ def coarsen_bcs(self, fbcs, cV): return cbcs def create_transfer(self, cctx, fctx, mat_type, cbcs, fbcs): + # Create a transfer or retrieve it from the class cache cV = cctx.J.arguments()[0].function_space() fV = fctx.J.arguments()[0].function_space() cbcs = tuple(cctx._problem.bcs) if cbcs else tuple() @@ -532,9 +535,7 @@ def prolongation_transfer_kernel_action(Vf, expr): def expand_element(ele): - """ - Expand a FiniteElement as an EnrichedElement of TensorProductElements, discarding modifiers. - """ + # Expand a FiniteElement as an EnrichedElement of TensorProductElements, discarding modifiers. 
if isinstance(ele, finat.FlattenedDimensions): return expand_element(ele.product) elif isinstance(ele, (finat.HDivElement, finat.HCurlElement)): @@ -560,6 +561,7 @@ def expand_element(ele): def evaluate_dual(dual, element, key=None): + # Evaluate the action of a set of dual functionals on the basis functions of an element. keys = set(tuple(phi.get_point_dict().keys()) for phi in dual) pts = list(set(sum(keys, ()))) if key is None: @@ -615,7 +617,7 @@ def get_permutation_to_line_elements(finat_element): if expansion.space_dimension() != finat_element.space_dimension(): raise ValueError("Failed to decompose %s into tensor products" % finat_element) - unique_factors = [] + unique_factors = set() line_elements = [] terms = expansion.elements if hasattr(expansion, "elements") else [expansion] for term in terms: @@ -632,7 +634,7 @@ def get_permutation_to_line_elements(finat_element): n = f break if n is fiat_factors[i]: - unique_factors.append(n) + unique_factors.add(n) fiat_factors[i] = n line_elements.append(tuple(fiat_factors)) @@ -680,15 +682,8 @@ def get_permutation_to_line_elements(finat_element): @lru_cache(maxsize=10) def fiat_reference_prolongator(felem, celem, derivative=False): - from FIAT.reference_element import flatten_reference_cube - - ref_el = flatten_reference_cube(felem.get_reference_element()) - tdim = ref_el.get_spatial_dimension() - if derivative and tdim > 1: - raise NotImplementedError("Derivative prolongator is only available on the interval") - ckey = (felem.formdegree,) if derivative else (0,)*tdim - fkey = (celem.formdegree,) if derivative else (0,)*tdim - + ckey = (felem.formdegree,) if derivative else None + fkey = (celem.formdegree,) if derivative else None fdual = felem.dual_basis() cdual = celem.dual_basis() if fkey == ckey and compare_dual_basis(fdual, cdual): diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index 0b6c36f0b9..a7414ebb4c 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -47,6 +47,10 @@ def mixed_family(tp_mesh, request): def test_reconstruct_degree(tp_mesh, mixed_family): + """ Construct a complicated mixed element and ensure we may recover it by + p-refining or p-coarsening an element of the same family with different + degree. 
+ """ elist = [] Vfamily, Qfamily = mixed_family for degree in [7, 2, 31]: @@ -57,11 +61,15 @@ def test_reconstruct_degree(tp_mesh, mixed_family): Q = FunctionSpace(tp_mesh, Qfamily, degree-2) Z = MixedFunctionSpace([V, Q]) e = Z.ufl_element() + elist.append(e) assert e == PMGPC.reconstruct_degree(elist[0], degree) def test_prolong_de_rham(tp_mesh): + """ Interpolate a linear vector function between [H1]^d, HCurl and HDiv spaces + where it can be exactly represented + """ from firedrake.preconditioners.pmg import prolongation_matrix_matfree tdim = tp_mesh.topological_dimension() @@ -85,6 +93,10 @@ def test_prolong_de_rham(tp_mesh): def test_prolong_low_order_to_restricted(tp_mesh, tp_family, variant): + """ Interpolate a low-order function to interior and facet high-order spaces + and ensure that the sum of the two high-order functions is equal to the + low-order function + """ from firedrake.preconditioners.pmg import prolongation_matrix_matfree degree = 5 @@ -291,7 +303,7 @@ def test_p_multigrid_mixed(mat_type): "ksp_max_it": 3, "pc_type": "jacobi"} - coarse = {"mat_type": "aij", + coarse = {"mat_type": "aij", # This circumvents the need for AssembledPC "ksp_type": "richardson", "ksp_max_it": 1, "ksp_norm_type": "unpreconditioned", diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index dc76294909..f2263a8f10 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -16,6 +16,7 @@ "pc_type": "cholesky", } +# FDM without static condensation fdmstar = { "pc_type": "python", "pc_python_type": "firedrake.P1PC", @@ -38,6 +39,7 @@ } } +# FDM with static condensation facetstar = { "pc_type": "python", "pc_python_type": "firedrake.FacetSplitPC", @@ -48,7 +50,7 @@ "facet_fdm_pc_fieldsplit_type": "symmetric_multiplicative", "facet_fdm_fieldsplit_0": { "ksp_type": "preonly", - "pc_type": "icc", + "pc_type": "icc", # this is exact for the sparse approximation used in FDM }, "facet_fdm_fieldsplit_1": { "ksp_type": "preonly", @@ -136,8 +138,8 @@ def test_p_independence_hgrad(mesh, variant): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant=variant) V = FunctionSpace(mesh, element) problem = build_riesz_map(V, grad) - for sp, max_it in zip(solvers, expected[:len(solvers)]): - assert solve_riesz_map(problem, sp) <= max_it + for sp, expected_it in zip(solvers, expected): + assert solve_riesz_map(problem, sp) <= expected_it @pytest.mark.skipcomplex @@ -149,8 +151,8 @@ def test_p_independence_hcurl(mesh): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) problem = build_riesz_map(V, curl) - for sp, max_it in zip(solvers, expected[:len(solvers)]): - assert solve_riesz_map(problem, sp) <= max_it + for sp, expected_it in zip(solvers, expected): + assert solve_riesz_map(problem, sp) <= expected_it @pytest.mark.skipcomplex @@ -162,8 +164,8 @@ def test_p_independence_hdiv(mesh): element = FiniteElement(family, cell=mesh.ufl_cell(), degree=degree, variant="fdm") V = FunctionSpace(mesh, element) problem = build_riesz_map(V, div) - for sp, max_it in zip(solvers, expected[:len(solvers)]): - assert solve_riesz_map(problem, sp) <= max_it + for sp, expected_it in zip(solvers, expected): + assert solve_riesz_map(problem, sp) <= expected_it @pytest.mark.skipcomplex From d1ae080247a6298a2b163fd4ad7b57604ab97203 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 16 Mar 2023 14:06:27 +0000 Subject: [PATCH 21/75] more comments --- firedrake/preconditioners/fdm.py | 29 
++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 35aed9fed4..21c2ac91c0 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -53,6 +53,8 @@ class FDMPC(PCBase): @staticmethod def load_set_values(triu=False): + # Compile the C function to insert sparse element matrices and store in + # class cache key = triu cache = FDMPC._c_code_cache try: @@ -104,7 +106,7 @@ def initialize(self, pc): if element == e_fdm: V_fdm, J_fdm, bcs_fdm = (V, J, bcs) else: - # Reconstruct forms with variant element + # Reconstruct Jacobian and bcs with variant element V_fdm = firedrake.FunctionSpace(V.mesh(), e_fdm) J_fdm = J(*[t.reconstruct(function_space=V_fdm) for t in J.arguments()], coefficients={}) bcs_fdm = [] @@ -238,7 +240,7 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): result = cell_to_local(cell_index, result=result) return lgmap.apply(result, result=result) - # Create data strctures needed for assembly + # Create data structures needed for assembly self.cell_to_global = dict() self.lgmaps = dict() bc_rows = dict() @@ -380,6 +382,16 @@ def destroy(self, pc): @PETSc.Log.EventDecorator("FDMSetValues") def set_values(self, A, Vrow, Vcol, addv, triu=False): + """ + Assemble the stiffness matrix in the FDM basis using sparse reference + tensors and diagonal mass matrices. + + :arg A: the :class:`PETSc.Mat` to assemble + :arg Vrow: the :class:`.FunctionSpace` test space + :arg Vcol: the :class:`.FunctionSpace` trial space + :arg addv: a `PETSc.Mat.InsertMode` + :arg triu: are we assembling only the upper triangular part? + """ def RtAP(R, A, P, result=None): RtAP.buff = A.matMult(P, result=RtAP.buff) @@ -858,7 +870,12 @@ def mass_matrix(tdim, formdegree, B00, B11, comm=None): B00 = petsc_sparse(B00, comm=comm) B11 = petsc_sparse(B11, comm=comm) if tdim == 1: - B_blocks = [B11 if formdegree else B00] + if formdegree == 0: + B11.destroy() + return B00 + else: + B00.destroy() + return B11 elif tdim == 2: if formdegree == 0: B_blocks = [B00.kron(B00)] @@ -890,10 +907,8 @@ def mass_matrix(tdim, formdegree, B00, B11, comm=None): result = PETSc.Mat().createAIJ((nrows, ncols), csr=(indptr, indices, data), comm=comm) for B in B_blocks: B.destroy() - if not (B00 is result): - B00.destroy() - if not (B11 is result): - B11.destroy() + B00.destroy() + B11.destroy() return result From 39283025028cf66e1cee151c9054efdb70588365 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 16 Mar 2023 14:46:48 +0000 Subject: [PATCH 22/75] glonum_fun -> extrude_node_map, do not coarsen residual of linear p-MG --- firedrake/preconditioners/fdm.py | 66 ++++++++++++-------------------- firedrake/preconditioners/pmg.py | 17 ++++---- 2 files changed, 32 insertions(+), 51 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 21c2ac91c0..6ceade799e 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -53,8 +53,12 @@ class FDMPC(PCBase): @staticmethod def load_set_values(triu=False): - # Compile the C function to insert sparse element matrices and store in - # class cache + """ + Compile C code to insert sparse element matrices and store in class cache + :arg triu: are we inserting onto the upper triangular part of the matrix? 
+ + :returns: a python wrapper for the matrix insertion function + """ key = triu cache = FDMPC._c_code_cache try: @@ -247,7 +251,7 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): for Vsub in V: lgmap = Vsub.local_to_global_map([bc.reconstruct(V=Vsub, g=0) for bc in bcs]) bsize = Vsub.dof_dset.layout_vec.getBlockSize() - cell_to_local, nel = glonum_fun(Vsub.cell_node_map(), bsize=bsize) + cell_to_local, nel = extrude_node_map(Vsub.cell_node_map(), bsize=bsize) self.cell_to_global[Vsub] = partial(cell_to_global, lgmap, cell_to_local) self.lgmaps[Vsub] = lgmap @@ -257,7 +261,7 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): coefficients, assembly_callables = self.assemble_coef(J, form_compiler_parameters) coeffs = [coefficients.get(k) for k in ("beta", "alpha")] - cmaps = [glonum_fun(ck.cell_node_map())[0] for ck in coeffs] + cmaps = [extrude_node_map(ck.cell_node_map())[0] for ck in coeffs] @PETSc.Log.EventDecorator("FDMGetCoeffs") def get_coeffs(e, result=None): @@ -1005,8 +1009,8 @@ def diff_prolongator(Vf, Vc, fbcs=[], cbcs=[]): rmap = Vf.local_to_global_map(fbcs) cmap = Vc.local_to_global_map(cbcs) - rlocal, nel = glonum_fun(Vf.cell_node_map(), bsize=Vf.value_size) - clocal, nel = glonum_fun(Vc.cell_node_map(), bsize=Vc.value_size) + rlocal, nel = extrude_node_map(Vf.cell_node_map(), bsize=Vf.value_size) + clocal, nel = extrude_node_map(Vc.cell_node_map(), bsize=Vc.value_size) def cell_to_global(lgmap, cell_to_local, e, result=None): result = cell_to_local(e, result=result) @@ -1164,8 +1168,8 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): tdim = V.mesh().topological_dimension() shift = self.axes_shifts * bsize - index_coef, _ = glonum_fun((Gq or Bq).cell_node_map()) - index_bc, _ = glonum_fun(bcflags.cell_node_map()) + index_coef, _ = extrude_node_map((Gq or Bq).cell_node_map()) + index_bc, _ = extrude_node_map(bcflags.cell_node_map()) flag2id = numpy.kron(numpy.eye(tdim, tdim, dtype=PETSc.IntType), [[1], [2]]) # pshape is the shape of the DOFs in the tensor product @@ -1693,19 +1697,20 @@ def get_interior_facet_maps(V): @lru_cache(maxsize=20) -def glonum_fun(node_map, bsize=1): +def extrude_node_map(node_map, bsize=1): """ - Return a the local numbering given an non-extruded local map and the total number of entities. + Construct a (possibly vector-valued) cell to node map from an un-extruded scalar map. :arg node_map: a :class:`pyop2.Map` mapping entities to their local dofs, including ghost entities. 
+ :arg bsize: the block size - :returns: a 2-tuple with the map and the number of entities owned by this process + :returns: a 2-tuple with the map as function and the number of cells owned by this process """ nelv = node_map.values.shape[0] if node_map.offset is None: nel = nelv - def glonum(e, result=None): + def scalar_map(e, result=None): if result is None: result = numpy.copy(node_map.values_with_halo[e]) else: @@ -1718,14 +1723,14 @@ def glonum(e, result=None): nelz = layers[0, 1]-layers[0, 0]-1 nel = nelz*nelv - def _glonum(node_map, nelz, e, result=None): + def _scalar_map(node_map, nelz, e, result=None): if result is None: result = numpy.copy(node_map.values_with_halo[e // nelz]) else: numpy.copyto(result, node_map.values_with_halo[e // nelz]) result += (e % nelz)*node_map.offset return result - glonum = partial(_glonum, node_map, nelz) + scalar_map = partial(_scalar_map, node_map, nelz) else: nelz = layers[:, 1]-layers[:, 0]-1 @@ -1733,47 +1738,26 @@ def _glonum(node_map, nelz, e, result=None): to_base = numpy.repeat(numpy.arange(node_map.values_with_halo.shape[0], dtype=node_map.offset.dtype), nelz) to_layer = numpy.concatenate([numpy.arange(nz, dtype=node_map.offset.dtype) for nz in nelz]) - def _glonum(node_map, to_base, to_layer, e, result=None): + def _scalar_map(node_map, to_base, to_layer, e, result=None): if result is None: result = numpy.copy(node_map.values_with_halo[to_base[e]]) else: numpy.copyto(result, node_map.values_with_halo[to_base[e]]) result += to_layer[e]*node_map.offset return result - glonum = partial(_glonum, node_map, to_base, to_layer) + scalar_map = partial(_scalar_map, node_map, to_base, to_layer) if bsize == 1: - return glonum, nel + return scalar_map, nel ibase = numpy.arange(bsize, dtype=node_map.values.dtype) - def vector_glonum(bsize, ibase, e, result=None): + def vector_map(bsize, ibase, e, result=None): index = None if result is not None: index = result[:, 0] - index = glonum(e, result=index) + index = scalar_map(e, result=index) index *= bsize return numpy.add.outer(index, ibase, out=result) - return partial(vector_glonum, bsize, ibase), nel - - -def glonum(node_map): - """ - Return an array with the node map. - - :arg node_map: a :class:`pyop2.Map` mapping entities to their nodes, including ghost entities. 
- - :returns: a :class:`numpy.ndarray` whose rows are the nodes for each cell - """ - if (node_map.offset is None) or (node_map.values_with_halo.size == 0): - return node_map.values_with_halo - else: - layers = node_map.iterset.layers_array - if layers.shape[0] == 1: - nelz = layers[0, 1]-layers[0, 0]-1 - to_layer = numpy.tile(numpy.arange(nelz, dtype=node_map.offset.dtype), len(node_map.values_with_halo)) - else: - nelz = layers[:, 1]-layers[:, 0]-1 - to_layer = numpy.concatenate([numpy.arange(nz, dtype=node_map.offset.dtype) for nz in nelz]) - return numpy.repeat(node_map.values_with_halo, nelz, axis=0) + numpy.kron(to_layer.reshape((-1, 1)), node_map.offset) + return partial(vector_map, bsize, ibase), nel diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 6757864e7a..029fffad6d 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -98,7 +98,7 @@ def initialize(self, obj): pdm.setOptionsPrefix(options_prefix) ppc = self.configure_pmg(obj, pdm) - is_snes = isinstance(obj, PETSc.SNES) + self.is_snes = isinstance(obj, PETSc.SNES) copts = PETSc.Options(ppc.getOptionsPrefix() + ppc.getType() + "_coarse_") @@ -129,7 +129,7 @@ def initialize(self, obj): # Now overwrite some routines on the DM pdm.setRefine(None) pdm.setCoarsen(self.coarsen) - if is_snes: + if self.is_snes: pdm.setSNESFunction(_SNESContext.form_function) pdm.setSNESJacobian(_SNESContext.form_jacobian) pdm.setKSPComputeOperators(_SNESContext.compute_operators) @@ -201,12 +201,15 @@ def _coarsen_form(a): for f in a.integrals()]) return a - cF = _coarsen_form(fctx.F) cJ = _coarsen_form(fctx.J) cJp = _coarsen_form(fctx.Jp) + # This fixes a subtle bug where you are applying PMGPC on a mixed + # problem with geometric multigrid only on one block and an non-Lagrange element + # on the other block (gmg breaks for non-Lagrange elements) + cF = _coarsen_form(fctx.F) if self.is_snes else ufl.action(cJ, cu) + fcp = self.coarsen_quadrature(fproblem.form_compiler_parameters, fdeg, cdeg) cbcs = self.coarsen_bcs(fproblem.bcs, cV) - cF = self.coarsen_residual(cF, cJ, cu) # Coarsen the appctx: the user might want to provide solution-dependant expressions and forms cappctx = dict(fctx.appctx) @@ -460,9 +463,6 @@ def applyTranspose(self, pc, x, y): def coarsen_bc_value(self, bc, cV): return 0 - def coarsen_residual(self, Fc, Jc, uc): - return ufl.action(Jc, uc) - class PMGSNES(SNESBase, PMGBase): _prefix = "pfas_" @@ -515,9 +515,6 @@ def coarsen_bc_value(self, bc, cV): coarse.interpolate(bc._original_arg) return coarse - def coarsen_residual(self, Fc, Jc, uc): - return Fc - def prolongation_transfer_kernel_action(Vf, expr): from tsfc import compile_expression_dual_evaluation From 213a55b41d6d1443d103db4cfea493e925f18a77 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 16 Mar 2023 15:08:46 +0000 Subject: [PATCH 23/75] update citations --- firedrake/preconditioners/fdm.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 6ceade799e..9345bb674c 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -13,17 +13,29 @@ import FIAT import finat -Citations().add("Brubeck2021", """ -@misc{Brubeck2021, +Citations().add("Brubeck2022a", """ +@article{Brubeck2022a, title={A scalable and robust vertex-star relaxation for high-order {FEM}}, author={Brubeck, Pablo D. and Farrell, Patrick E.}, + journal = {SIAM J. Sci. 
Comput.}, + volume = {44}, + number = {5}, + pages = {A2991-A3017}, + year = {2022}, + doi = {10.1137/21M1444187} +""") + +Citations().add("Brubeck2022b", """ +@misc{Brubeck2022b, + title={{Multigrid solvers for the de Rham complex with optimal complexity in polynomial degree}}, + author={Brubeck, Pablo D. and Farrell, Patrick E.}, archiveprefix = {arXiv}, - eprint = {2107.14758}, + eprint = {2211.14284}, primaryclass = {math.NA}, - year={2021} -} + year={2022} """) + __all__ = ("FDMPC", "PoissonFDMPC") @@ -44,8 +56,8 @@ class FDMPC(PCBase): """ _prefix = "fdm_" - _variant = "fdm" + _citation = "Brubeck2022b" _reference_tensor_cache = {} _coefficient_cache = {} @@ -71,8 +83,8 @@ def initialize(self, pc): from firedrake.assemble import allocate_matrix, assemble from firedrake.preconditioners.pmg import prolongation_matrix_matfree from firedrake.preconditioners.patch import bcdofs - Citations().register("Brubeck2021") + Citations().register(self._citation) self.comm = pc.comm Amat, Pmat = pc.getOperators() prefix = pc.getOptionsPrefix() @@ -396,7 +408,6 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): :arg addv: a `PETSc.Mat.InsertMode` :arg triu: are we assembling only the upper triangular part? """ - def RtAP(R, A, P, result=None): RtAP.buff = A.matMult(P, result=RtAP.buff) return R.transposeMatMult(RtAP.buff, result=result) @@ -1109,6 +1120,7 @@ class PoissonFDMPC(FDMPC): """ _variant = "fdm_ipdg" + _citation = "Brubeck2022a" def assemble_reference_tensor(self, V): from firedrake.preconditioners.pmg import get_permutation_to_line_elements From b6dff72ba09c79e036e0ebd1c0ef903b2a53c75b Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 16 Mar 2023 15:19:23 +0000 Subject: [PATCH 24/75] add some comments --- firedrake/preconditioners/fdm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 9345bb674c..5616cf547c 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -418,7 +418,9 @@ def RtAP(R, A, P, result=None): if Vrow == Vcol: get_cindices = lambda e, result=None: result update_A = lambda Ae, rindices, cindices: set_values_csr(A, Ae, rindices, rindices, addv) + # moments of orthogonalized basis against basis tabulation and derivative tabulation rtensor = self.reference_tensor_on_diag.get(Vrow) or self.assemble_reference_tensor(Vrow) + # element matrix obtained via Equation (3.9) of Brubeck2022b assemble_element_mat = lambda De, result=None: De.PtAP(rtensor, result=result) condense_element_mat = self.get_static_condensation.get(Vrow) else: @@ -505,6 +507,8 @@ def update_De(data): def assemble_coef(self, J, form_compiler_parameters): """ Obtain coefficients as the diagonal of a weighted mass matrix in V^k x V^{k+1} + + See Section 3.2 of Brubeck2022b. 
""" from ufl.algorithms.ad import expand_derivatives from ufl.algorithms.expand_indices import expand_indices From 415881ce76b0f3693ce2d1dfe21d79a18cdb056c Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 16 Mar 2023 17:02:42 +0000 Subject: [PATCH 25/75] comments explaining reference tensor and coefficients --- firedrake/preconditioners/fdm.py | 34 +++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 5616cf547c..330ea4c33e 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -506,15 +506,26 @@ def update_De(data): @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coef(self, J, form_compiler_parameters): """ - Obtain coefficients as the diagonal of a weighted mass matrix in V^k x V^{k+1} - + Obtain coefficients for the auxiliary operator as the diagonal of a + weighted mass matrix in broken(V^k) * broken(V^{k+1}). See Section 3.2 of Brubeck2022b. + + :arg J: the Jacobian bilinear :class:`ufl.Form`, + :form_compiler_parameters: a `dict` with tsfc parameters. + + :return: a 2-tuple with a `dict` with the zero-th order and second + order coefficients keyed on ``"beta"`` and ``"alpha"``, and a list of + assembly callables. """ from ufl.algorithms.ad import expand_derivatives from ufl.algorithms.expand_indices import expand_indices from firedrake.formmanipulation import ExtractSubBlock from firedrake.assemble import assemble + # Basic idea: take the original bilinear form and + # replace the exterior derivatives with arguments in broken(V^{k+1}). + # Then, replace the original arguments with arguments in broken(V^k). + # Where the broken spaces have L2-orthogonal FDM basis functions. index = len(J.arguments()[-1].function_space())-1 if index: splitter = ExtractSubBlock() @@ -529,6 +540,7 @@ def assemble_coef(self, J, form_compiler_parameters): e = unrestrict_element(e) sobolev = e.sobolev_space() + # Replacement rule for the exterior derivative = grad(arg) * eps map_grad = None if sobolev == ufl.H1: map_grad = lambda p: p @@ -544,6 +556,7 @@ def assemble_coef(self, J, form_compiler_parameters): else: map_grad = lambda p: p*(eps/2) + # Construct Z = broken(V^k) * broken(V^{k+1}) V = args_J[0].function_space() formdegree = V.finat_element.formdegree degree = e.degree() @@ -569,14 +582,16 @@ def assemble_coef(self, J, form_compiler_parameters): elements = list(map(ufl.BrokenElement, elements)) if V.shape: elements = [ufl.TensorElement(ele, shape=V.shape) for ele in elements] - Z = firedrake.FunctionSpace(mesh, ufl.MixedElement(elements)) + + # Transform the exterior derivative and the original arguments of J to arguments in Z args = (firedrake.TestFunctions(Z), firedrake.TrialFunctions(Z)) repargs = {t: v[0] for t, v in zip(args_J, args)} repgrad = {ufl.grad(t): map_grad(v[1]) for t, v in zip(args_J, args)} if map_grad else dict() Jcell = expand_indices(expand_derivatives(ufl.Form(J.integrals_by_type("cell")))) mixed_form = ufl.replace(ufl.replace(Jcell, repgrad), repargs) + # Return coefficients and assembly callables, and cache them class key = (mixed_form.signature(), mesh) block_diagonal = True try: @@ -601,6 +616,15 @@ def assemble_coef(self, J, form_compiler_parameters): @PETSc.Log.EventDecorator("FDMRefTensor") def assemble_reference_tensor(self, V): + """ + Return the reference tensor used in the diagonal factorization of the + sparse cell matrices. See Section 3.2 of Brubeck2022b. 
+ + :arg V: a :class:`.FunctionSpace` + + :return: a :class:`PETSc.Mat` with the moments of orthogonalized bases + against the basis and its exterior derivative. + """ tdim = V.mesh().topological_dimension() value_size = V.value_size formdegree = V.finat_element.formdegree @@ -612,12 +636,12 @@ def assemble_reference_tensor(self, V): if formdegree == tdim: degree = degree + 1 is_interior, is_facet = is_restricted(V.finat_element) - key = (degree, tdim, formdegree, V.value_size, is_interior, is_facet) + key = (degree, tdim, formdegree, value_size, is_interior, is_facet) cache = self._reference_tensor_cache try: return cache[key] except KeyError: - full_key = (degree, tdim, formdegree, V.value_size, False, False) + full_key = (degree, tdim, formdegree, value_size, False, False) if is_facet and full_key in cache: result = cache[full_key] noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.comm) From 81764895aa23fa04cbf78db21114c11aac7b7321 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 16 Mar 2023 17:53:33 +0000 Subject: [PATCH 26/75] deterministic sort keys of point_dicts --- firedrake/preconditioners/pmg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 029fffad6d..55e0264014 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -559,8 +559,8 @@ def expand_element(ele): def evaluate_dual(dual, element, key=None): # Evaluate the action of a set of dual functionals on the basis functions of an element. - keys = set(tuple(phi.get_point_dict().keys()) for phi in dual) - pts = list(set(sum(keys, ()))) + keys = list(dict.fromkeys(tuple(phi.get_point_dict().keys()) for phi in dual)) + pts = list(dict.fromkeys(sum(keys, ()))) if key is None: key = (0, ) * len(pts[0]) tab = element.tabulate(sum(key), pts)[key] From 42ed1bfc7f92a857bd3945f97c25c85fdca7f803 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 16 Mar 2023 17:55:28 +0000 Subject: [PATCH 27/75] construct block diagonal mass matrix from a nest Mat --- firedrake/preconditioners/fdm.py | 42 +++++++++++++++----------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 330ea4c33e..9d13962782 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -877,7 +877,8 @@ def is_restricted(finat_element): def sort_interior_dofs(idofs, A): - # Permute `idofs` to have A[idofs, idofs] with contiguous 1x1, 2x2, 3x3, ... blocks + # Permute `idofs` to have A[idofs, idofs] with square blocks of + # increasing dimension along its diagonal. Aii = A.createSubMatrix(idofs, idofs) indptr, indices, _ = Aii.getValuesCSR() n = idofs.getSize() @@ -907,7 +908,7 @@ def kron3(A, B, C, scale=None): def mass_matrix(tdim, formdegree, B00, B11, comm=None): # Construct mass matrix on reference cell from 1D mass matrices B00 and B11. - # It can be applied with either broken or conforming test and trial spaces. + # The 1D matrices may come with different test and trial spaces. 
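+    # For a k-form in tdim dimensions the result is block diagonal, with one
+    # Kronecker-product block per k-form component, each combining k factors
+    # of B11 with (tdim - k) factors of B00.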
if comm is None: comm = PETSc.COMM_SELF B00 = petsc_sparse(B00, comm=comm) @@ -921,34 +922,31 @@ def mass_matrix(tdim, formdegree, B00, B11, comm=None): return B11 elif tdim == 2: if formdegree == 0: - B_blocks = [B00.kron(B00)] + B_diag = [B00.kron(B00)] elif formdegree == 1: - B_blocks = [B00.kron(B11), B11.kron(B00)] + B_diag = [B00.kron(B11), B11.kron(B00)] else: - B_blocks = [B11.kron(B11)] + B_diag = [B11.kron(B11)] elif tdim == 3: if formdegree == 0: - B_blocks = [kron3(B00, B00, B00)] + B_diag = [kron3(B00, B00, B00)] elif formdegree == 1: - B_blocks = [kron3(B00, B00, B11), kron3(B00, B11, B00), kron3(B11, B00, B00)] + B_diag = [kron3(B00, B00, B11), kron3(B00, B11, B00), kron3(B11, B00, B00)] elif formdegree == 2: - B_blocks = [kron3(B00, B11, B11), kron3(B11, B00, B11), kron3(B11, B11, B00)] + B_diag = [kron3(B00, B11, B11), kron3(B11, B00, B11), kron3(B11, B11, B00)] else: - B_blocks = [kron3(B11, B11, B11)] + B_diag = [kron3(B11, B11, B11)] - if len(B_blocks) == 1: - result = B_blocks[0] + if len(B_diag) == 1: + result = B_diag[0] else: - nrows = sum(Bk.size[0] for Bk in B_blocks) - ncols = sum(Bk.size[1] for Bk in B_blocks) - csr_block = [Bk.getValuesCSR() for Bk in B_blocks] - ishift = numpy.cumsum([0] + [csr[0][-1] for csr in csr_block]) - jshift = numpy.cumsum([0] + [Bk.size[1] for Bk in B_blocks]) - indptr = numpy.concatenate([csr[0][bool(shift):]+shift for csr, shift in zip(csr_block, ishift[:-1])]) - indices = numpy.concatenate([csr[1]+shift for csr, shift in zip(csr_block, jshift[:-1])]) - data = numpy.concatenate([csr[2] for csr in csr_block]) - result = PETSc.Mat().createAIJ((nrows, ncols), csr=(indptr, indices, data), comm=comm) - for B in B_blocks: + n = len(B_diag) + B_zero = PETSc.Mat().createAIJ(B_diag[0].getSize(), nnz=(0, 0), comm=comm) + B_zero.assemble() + B_blocks = [[B_diag[i] if i == j else B_zero for j in range(n)] for i in range(n)] + result = block_mat(B_blocks) + B_zero.destroy() + for B in B_diag: B.destroy() B00.destroy() B11.destroy() @@ -958,7 +956,7 @@ def mass_matrix(tdim, formdegree, B00, B11, comm=None): def diff_matrix(tdim, formdegree, A00, A11, A10, comm=None): # Construct exterior derivative matrix on reference cell from 1D mass matrices A00 and A11, # and exterior derivative moments A10. - # It can be applied with either broken or conforming test and trial spaces. + # The 1D matrices may come with different test and trial spaces. if comm is None: comm = PETSc.COMM_SELF if formdegree == tdim: From 9c0ccbfddf51db31d9493ecb72edbb8606d429e0 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 16 Mar 2023 22:23:21 +0000 Subject: [PATCH 28/75] fix docstrings --- firedrake/preconditioners/fdm.py | 41 ++++++++++++++------------------ 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 9d13962782..3b7c775667 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -67,6 +67,7 @@ class FDMPC(PCBase): def load_set_values(triu=False): """ Compile C code to insert sparse element matrices and store in class cache + :arg triu: are we inserting onto the upper triangular part of the matrix? 
:returns: a python wrapper for the matrix insertion function @@ -210,7 +211,7 @@ def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, pmat_type): :arg J: the Jacobian bilinear form :arg bcs: an iterable of boundary conditions on V :arg form_compiler_parameters: parameters to assemble diagonal factors - :pmat_type: the preconditioner `PETSc.Mat.Type` + :arg pmat_type: the preconditioner `PETSc.Mat.Type` :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and its assembly callable """ @@ -511,11 +512,11 @@ def assemble_coef(self, J, form_compiler_parameters): See Section 3.2 of Brubeck2022b. :arg J: the Jacobian bilinear :class:`ufl.Form`, - :form_compiler_parameters: a `dict` with tsfc parameters. + :arg form_compiler_parameters: a `dict` with tsfc parameters. - :return: a 2-tuple with a `dict` with the zero-th order and second - order coefficients keyed on ``"beta"`` and ``"alpha"``, and a list of - assembly callables. + :returns: a 2-tuple of a `dict` with the zero-th order and second + order coefficients keyed on ``"beta"`` and ``"alpha"``, + and a list of assembly callables. """ from ufl.algorithms.ad import expand_derivatives from ufl.algorithms.expand_indices import expand_indices @@ -622,8 +623,8 @@ def assemble_reference_tensor(self, V): :arg V: a :class:`.FunctionSpace` - :return: a :class:`PETSc.Mat` with the moments of orthogonalized bases - against the basis and its exterior derivative. + :returns: a :class:`PETSc.Mat` with the moments of orthogonalized bases + against the basis and its exterior derivative. """ tdim = V.mesh().topological_dimension() value_size = V.value_size @@ -911,16 +912,12 @@ def mass_matrix(tdim, formdegree, B00, B11, comm=None): # The 1D matrices may come with different test and trial spaces. if comm is None: comm = PETSc.COMM_SELF + if tdim == 1: + return petsc_sparse(B11 if formdegree else B00, comm=comm) + B00 = petsc_sparse(B00, comm=comm) B11 = petsc_sparse(B11, comm=comm) - if tdim == 1: - if formdegree == 0: - B11.destroy() - return B00 - else: - B00.destroy() - return B11 - elif tdim == 2: + if tdim == 2: if formdegree == 0: B_diag = [B00.kron(B00)] elif formdegree == 1: @@ -937,6 +934,8 @@ def mass_matrix(tdim, formdegree, B00, B11, comm=None): else: B_diag = [kron3(B11, B11, B11)] + B00.destroy() + B11.destroy() if len(B_diag) == 1: result = B_diag[0] else: @@ -948,8 +947,6 @@ def mass_matrix(tdim, formdegree, B00, B11, comm=None): B_zero.destroy() for B in B_diag: B.destroy() - B00.destroy() - B11.destroy() return result @@ -965,15 +962,13 @@ def diff_matrix(tdim, formdegree, A00, A11, A10, comm=None): A_zero.assemble() return A_zero - A00 = petsc_sparse(A00, comm=comm) - A11 = petsc_sparse(A11, comm=comm) A10 = petsc_sparse(A10, comm=comm) if tdim == 1: - A00.destroy() - A11.destroy() - return A10 - elif tdim == 2: + + A00 = petsc_sparse(A00, comm=comm) + A11 = petsc_sparse(A11, comm=comm) + if tdim == 2: if formdegree == 0: A_blocks = [[A00.kron(A10)], [A10.kron(A00)]] elif formdegree == 1: From b954a8a3b4e588b97f244d5188cf28cf0c076868 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 20 Mar 2023 09:06:20 +0000 Subject: [PATCH 29/75] prolongation arguments ordered now as coarse, fine --- firedrake/preconditioners/fdm.py | 16 +++++++-------- firedrake/preconditioners/pmg.py | 34 ++++++++++++++++---------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 5616cf547c..07a20fa444 100644 --- a/firedrake/preconditioners/fdm.py +++ 
b/firedrake/preconditioners/fdm.py @@ -133,7 +133,7 @@ def initialize(self, pc): bcs_fdm.append(bc.reconstruct(V=W, g=0)) # Construct interpolation from original to variant spaces - self.fdm_interp = prolongation_matrix_matfree(V, V_fdm, [], bcs_fdm) + self.fdm_interp = prolongation_matrix_matfree(V_fdm, V, bcs_fdm, []) self.work_vec_x = Amat.createVecLeft() self.work_vec_y = Amat.createVecRight() if use_amat: @@ -162,7 +162,7 @@ def interp_nullspace(I, nsp): x.destroy() return PETSc.NullSpace().create(constant=False, vectors=vectors, comm=nsp.getComm()) - inject = prolongation_matrix_matfree(V_fdm, V, [], []) + inject = prolongation_matrix_matfree(V, V_fdm, [], []) Amat.setNullSpace(interp_nullspace(inject, omat.getNullSpace())) Amat.setTransposeNullSpace(interp_nullspace(inject, omat.getTransposeNullSpace())) Amat.setNearNullSpace(interp_nullspace(inject, omat.getNearNullSpace())) @@ -980,16 +980,16 @@ def diff_matrix(tdim, formdegree, A00, A11, A10, comm=None): return result -def diff_prolongator(Vf, Vc, fbcs=[], cbcs=[]): +def diff_prolongator(Vc, Vf, cbcs=[], fbcs=[]): """ - Magic. Tabulate exterior derivative: Vc -> Vf as an explicit sparse matrix. - Works for any basis. These are the same matrices one needs for HypreAMS and friends. + Tabulate exterior derivative: Vc -> Vf as an explicit sparse matrix. + Works for any tensor-product basis. These are the same matrices one needs for HypreAMS and friends. """ from tsfc.finatinterface import create_element from firedrake.preconditioners.pmg import fiat_reference_prolongator - ef = Vf.finat_element ec = Vc.finat_element + ef = Vf.finat_element if ef.formdegree - ec.formdegree != 1: raise ValueError("Expecting Vf = d(Vc)") @@ -1000,7 +1000,7 @@ def diff_prolongator(Vf, Vc, fbcs=[], cbcs=[]): degree = e0.degree() A11 = numpy.eye(degree, dtype=PETSc.RealType) A00 = numpy.eye(degree+1, dtype=PETSc.RealType) - A10 = fiat_reference_prolongator(e1, e0, derivative=True) + A10 = fiat_reference_prolongator(e0, e1, derivative=True) tdim = Vc.mesh().topological_dimension() Dhat = diff_matrix(tdim, ec.formdegree, A00, A11, A10) @@ -1720,7 +1720,7 @@ def extrude_node_map(node_map, bsize=1): :arg node_map: a :class:`pyop2.Map` mapping entities to their local dofs, including ghost entities. 
:arg bsize: the block size - :returns: a 2-tuple with the map as function and the number of cells owned by this process + :returns: a 2-tuple with the cell to node map and the number of cells owned by this process """ nelv = node_map.values.shape[0] if node_map.offset is None: diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 029fffad6d..84433142d0 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -346,7 +346,7 @@ def create_transfer(self, cctx, fctx, mat_type, cbcs, fbcs): fV = fctx.J.arguments()[0].function_space() cbcs = tuple(cctx._problem.bcs) if cbcs else tuple() fbcs = tuple(fctx._problem.bcs) if fbcs else tuple() - key = (fV, cV, cbcs, fbcs, mat_type) + key = (cV, fV, cbcs, fbcs, mat_type) try: return self._cache_transfer[key] except KeyError: @@ -356,7 +356,7 @@ def create_transfer(self, cctx, fctx, mat_type, cbcs, fbcs): construct_mat = prolongation_matrix_aij else: raise ValueError("Unknown matrix type") - return self._cache_transfer.setdefault(key, construct_mat(fV, cV, fbcs, cbcs)) + return self._cache_transfer.setdefault(key, construct_mat(cV, fV, cbcs, fbcs)) def create_interpolation(self, dmc, dmf): prefix = dmc.getOptionsPrefix() @@ -678,7 +678,7 @@ def get_permutation_to_line_elements(finat_element): @lru_cache(maxsize=10) -def fiat_reference_prolongator(felem, celem, derivative=False): +def fiat_reference_prolongator(celem, felem, derivative=False): ckey = (felem.formdegree,) if derivative else None fkey = (celem.formdegree,) if derivative else None fdual = felem.dual_basis() @@ -915,7 +915,7 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): fshapes.append((nscal,) + tuple(fshape)) cshapes.append((nscal,) + tuple(cshape)) - J = [fiat_reference_prolongator(fe, ce).T for fe, ce in zip(felem, celem)] + J = [fiat_reference_prolongator(ce, fe).T for fe, ce in zip(felem, celem)] if any(Jk.size and numpy.isclose(Jk, 0.0E0).all() for Jk in J): prolong_code.append(f""" for({IntType_c} i=0; i<{nscal*numpy.prod(fshape)}; i++) {t_out}[i+{fskip}] = 0.0E0; @@ -1130,13 +1130,13 @@ class StandaloneInterpolationMatrix(object): _cache_work = {} - def __init__(self, Vf, Vc, Vf_bcs, Vc_bcs): - self.uf = self.work_function(Vf) + def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs): self.uc = self.work_function(Vc) - self.Vf = self.uf.function_space() + self.uf = self.work_function(Vf) self.Vc = self.uc.function_space() - self.Vf_bcs = Vf_bcs + self.Vf = self.uf.function_space() self.Vc_bcs = Vc_bcs + self.Vf_bcs = Vf_bcs def work_function(self, V): if isinstance(V, firedrake.Function): @@ -1453,10 +1453,10 @@ def _weight(self): @cached_property def _standalones(self): standalones = [] - for (i, (uf_sub, uc_sub)) in enumerate(zip(self.uf.subfunctions, self.uc.subfunctions)): - Vf_sub_bcs = [bc for bc in self.Vf_bcs if bc.function_space().index == i] + for i, (uc_sub, uf_sub) in enumerate(zip(self.uc.subfunctions, self.uf.subfunctions)): Vc_sub_bcs = [bc for bc in self.Vc_bcs if bc.function_space().index == i] - standalone = StandaloneInterpolationMatrix(uf_sub, uc_sub, Vf_sub_bcs, Vc_sub_bcs) + Vf_sub_bcs = [bc for bc in self.Vf_bcs if bc.function_space().index == i] + standalone = StandaloneInterpolationMatrix(uc_sub, uf_sub, Vc_sub_bcs, Vf_sub_bcs) standalones.append(standalone) return standalones @@ -1477,11 +1477,11 @@ def getNestSubMatrix(self, i, j): return None -def prolongation_matrix_aij(Pk, P1, Pk_bcs=[], P1_bcs=[]): - if isinstance(Pk, firedrake.Function): - Pk = Pk.function_space() +def 
prolongation_matrix_aij(P1, Pk, P1_bcs=[], Pk_bcs=[]): if isinstance(P1, firedrake.Function): P1 = P1.function_space() + if isinstance(Pk, firedrake.Function): + Pk = Pk.function_space() sp = op2.Sparsity((Pk.dof_dset, P1.dof_dset), (Pk.cell_node_map(), @@ -1532,12 +1532,12 @@ def prolongation_matrix_aij(Pk, P1, Pk_bcs=[], P1_bcs=[]): return mat.handle -def prolongation_matrix_matfree(Vf, Vc, Vf_bcs=[], Vc_bcs=[]): +def prolongation_matrix_matfree(Vc, Vf, Vc_bcs=[], Vf_bcs=[]): fele = Vf.ufl_element() if isinstance(fele, ufl.MixedElement) and not isinstance(fele, (ufl.VectorElement, ufl.TensorElement)): - ctx = MixedInterpolationMatrix(Vf, Vc, Vf_bcs, Vc_bcs) + ctx = MixedInterpolationMatrix(Vc, Vf, Vc_bcs, Vf_bcs) else: - ctx = StandaloneInterpolationMatrix(Vf, Vc, Vf_bcs, Vc_bcs) + ctx = StandaloneInterpolationMatrix(Vc, Vf, Vc_bcs, Vf_bcs) sizes = (Vf.dof_dset.layout_vec.getSizes(), Vc.dof_dset.layout_vec.getSizes()) M_shll = PETSc.Mat().createPython(sizes, ctx, comm=Vf._comm) From 4b5f7b70eeafbc2c921b8f276329c1404f5f3686 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 20 Mar 2023 10:01:20 +0000 Subject: [PATCH 30/75] address some more review comments --- firedrake/preconditioners/fdm.py | 4 +-- firedrake/preconditioners/pmg.py | 51 ++++++++++++++++++++++------- tests/multigrid/test_p_multigrid.py | 4 +-- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 86d34123fc..ae1ef176c6 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -936,10 +936,10 @@ def mass_matrix(tdim, formdegree, B00, B11, comm=None): B00.destroy() B11.destroy() - if len(B_diag) == 1: + n = len(B_diag) + if n == 1: result = B_diag[0] else: - n = len(B_diag) B_zero = PETSc.Mat().createAIJ(B_diag[0].getSize(), nnz=(0, 0), comm=comm) B_zero.assemble() B_blocks = [[B_diag[i] if i == j else B_zero for j in range(n)] for i in range(n)] diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index fbbade5c10..e98798b7f7 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -608,6 +608,19 @@ def compare_dual_basis(l1, l2): @lru_cache(maxsize=10) @PETSc.Log.EventDecorator("GetLineElements") def get_permutation_to_line_elements(finat_element): + """ + Find DOF permuation to factor out the EnrichedElement expansion into common + TensorProductElements. This routine exposes structure to e.g vectorize + prolongation of NCE or NCF accross vector components, by permuting all + components into a common TensorProductElement. + + This is temporary while we wait for dual evaluation of :class:`finat.EnrichedElement`. + + :returns: a 3-tuple of the DOF permuation, the unique terms in expansion as + a list of tuples of :class:`FIAT.FiniteElements`, and the cyclic + permuatations of the axes to form the element given by their shifts + in list of `int` tuples + """ from FIAT.reference_element import LINE expansion = expand_element(finat_element) @@ -714,7 +727,7 @@ def fiat_reference_prolongator(celem, felem, derivative=False): y is (mx*my*mz)-by-nel. Important notes: -The input data in x is destroyed in the process. +This routine is in-place: the input data in x and y are destroyed in the process. Need to allocate nel*max(mx, nx)*max(my, ny)*max(mz, nz) memory for both x and y. 
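+Each direction is contracted in turn with a dense matrix-matrix product (sum
+factorization), so for n nodes per direction in d dimensions the cost per cell
+is O(n^(d+1)) rather than the O(n^(2d)) of applying the assembled Kronecker product.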
*/ @@ -766,6 +779,10 @@ def fiat_reference_prolongator(celem, felem, derivative=False): PetscBLASInt nx, PetscBLASInt ny, PetscBLASInt nz, PetscBLASInt nel, PetscScalar *A1, PetscScalar *A2, PetscScalar *A3, PetscScalar *x, PetscScalar *y, PetscScalar *xwork, PetscScalar *ywork){ + /* + Same as kronmxv_inplace, but the work buffers allow the input data in x to + be kept untouched. + */ PetscScalar *ptr[2] = {xwork, ywork}; @@ -784,6 +801,10 @@ def fiat_reference_prolongator(celem, felem, derivative=False): static inline void permute_axis(PetscBLASInt axis, PetscBLASInt n0, PetscBLASInt n1, PetscBLASInt n2, PetscBLASInt n3, PetscScalar *x, PetscScalar *y){ + /* + Apply a cyclic permuation to a n0 x n1 x n2 x n3 array x, exponsing axis as + the fast direction. Write the result on y. + */ PetscBLASInt p = 0; PetscBLASInt s0, s1, s2, s3; @@ -805,6 +826,9 @@ def fiat_reference_prolongator(celem, felem, derivative=False): static inline void ipermute_axis(PetscBLASInt axis, PetscBLASInt n0, PetscBLASInt n1, PetscBLASInt n2, PetscBLASInt n3, PetscScalar *x, PetscScalar *y){ + /* + Apply the transpose of permute_axis, reading from y and adding to x. + */ PetscBLASInt p = 0; PetscBLASInt s0, s1, s2, s3; @@ -827,15 +851,15 @@ def fiat_reference_prolongator(celem, felem, derivative=False): @PETSc.Log.EventDecorator("MakeKronCode") -def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): +def make_kron_code(Vc, Vf, t_in, t_out, mat_name, scratch): """ Return interpolation and restriction kernels between enriched tensor product elements """ operator_decl = [] prolong_code = [] restrict_code = [] - _, felems, fshifts = get_permutation_to_line_elements(Vf.finat_element) _, celems, cshifts = get_permutation_to_line_elements(Vc.finat_element) + _, felems, fshifts = get_permutation_to_line_elements(Vf.finat_element) shifts = fshifts in_place = False @@ -902,7 +926,7 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): fshapes = [] cshapes = [] has_code = False - for felem, celem, shift in zip(felems, celems, shifts): + for celem, felem, shift in zip(celems, felems, shifts): if len(felem) != len(celem): raise ValueError("Fine and coarse elements do not have the same number of factors") if len(felem) > 3: @@ -915,7 +939,7 @@ def make_kron_code(Vf, Vc, t_in, t_out, mat_name, scratch): fshapes.append((nscal,) + tuple(fshape)) cshapes.append((nscal,) + tuple(cshape)) - J = [fiat_reference_prolongator(ce, fe).T for fe, ce in zip(felem, celem)] + J = [fiat_reference_prolongator(ce, fe).T for ce, fe in zip(celem, felem)] if any(Jk.size and numpy.isclose(Jk, 0.0E0).all() for Jk in J): prolong_code.append(f""" for({IntType_c} i=0; i<{nscal*numpy.prod(fshape)}; i++) {t_out}[i+{fskip}] = 0.0E0; @@ -1022,7 +1046,7 @@ def cache_generate_code(kernel, comm): return code -def make_mapping_code(Q, fmapping, cmapping, t_in, t_out): +def make_mapping_code(Q, cmapping, fmapping, t_in, t_out): if fmapping == cmapping: return None A = get_piola_tensor(cmapping, Q.mesh(), inverse=False) @@ -1166,6 +1190,9 @@ def _weight(self): @cached_property def _kernels(self): try: + # We generate custom prolongation and restriction kernels mainly because: + # 1. Code generation for the transpose of prolongation is not readily available + # 2. 
Dual evaluation of EnrichedElement is not yet implemented in FInAT uf_map = get_permuted_map(self.Vf) uc_map = get_permuted_map(self.Vc) prolong_kernel, restrict_kernel, coefficients = self.make_blas_kernels(self.Vf, self.Vc) @@ -1174,6 +1201,8 @@ def _kernels(self): self.uc.dat(op2.READ, uc_map), self._weight.dat(op2.READ, uf_map)] except ValueError: + # The elements do not have the expected tensor product structure + # Fall back to aij kernels uf_map = self.Vf.cell_node_map() uc_map = self.Vc.cell_node_map() prolong_kernel, restrict_kernel, coefficients = self.make_kernels(self.Vf, self.Vc) @@ -1250,7 +1279,7 @@ def make_blas_kernels(Vf, Vc): if fmapping == cmapping: # interpolate on each direction via Kroncker product - operator_decl, prolong_code, restrict_code, shapes = make_kron_code(Vf, Vc, "t0", "t1", "J0", "t2") + operator_decl, prolong_code, restrict_code, shapes = make_kron_code(Vc, Vf, "t0", "t1", "J0", "t2") else: decl = [""]*4 prolong = [""]*5 @@ -1261,18 +1290,18 @@ def make_blas_kernels(Vf, Vc): if qelem.mapping() != "identity": qelem = qelem.reconstruct(mapping="identity") Qf = Vf if qelem == felem else firedrake.FunctionSpace(Vf.mesh(), qelem) - mapping_output = make_mapping_code(Qf, fmapping, cmapping, "t0", "t1") + mapping_output = make_mapping_code(Qf, cmapping, fmapping, "t0", "t1") in_place_mapping = True except Exception: qelem = ufl.FiniteElement("DQ", cell=felem.cell(), degree=PMGBase.max_degree(felem)) if felem.value_shape(): qelem = ufl.TensorElement(qelem, shape=felem.value_shape(), symmetry=felem.symmetry()) Qf = firedrake.FunctionSpace(Vf.mesh(), qelem) - mapping_output = make_mapping_code(Qf, fmapping, cmapping, "t0", "t1") + mapping_output = make_mapping_code(Qf, cmapping, fmapping, "t0", "t1") qshape = (Qf.value_size, Qf.finat_element.space_dimension()) # interpolate to embedding fine space - decl[0], prolong[0], restrict[0], shapes = make_kron_code(Qf, Vc, "t0", "t1", "J0", "t2") + decl[0], prolong[0], restrict[0], shapes = make_kron_code(Vc, Qf, "t0", "t1", "J0", "t2") if mapping_output is not None: # permute to FInAT ordering, and apply the mapping @@ -1281,7 +1310,7 @@ def make_blas_kernels(Vf, Vc): if not in_place_mapping: # permute to Kronecker-friendly ordering and interpolate to fine space decl[2], prolong[3], restrict[3] = make_permutation_code(Vf, qshape, shapes[0], "t1", "t0", "perm1") - decl[3], prolong[4], restrict[4], _shapes = make_kron_code(Vf, Qf, "t0", "t1", "J1", "t2") + decl[3], prolong[4], restrict[4], _shapes = make_kron_code(Qf, Vf, "t0", "t1", "J1", "t2") shapes.extend(_shapes) operator_decl = "".join(decl) diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index a7414ebb4c..c04913e7d4 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -87,7 +87,7 @@ def test_prolong_de_rham(tp_mesh): for u in us: for v in us: if u != v: - P = prolongation_matrix_matfree(v, u).getPythonContext() + P = prolongation_matrix_matfree(u, v).getPythonContext() P._prolong() assert norm(v-expr, "L2") < 1E-14 @@ -113,7 +113,7 @@ def test_prolong_low_order_to_restricted(tp_mesh, tp_family, variant): uc.dat.data[1::2] = 1.0 for v in [ui, uf]: - P = prolongation_matrix_matfree(v, uc).getPythonContext() + P = prolongation_matrix_matfree(uc, v).getPythonContext() P._prolong() assert norm(ui + uf - uc, "L2") < 2E-14 From dd7d85518acf356a44cc979cb2c7bae33c978b46 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 20 Mar 2023 10:08:36 +0000 Subject: [PATCH 31/75] change API of exterior 
derivative in hiptmair.py --- firedrake/preconditioners/hiptmair.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firedrake/preconditioners/hiptmair.py b/firedrake/preconditioners/hiptmair.py index 317dc5614b..af1d1bab27 100644 --- a/firedrake/preconditioners/hiptmair.py +++ b/firedrake/preconditioners/hiptmair.py @@ -201,7 +201,7 @@ def coarsen(self, pc): if G_callback is None: interp_petscmat = chop(Interpolator(dminus(test), V, bcs=bcs + coarse_space_bcs).callable().handle) else: - interp_petscmat = G_callback(V, coarse_space, bcs, coarse_space_bcs) + interp_petscmat = G_callback(coarse_space, V, coarse_space_bcs, bcs) return coarse_operator, coarse_space_bcs, interp_petscmat From fa6da0e3d38f20b7802b00e77f6ce61ca080d733 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 20 Mar 2023 10:56:11 +0000 Subject: [PATCH 32/75] new prolongator API in hypre --- firedrake/preconditioners/hypre_ads.py | 4 ++-- firedrake/preconditioners/hypre_ams.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/firedrake/preconditioners/hypre_ads.py b/firedrake/preconditioners/hypre_ads.py index ca3728abdb..9cbc2537da 100644 --- a/firedrake/preconditioners/hypre_ads.py +++ b/firedrake/preconditioners/hypre_ads.py @@ -34,12 +34,12 @@ def initialize(self, obj): if G_callback is None: G = chop(Interpolator(grad(TestFunction(P1)), NC1).callable().handle) else: - G = G_callback(NC1, P1) + G = G_callback(P1, NC1) C_callback = appctx.get("get_curl", None) if C_callback is None: C = chop(Interpolator(curl(TestFunction(NC1)), V).callable().handle) else: - C = C_callback(V, NC1) + C = C_callback(NC1, V) pc = PETSc.PC().create(comm=obj.comm) pc.incrementTabLevel(1, parent=obj) diff --git a/firedrake/preconditioners/hypre_ams.py b/firedrake/preconditioners/hypre_ams.py index 8bfd14908e..a00334403b 100644 --- a/firedrake/preconditioners/hypre_ams.py +++ b/firedrake/preconditioners/hypre_ams.py @@ -54,7 +54,7 @@ def initialize(self, obj): if G_callback is None: G = chop(Interpolator(grad(TestFunction(P1)), V).callable().handle) else: - G = G_callback(V, P1) + G = G_callback(P1, V) pc = PETSc.PC().create(comm=obj.comm) pc.incrementTabLevel(1, parent=obj) From e7622d9dfccb482973f5ee69c693349daa034ea6 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 20 Mar 2023 17:28:15 +0000 Subject: [PATCH 33/75] typos --- firedrake/preconditioners/pmg.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index e98798b7f7..4ed71d2d14 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -609,16 +609,16 @@ def compare_dual_basis(l1, l2): @PETSc.Log.EventDecorator("GetLineElements") def get_permutation_to_line_elements(finat_element): """ - Find DOF permuation to factor out the EnrichedElement expansion into common + Find DOF permutation to factor out the EnrichedElement expansion into common TensorProductElements. This routine exposes structure to e.g vectorize prolongation of NCE or NCF accross vector components, by permuting all components into a common TensorProductElement. This is temporary while we wait for dual evaluation of :class:`finat.EnrichedElement`. 
- :returns: a 3-tuple of the DOF permuation, the unique terms in expansion as + :returns: a 3-tuple of the DOF permutation, the unique terms in expansion as a list of tuples of :class:`FIAT.FiniteElements`, and the cyclic - permuatations of the axes to form the element given by their shifts + permutations of the axes to form the element given by their shifts in list of `int` tuples """ from FIAT.reference_element import LINE @@ -802,7 +802,7 @@ def fiat_reference_prolongator(celem, felem, derivative=False): PetscBLASInt n0, PetscBLASInt n1, PetscBLASInt n2, PetscBLASInt n3, PetscScalar *x, PetscScalar *y){ /* - Apply a cyclic permuation to a n0 x n1 x n2 x n3 array x, exponsing axis as + Apply a cyclic permutation to a n0 x n1 x n2 x n3 array x, exponsing axis as the fast direction. Write the result on y. */ From 823c306747e9ff3d3bdb1e3b902de30c698c3629 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 21 Mar 2023 10:45:00 +0000 Subject: [PATCH 34/75] dual evaluation direclty through FIAT --- firedrake/preconditioners/pmg.py | 51 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 4ed71d2d14..4c1592a082 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -557,20 +557,19 @@ def expand_element(ele): return ele -def evaluate_dual(dual, element, key=None): - # Evaluate the action of a set of dual functionals on the basis functions of an element. - keys = list(dict.fromkeys(tuple(phi.get_point_dict().keys()) for phi in dual)) - pts = list(dict.fromkeys(sum(keys, ()))) - if key is None: - key = (0, ) * len(pts[0]) - tab = element.tabulate(sum(key), pts)[key] - result = numpy.empty((len(dual), element.space_dimension()), dtype=tab.dtype) - zero = [(0.0, ())] - for k, phi in enumerate(dual): - wts = phi.get_point_dict() - wts = numpy.array([wts.get(pt, zero)[0][0] for pt in pts]) - result[k] = tab.dot(wts).T - return result +def evaluate_dual(source, target, alpha=None): + # Evaluate the action of a set of dual functionals of the target element + # on the (derivatives of the) basis functions of the source element. 
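+    # to_riesz expresses the target dual functionals against the polynomial
+    # expansion set underlying the source's nodal basis, get_coeffs gives the
+    # expansion coefficients of the source basis functions, and the dmats apply
+    # the derivatives requested through the multi-index alpha.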
+ primal = source.get_nodal_basis() + dual = target.get_dual_set() + A = dual.to_riesz(primal) + B = numpy.transpose(primal.get_coeffs()) + if alpha is not None: + dmats = primal.get_dmats() + for i in range(len(alpha)): + for j in range(alpha[i]): + B = numpy.dot(dmats[i], B) + return numpy.dot(A, B) def compare_element(e1, e2): @@ -578,7 +577,7 @@ def compare_element(e1, e2): return True if e1.space_dimension() != e2.space_dimension(): return False - B = evaluate_dual(e1.dual_basis(), e2) + B = evaluate_dual(e1, e2) numpy.fill_diagonal(B, numpy.diagonal(B)-1.0) return numpy.allclose(B, 0.0, rtol=1E-14, atol=1E-14) @@ -605,6 +604,17 @@ def compare_dual_basis(l1, l2): return all(compare_dual(b1, b2) for b1, b2 in zip(l1, l2)) +@lru_cache(maxsize=10) +def fiat_reference_prolongator(celem, felem, derivative=False): + ckey = (felem.formdegree,) if derivative else None + fkey = (celem.formdegree,) if derivative else None + fdual = felem.dual_basis() + cdual = celem.dual_basis() + if fkey == ckey and (celem is felem or compare_dual_basis(cdual, fdual)): + return numpy.array([]) + return evaluate_dual(celem, felem, alpha=ckey) + + @lru_cache(maxsize=10) @PETSc.Log.EventDecorator("GetLineElements") def get_permutation_to_line_elements(finat_element): @@ -690,17 +700,6 @@ def get_permutation_to_line_elements(finat_element): return dof_perm, unique_line_elements, shifts -@lru_cache(maxsize=10) -def fiat_reference_prolongator(celem, felem, derivative=False): - ckey = (felem.formdegree,) if derivative else None - fkey = (celem.formdegree,) if derivative else None - fdual = felem.dual_basis() - cdual = celem.dual_basis() - if fkey == ckey and compare_dual_basis(fdual, cdual): - return numpy.array([]) - return evaluate_dual(fdual, celem, ckey) - - # Common kernel to compute y = kron(A3, kron(A2, A1)) * x # Vector and tensor field generalization from Deville, Fischer, and Mund section 8.3.1. kronmxv_code = """ From 67d56e9f6da62c8721c1df3a6ad7a082b401382b Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 21 Mar 2023 10:56:13 +0000 Subject: [PATCH 35/75] test HiptmairPC on hexes --- firedrake/preconditioners/fdm.py | 2 +- tests/multigrid/test_hiptmair.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index ae1ef176c6..56a512d2d6 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -997,7 +997,7 @@ def diff_matrix(tdim, formdegree, A00, A11, A10, comm=None): return result -def diff_prolongator(Vc, Vf, cbcs=[], fbcs=[]): +def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[]): """ Tabulate exterior derivative: Vc -> Vf as an explicit sparse matrix. Works for any tensor-product basis. These are the same matrices one needs for HypreAMS and friends. 
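A minimal usage sketch of the renamed tabulate_exterior_derivative, mirroring the
test change below; `problem` and `parameters` are assumed to be set up as in that
test:

    from firedrake import LinearVariationalSolver
    from firedrake.preconditioners.fdm import tabulate_exterior_derivative

    # Hand the sparse gradient/curl tabulation to HypreAMS/HiptmairPC through the
    # application context; these are the keys those preconditioners look up.
    appctx = {"get_gradient": tabulate_exterior_derivative,
              "get_curl": tabulate_exterior_derivative}
    solver = LinearVariationalSolver(problem, solver_parameters=parameters,
                                     appctx=appctx)
    solver.solve()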
diff --git a/tests/multigrid/test_hiptmair.py b/tests/multigrid/test_hiptmair.py index d851e43e2f..b553356e5e 100644 --- a/tests/multigrid/test_hiptmair.py +++ b/tests/multigrid/test_hiptmair.py @@ -62,9 +62,14 @@ def run_riesz_map(V, mat_type): a = inner(d(u), d(v))*dx + inner(u, v)*dx L = inner(f, v)*dx bcs = [DirichletBC(V, u_exact, "on_boundary")] - + if V.mesh().ufl_cell().is_simplex(): + appctx = dict() + else: + from firedrake.preconditioners.fdm import tabulate_exterior_derivative + appctx = {"get_gradient": tabulate_exterior_derivative, + "get_curl": tabulate_exterior_derivative,} problem = LinearVariationalProblem(a, L, uh, bcs=bcs) - solver = LinearVariationalSolver(problem, solver_parameters=parameters) + solver = LinearVariationalSolver(problem, solver_parameters=parameters, appctx=appctx) solver.solve() its = solver.snes.ksp.getIterationNumber() return its @@ -72,7 +77,7 @@ def run_riesz_map(V, mat_type): @pytest.mark.skipcomplexnoslate @pytest.mark.parametrize(["family", "cell"], - [("N1curl", "tetrahedron")]) + [("N1curl", "tetrahedron"), ("NCE", "hexahedron")]) def test_hiptmair_hcurl(family, cell): mesh = mesh_hierarchy(cell)[-1] V = FunctionSpace(mesh, family, degree=1) @@ -82,7 +87,7 @@ def test_hiptmair_hcurl(family, cell): @pytest.mark.skipcomplexnoslate @pytest.mark.parametrize(["family", "cell"], - [("RT", "tetrahedron")]) + [("RT", "tetrahedron"), ("NCF", "hexahedron")]) def test_hiptmair_hdiv(family, cell): mesh = mesh_hierarchy(cell)[-1] V = FunctionSpace(mesh, family, degree=1) From fa966ff233984d303f1d69cd89b0b3a13438da23 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 21 Mar 2023 11:00:49 +0000 Subject: [PATCH 36/75] lint --- tests/multigrid/test_hiptmair.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/multigrid/test_hiptmair.py b/tests/multigrid/test_hiptmair.py index b553356e5e..b59022d9c5 100644 --- a/tests/multigrid/test_hiptmair.py +++ b/tests/multigrid/test_hiptmair.py @@ -67,7 +67,7 @@ def run_riesz_map(V, mat_type): else: from firedrake.preconditioners.fdm import tabulate_exterior_derivative appctx = {"get_gradient": tabulate_exterior_derivative, - "get_curl": tabulate_exterior_derivative,} + "get_curl": tabulate_exterior_derivative} problem = LinearVariationalProblem(a, L, uh, bcs=bcs) solver = LinearVariationalSolver(problem, solver_parameters=parameters, appctx=appctx) solver.solve() From b108044d5ed3419b5b5e703eeecc4f5002c62388 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 21 Mar 2023 13:56:22 +0000 Subject: [PATCH 37/75] add option fdm_static_condensation --- firedrake/preconditioners/fdm.py | 18 ++++++++++-------- tests/regression/test_fdm.py | 1 + 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 56a512d2d6..0887cb0e3d 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -93,6 +93,7 @@ def initialize(self, pc): options = PETSc.Options(options_prefix) use_amat = options.getBool("pc_use_amat", True) + use_static_condensation = options.getBool("static_condensation", False) pmat_type = options.getString("mat_type", PETSc.Mat.Type.AIJ) appctx = self.get_appctx(pc) @@ -176,7 +177,7 @@ def interp_nullspace(I, nsp): fcp=fcp, options_prefix=options_prefix) # Assemble the FDM preconditioner with sparse local matrices - Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, fcp, pmat_type) + Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, 
use_static_condensation) self._assemble_P() Pmat.setNullSpace(Amat.getNullSpace()) Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) @@ -203,7 +204,7 @@ def interp_nullspace(I, nsp): fdmpc.setFromOptions() @PETSc.Log.EventDecorator("FDMPrealloc") - def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, pmat_type): + def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, pmat_type, use_static_condensation): """ Assemble the sparse preconditioner from diagonal mass matrices. @@ -212,6 +213,7 @@ def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, pmat_type): :arg bcs: an iterable of boundary conditions on V :arg form_compiler_parameters: parameters to assemble diagonal factors :arg pmat_type: the preconditioner `PETSc.Mat.Type` + :arg use_static_condensation: are we assembling the statically-condensed Schur complement on facets? :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and its assembly callable """ @@ -241,7 +243,7 @@ def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, pmat_type): self.reference_tensor_on_diag = dict() self.get_static_condensation = dict() - if Vfacet: + if Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block self.reference_tensor_on_diag[Vfacet] = self.assemble_reference_tensor(Vbig) self.get_static_condensation[Vfacet] = lambda A: condense_element_mat(A, self.ises[0], self.ises[1], self.submats) @@ -618,7 +620,7 @@ def assemble_coef(self, J, form_compiler_parameters): @PETSc.Log.EventDecorator("FDMRefTensor") def assemble_reference_tensor(self, V): """ - Return the reference tensor used in the diagonal factorization of the + Return the reference tensor used in the diagonal factorisation of the sparse cell matrices. See Section 3.2 of Brubeck2022b. :arg V: a :class:`.FunctionSpace` @@ -713,14 +715,14 @@ def factor_interior_mat(A00): zlice = slice(0, nblocks) numpy.sqrt(data[zlice], out=data[zlice]) numpy.reciprocal(data[zlice], out=data[zlice]) - PETSc.Log.logFlops(2*nblocks) + flops = nblocks * 2 for k in range(2, degree[-1]+1): nblocks = numpy.count_nonzero(degree == k) zlice = slice(zlice.stop, zlice.stop + k*nblocks) data[zlice] = invchol(data[zlice].reshape((-1, k, k))).reshape((-1,)) - flops = ((k+1)**3 + 5*(k+1)-12)//3 + k**3 - PETSc.Log.logFlops(flops*nblocks) + flops += nblocks * (((k+1)**3 + 5*(k+1)-12)//3 + k**3) + PETSc.Log.logFlops(flops) A00.setValuesCSR(indptr, indices, data) A00.assemble() @@ -1607,7 +1609,7 @@ def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False): @lru_cache(maxsize=10) def fdm_setup_ipdg(fdm_element, eta): """ - Setup for the fast diagonalization method for the IP-DG formulation. + Setup for the fast diagonalisation method for the IP-DG formulation. Compute sparsified interval stiffness and mass matrices and tabulate the normal derivative of the shape functions. 
diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index f2263a8f10..1104c8bb91 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -45,6 +45,7 @@ "pc_python_type": "firedrake.FacetSplitPC", "facet_pc_type": "python", "facet_pc_python_type": "firedrake.FDMPC", + "facet_fdm_static_condensation": True, "facet_fdm_pc_use_amat": False, "facet_fdm_pc_type": "fieldsplit", "facet_fdm_pc_fieldsplit_type": "symmetric_multiplicative", From 4149dce826f5d28d75861dbc69afc5b62d5c0401 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 22 Mar 2023 10:59:27 +0000 Subject: [PATCH 38/75] more elegant caching, remove interpolation of nullspace, comments addressing the extension of static condensation to non-symmetric matrices and SLATE --- firedrake/preconditioners/fdm.py | 62 +++++++++++++------------------- firedrake/preconditioners/pmg.py | 16 ++++----- 2 files changed, 33 insertions(+), 45 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 0887cb0e3d..6c7838b29c 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -53,15 +53,19 @@ class FDMPC(PCBase): where alpha and beta are possibly tensor-valued. The sparse matrix is obtained by approximating (v, alpha * u) and (v, beta * u) as diagonal mass matrices. + + The PETSc options inspected by this class are: + - 'fdm_mat_type': can be either 'aij' or 'sbaij' + - 'fdm_static_condensation': are we assembling the Schur complement on facets? + + Static condensation is currently only implemented for the symmetric case, + use it at your own risk. """ _prefix = "fdm_" _variant = "fdm" _citation = "Brubeck2022b" - - _reference_tensor_cache = {} - _coefficient_cache = {} - _c_code_cache = {} + _cache = {} @staticmethod def load_set_values(triu=False): @@ -73,7 +77,7 @@ def load_set_values(triu=False): :returns: a python wrapper for the matrix insertion function """ key = triu - cache = FDMPC._c_code_cache + cache = FDMPC._cache.setdefault("load_set_values", {}) try: return cache[key] except KeyError: @@ -112,9 +116,13 @@ def initialize(self, pc): bcs = tuple(ctx._problem.bcs) mat_type = ctx.mat_type - if isinstance(J, firedrake.slate.Add): - J = J.children[0].form - assert type(J) == ufl.Form + # For static condensation with SLATE, we might extract the form on the + # interface-interface block like this: + # + # if isinstance(J, firedrake.slate.TensorBase) and use_static_condensation: + # J = J.children[0].form + if not isinstance(J, ufl.Form): + raise ValueError("Expecting a ufl.Form, not a %r" % type(J)) # Transform the problem into the space with FDM shape functions V = J.arguments()[-1].function_space() @@ -134,12 +142,15 @@ def initialize(self, pc): W = W.sub(index) bcs_fdm.append(bc.reconstruct(V=W, g=0)) - # Construct interpolation from original to variant spaces + # Create a new _SNESContext in the variant space + self._ctx_ref = self.new_snes_ctx(pc, J_fdm, bcs_fdm, mat_type, + fcp=fcp, options_prefix=options_prefix) + + # Construct interpolation from variant to original spaces self.fdm_interp = prolongation_matrix_matfree(V_fdm, V, bcs_fdm, []) self.work_vec_x = Amat.createVecLeft() self.work_vec_y = Amat.createVecRight() if use_amat: - omat = Amat self.A = allocate_matrix(J_fdm, bcs=bcs_fdm, form_compiler_parameters=fcp, mat_type=mat_type, options_prefix=options_prefix) self._assemble_A = partial(assemble, J_fdm, tensor=self.A, bcs=bcs_fdm, @@ -147,34 +158,10 @@ def initialize(self, pc): self._assemble_A() Amat = 
self.A.petscmat - def interp_nullspace(I, nsp): - if not nsp.handle: - return nsp - vectors = [] - for x in nsp.getVecs(): - y = I.createVecLeft() - I.mult(x, y) - vectors.append(y) - if nsp.hasConstant(): - y = I.createVecLeft() - x = I.createVecRight() - x.set(1.0E0) - I.mult(x, y) - vectors.append(y) - x.destroy() - return PETSc.NullSpace().create(constant=False, vectors=vectors, comm=nsp.getComm()) - - inject = prolongation_matrix_matfree(V, V_fdm, [], []) - Amat.setNullSpace(interp_nullspace(inject, omat.getNullSpace())) - Amat.setTransposeNullSpace(interp_nullspace(inject, omat.getTransposeNullSpace())) - Amat.setNearNullSpace(interp_nullspace(inject, omat.getNearNullSpace())) - if len(bcs) > 0: self.bc_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) else: self.bc_nodes = numpy.empty(0, dtype=PETSc.IntType) - self._ctx_ref = self.new_snes_ctx(pc, J_fdm, bcs_fdm, mat_type, - fcp=fcp, options_prefix=options_prefix) # Assemble the FDM preconditioner with sparse local matrices Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation) @@ -596,9 +583,10 @@ def assemble_coef(self, J, form_compiler_parameters): # Return coefficients and assembly callables, and cache them class key = (mixed_form.signature(), mesh) + cache = self._cache.setdefault("coefficients", {}) block_diagonal = True try: - return self._coefficient_cache[key] + return cache[key] except KeyError: if not block_diagonal or not V.shape: tensor = firedrake.Function(Z) @@ -615,7 +603,7 @@ def assemble_coef(self, J, form_compiler_parameters): ctx = sub.getPythonContext() coefficients[name] = ctx._block_diagonal assembly_callables.append(ctx._assemble_block_diagonal) - return self._coefficient_cache.setdefault(key, (coefficients, assembly_callables)) + return cache.setdefault(key, (coefficients, assembly_callables)) @PETSc.Log.EventDecorator("FDMRefTensor") def assemble_reference_tensor(self, V): @@ -640,7 +628,7 @@ def assemble_reference_tensor(self, V): degree = degree + 1 is_interior, is_facet = is_restricted(V.finat_element) key = (degree, tdim, formdegree, value_size, is_interior, is_facet) - cache = self._reference_tensor_cache + cache = self._cache.setdefault("reference_tensor", {}) try: return cache[key] except KeyError: diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 4c1592a082..d63c006f79 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -49,8 +49,7 @@ class PMGBase(PCSNESBase): """ _prefix = "pmg_" - - _cache_transfer = {} + _cache = {} def coarsen_element(self, ele): """ @@ -340,15 +339,16 @@ def coarsen_bcs(self, fbcs, cV): raise NotImplementedError("Unsupported BC type, please get in touch if you need this") return cbcs - def create_transfer(self, cctx, fctx, mat_type, cbcs, fbcs): + def create_transfer(self, mat_type, cctx, fctx, cbcs, fbcs): # Create a transfer or retrieve it from the class cache cV = cctx.J.arguments()[0].function_space() fV = fctx.J.arguments()[0].function_space() cbcs = tuple(cctx._problem.bcs) if cbcs else tuple() fbcs = tuple(fctx._problem.bcs) if fbcs else tuple() - key = (cV, fV, cbcs, fbcs, mat_type) + key = (mat_type, cV, fV, cbcs, fbcs) + cache = self._cache.setdefault("transfer", {}) try: - return self._cache_transfer[key] + return cache[key] except KeyError: if mat_type == "matfree": construct_mat = prolongation_matrix_matfree @@ -356,19 +356,19 @@ def create_transfer(self, cctx, fctx, mat_type, cbcs, fbcs): construct_mat = 
prolongation_matrix_aij else: raise ValueError("Unknown matrix type") - return self._cache_transfer.setdefault(key, construct_mat(cV, fV, cbcs, fbcs)) + return cache.setdefault(key, construct_mat(cV, fV, cbcs, fbcs)) def create_interpolation(self, dmc, dmf): prefix = dmc.getOptionsPrefix() mat_type = PETSc.Options(prefix).getString("mg_levels_transfer_mat_type", default="matfree") - interpolate = self.create_transfer(get_appctx(dmc), get_appctx(dmf), mat_type, True, False) + interpolate = self.create_transfer(mat_type, get_appctx(dmc), get_appctx(dmf), True, False) rscale = interpolate.createVecRight() # only used as a workaround in the creation of coarse vecs return interpolate, rscale def create_injection(self, dmc, dmf): prefix = dmc.getOptionsPrefix() mat_type = PETSc.Options(prefix).getString("mg_levels_transfer_mat_type", default="matfree") - return self.create_transfer(get_appctx(dmf), get_appctx(dmc), mat_type, False, False) + return self.create_transfer(mat_type, get_appctx(dmf), get_appctx(dmc), False, False) @staticmethod def max_degree(ele): From 43429f856690e8426d86dcb667a27377cf62b735 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 22 Mar 2023 13:16:49 +0000 Subject: [PATCH 39/75] create fewer intermidiate Mats --- firedrake/preconditioners/fdm.py | 102 +++++++++++++++---------------- 1 file changed, 50 insertions(+), 52 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 6c7838b29c..7db94e14e1 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -373,6 +373,8 @@ def view(self, pc, viewer=None): def destroy(self, pc): objs = [] + if hasattr(self, "A"): + objs.append(self.A) if hasattr(self, "pc"): objs.append(self.pc.getOperators()[-1]) objs.append(self.pc) @@ -666,12 +668,9 @@ def assemble_reference_tensor(self, V): A10 = numpy.linalg.solve(A11, A10) A11 = numpy.eye(A11.shape[0]) - Ihat = mass_matrix(tdim, formdegree, A00, A11) - Dhat = diff_matrix(tdim, formdegree, A00, A11, A10) - result = block_mat([[Ihat], [Dhat]]) - Ihat.destroy() - Dhat.destroy() - + B_blocks = mass_blocks(tdim, formdegree, A00, A11) + A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) + result = block_mat(B_blocks + A_blocks, destroy=True) if value_size != 1: eye = petsc_sparse(numpy.eye(value_size)) temp = result @@ -720,13 +719,14 @@ def condense_element_mat(A, i0, i1, submats): # Return the Schur complement associated to indices in i1, condensing i0 out isrows = [i0, i0, i1, i1] iscols = [i0, i1, i0, i1] + structure = PETSc.Mat.Structure.SUBSET if submats[6] else None submats[:4] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:4] if submats[0] else None) A00, A01, A10, A11 = submats[:4] factor_interior_mat(A00) submats[4] = A00.matMult(A01, result=submats[4]) submats[5] = A10.matTransposeMult(A00, result=submats[5]) submats[6] = submats[5].matMult(submats[4], result=submats[6]) - submats[6].aypx(-1.0, A11) + submats[6].aypx(-1.0, A11, structure=structure) return submats[6] @@ -735,12 +735,13 @@ def condense_element_pattern(A, i0, i1, submats): # Add zeroes on the statically condensed pattern so that you can run ICC(0) isrows = [i0, i0, i1] iscols = [i0, i1, i0] + structure = PETSc.Mat.Structure.SUBSET if submats[6] else None submats[:3] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:3] if submats[0] else None) A00, A01, A10 = submats[:3] submats[4] = A10.matTransposeMult(A00, result=submats[4]) submats[5] = A00.matMult(A01, result=submats[5]) submats[6] = 
submats[4].matMult(submats[5], result=submats[6]) - submats[6].aypx(0.0, A) + submats[6].aypx(0.0, A, structure=structure) return submats[6] @@ -823,30 +824,6 @@ def load_assemble_csr(comm, triu=False): restype=ctypes.c_int) -def petsc_sparse(A_numpy, rtol=1E-10, comm=None): - # Convert dense numpy matrix into a sparse PETSc matrix - Amax = max(A_numpy.min(), A_numpy.max(), key=abs) - atol = rtol*Amax - nnz = numpy.count_nonzero(abs(A_numpy) > atol, axis=1).astype(PETSc.IntType) - A = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) - for row, Arow in enumerate(A_numpy): - cols = numpy.argwhere(abs(Arow) > atol).astype(PETSc.IntType).flat - A.setValues(row, cols, Arow[cols], PETSc.InsertMode.INSERT) - A.assemble() - return A - - -def block_mat(A_blocks): - # Return a concrete Mat corresponding to a block matrix given as a list of lists - if len(A_blocks) == 1: - if len(A_blocks[0]) == 1: - return A_blocks[0][0] - - nest = PETSc.Mat().createNest(A_blocks, comm=A_blocks[0][0].getComm()) - # A nest Mat would not allow us to take matrix-matrix products - return nest.convert(mat_type=A_blocks[0][0].getType()) - - def is_restricted(finat_element): # Determine if an element is a restriction onto interior or facets is_interior = True @@ -888,6 +865,19 @@ def sort_interior_dofs(idofs, A): Aii.destroy() +def petsc_sparse(A_numpy, rtol=1E-10, comm=None): + # Convert dense numpy matrix into a sparse PETSc matrix + Amax = max(A_numpy.min(), A_numpy.max(), key=abs) + atol = rtol*Amax + nnz = numpy.count_nonzero(abs(A_numpy) > atol, axis=1).astype(PETSc.IntType) + A = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) + for row, Arow in enumerate(A_numpy): + cols = numpy.argwhere(abs(Arow) > atol).astype(PETSc.IntType).flat + A.setValues(row, cols, Arow[cols], PETSc.InsertMode.INSERT) + A.assemble() + return A + + def kron3(A, B, C, scale=None): temp = B.kron(C) if scale is not None: @@ -897,13 +887,30 @@ def kron3(A, B, C, scale=None): return result -def mass_matrix(tdim, formdegree, B00, B11, comm=None): - # Construct mass matrix on reference cell from 1D mass matrices B00 and B11. +def block_mat(A_blocks, destroy=False): + # Return a concrete Mat corresponding to a block matrix given as a list of lists + # Optionally, destroys the input Mats if a new Mat is created + if len(A_blocks) == 1: + if len(A_blocks[0]) == 1: + return A_blocks[0][0] + + result = PETSc.Mat().createNest(A_blocks, comm=A_blocks[0][0].getComm()) + # A nest Mat would not allow us to take matrix-matrix products + result = result.convert(mat_type=A_blocks[0][0].getType()) + if destroy: + for row in A_blocks: + for mat in row: + mat.destroy() + return result + + +def mass_blocks(tdim, formdegree, B00, B11, comm=None): + # Construct mass block matrix on reference cell from 1D mass matrices B00 and B11. # The 1D matrices may come with different test and trial spaces. 
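For reference, the algebra behind condense_element_mat above can be written in a few lines of dense numpy; this is an illustration only, with made-up block sizes and a random symmetric matrix, while the implementation keeps everything sparse, reuses the submats work matrices, and inverts the interior block through factor_interior_mat.

import numpy

rng = numpy.random.default_rng(0)
n0, n1 = 6, 4                                # hypothetical interior/facet block sizes
n = n0 + n1
A = rng.standard_normal((n, n))
A = A + A.T + 2.0 * n * numpy.eye(n)         # symmetric and diagonally dominant
i0 = numpy.arange(n0)                        # interior dofs to be eliminated
i1 = numpy.arange(n0, n)                     # facet dofs that are kept

A00 = A[numpy.ix_(i0, i0)]
A01 = A[numpy.ix_(i0, i1)]
A10 = A[numpy.ix_(i1, i0)]
A11 = A[numpy.ix_(i1, i1)]
S = A11 - A10 @ numpy.linalg.solve(A00, A01)   # Schur complement on the facet block

# Solving with S reproduces the facet part of the full solve
b = rng.standard_normal(n)
x = numpy.linalg.solve(A, b)
x1 = numpy.linalg.solve(S, b[i1] - A10 @ numpy.linalg.solve(A00, b[i0]))
assert numpy.allclose(x1, x[i1])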
if comm is None: comm = PETSc.COMM_SELF if tdim == 1: - return petsc_sparse(B11 if formdegree else B00, comm=comm) + return [[petsc_sparse(B11 if formdegree else B00, comm=comm)]] B00 = petsc_sparse(B00, comm=comm) B11 = petsc_sparse(B11, comm=comm) @@ -928,20 +935,15 @@ def mass_matrix(tdim, formdegree, B00, B11, comm=None): B11.destroy() n = len(B_diag) if n == 1: - result = B_diag[0] + return [B_diag] else: B_zero = PETSc.Mat().createAIJ(B_diag[0].getSize(), nnz=(0, 0), comm=comm) B_zero.assemble() - B_blocks = [[B_diag[i] if i == j else B_zero for j in range(n)] for i in range(n)] - result = block_mat(B_blocks) - B_zero.destroy() - for B in B_diag: - B.destroy() - return result + return [[B_diag[i] if i == j else B_zero for j in range(n)] for i in range(n)] -def diff_matrix(tdim, formdegree, A00, A11, A10, comm=None): - # Construct exterior derivative matrix on reference cell from 1D mass matrices A00 and A11, +def diff_blocks(tdim, formdegree, A00, A11, A10, comm=None): + # Construct exterior derivative block matrix on reference cell from 1D mass matrices A00 and A11, # and exterior derivative moments A10. # The 1D matrices may come with different test and trial spaces. if comm is None: @@ -950,11 +952,11 @@ def diff_matrix(tdim, formdegree, A00, A11, A10, comm=None): ncols = A10.shape[0]**tdim A_zero = PETSc.Mat().createAIJ((1, ncols), nnz=(0, 0), comm=comm) A_zero.assemble() - return A_zero + return [[A_zero]] A10 = petsc_sparse(A10, comm=comm) if tdim == 1: - return A10 + return [[A10]] A00 = petsc_sparse(A00, comm=comm) A11 = petsc_sparse(A11, comm=comm) @@ -980,11 +982,7 @@ def diff_matrix(tdim, formdegree, A00, A11, A10, comm=None): A00.destroy() A11.destroy() A10.destroy() - result = block_mat(A_blocks) - for A_row in A_blocks: - for A in A_row: - A.destroy() - return result + return A_blocks def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[]): @@ -1010,7 +1008,7 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[]): A10 = fiat_reference_prolongator(e0, e1, derivative=True) tdim = Vc.mesh().topological_dimension() - Dhat = diff_matrix(tdim, ec.formdegree, A00, A11, A10) + Dhat = block_mat(diff_blocks(tdim, ec.formdegree, A00, A11, A10), destroy=True) scalar_element = lambda e: e._sub_element if isinstance(e, (ufl.TensorElement, ufl.VectorElement)) else e fdofs = restricted_dofs(ef, create_element(unrestrict_element(scalar_element(Vf.ufl_element())))) From a7b1e952c710f8adbea5c427f428ee2be1eff02d Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 22 Mar 2023 18:26:42 +0000 Subject: [PATCH 40/75] move imports to the top --- firedrake/preconditioners/fdm.py | 63 +++++++++++++++----------------- firedrake/preconditioners/pmg.py | 15 ++++---- 2 files changed, 36 insertions(+), 42 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 7db94e14e1..e1a3a12f0c 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1,10 +1,21 @@ from functools import partial, lru_cache from itertools import product -from pyop2.sparsity import get_preallocation from firedrake.petsc import PETSc from firedrake.preconditioners.base import PCBase +from firedrake.preconditioners.patch import bcdofs +from firedrake.preconditioners.pmg import (prolongation_matrix_matfree, + fiat_reference_prolongator, + get_permutation_to_line_elements) from firedrake.preconditioners.facet_split import split_dofs, restricted_dofs +from firedrake.formmanipulation import ExtractSubBlock from firedrake_citations import Citations 
+from pyop2.compilation import load +from pyop2.utils import get_petsc_dir +from pyop2.sparsity import get_preallocation +from tsfc.finatinterface import create_element +from ufl.algorithms.ad import expand_derivatives +from ufl.algorithms.expand_indices import expand_indices + import firedrake.dmhooks as dmhooks import firedrake import ctypes @@ -85,10 +96,6 @@ def load_set_values(triu=False): @PETSc.Log.EventDecorator("FDMInit") def initialize(self, pc): - from firedrake.assemble import allocate_matrix, assemble - from firedrake.preconditioners.pmg import prolongation_matrix_matfree - from firedrake.preconditioners.patch import bcdofs - Citations().register(self._citation) self.comm = pc.comm Amat, Pmat = pc.getOperators() @@ -151,10 +158,12 @@ def initialize(self, pc): self.work_vec_x = Amat.createVecLeft() self.work_vec_y = Amat.createVecRight() if use_amat: + from firedrake.assemble import allocate_matrix, TwoFormAssembler self.A = allocate_matrix(J_fdm, bcs=bcs_fdm, form_compiler_parameters=fcp, mat_type=mat_type, options_prefix=options_prefix) - self._assemble_A = partial(assemble, J_fdm, tensor=self.A, bcs=bcs_fdm, - form_compiler_parameters=fcp, mat_type=mat_type) + self._assemble_A = TwoFormAssembler(J_fdm, tensor=self.A, bcs=bcs_fdm, + form_compiler_parameters=fcp, + mat_type=mat_type).assemble self._assemble_A() Amat = self.A.petscmat @@ -509,11 +518,6 @@ def assemble_coef(self, J, form_compiler_parameters): order coefficients keyed on ``"beta"`` and ``"alpha"``, and a list of assembly callables. """ - from ufl.algorithms.ad import expand_derivatives - from ufl.algorithms.expand_indices import expand_indices - from firedrake.formmanipulation import ExtractSubBlock - from firedrake.assemble import assemble - # Basic idea: take the original bilinear form and # replace the exterior derivatives with arguments in broken(V^{k+1}). # Then, replace the original arguments with arguments in broken(V^k). @@ -593,11 +597,11 @@ def assemble_coef(self, J, form_compiler_parameters): if not block_diagonal or not V.shape: tensor = firedrake.Function(Z) coefficients = {"beta": tensor.sub(0), "alpha": tensor.sub(1)} - assembly_callables = [partial(assemble, mixed_form, tensor=tensor, diagonal=True, + assembly_callables = [partial(firedrake.assemble, mixed_form, tensor=tensor, diagonal=True, form_compiler_parameters=form_compiler_parameters)] else: - M = assemble(mixed_form, mat_type="matfree", - form_compiler_parameters=form_compiler_parameters) + M = firedrake.assemble(mixed_form, mat_type="matfree", + form_compiler_parameters=form_compiler_parameters) coefficients = dict() assembly_callables = [] for iset, name in zip(Z.dof_dset.field_ises, ("beta", "alpha")): @@ -747,8 +751,6 @@ def condense_element_pattern(A, i0, i1, submats): @PETSc.Log.EventDecorator("LoadCode") def load_c_code(code, name, **kwargs): - from pyop2.compilation import load - from pyop2.utils import get_petsc_dir cppargs = ["-I%s/include" % d for d in get_petsc_dir()] ldargs = (["-L%s/lib" % d for d in get_petsc_dir()] + ["-Wl,-rpath,%s/lib" % d for d in get_petsc_dir()] @@ -990,9 +992,6 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[]): Tabulate exterior derivative: Vc -> Vf as an explicit sparse matrix. Works for any tensor-product basis. These are the same matrices one needs for HypreAMS and friends. 
""" - from tsfc.finatinterface import create_element - from firedrake.preconditioners.pmg import fiat_reference_prolongator - ec = Vc.finat_element ef = Vf.finat_element if ef.formdegree - ec.formdegree != 1: @@ -1132,7 +1131,6 @@ class PoissonFDMPC(FDMPC): _citation = "Brubeck2022a" def assemble_reference_tensor(self, V): - from firedrake.preconditioners.pmg import get_permutation_to_line_elements try: _, line_elements, shifts = get_permutation_to_line_elements(V.finat_element) except ValueError: @@ -1381,9 +1379,6 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_average=True): - from ufl import inner, diff - from ufl.algorithms.ad import expand_derivatives - coefficients = {} assembly_callables = [] @@ -1421,8 +1416,8 @@ def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_av else: replace_grad = {ufl.grad(t): ufl.dot(dt, Finv) for t, dt in zip(args_J, ref_grad)} - alpha = expand_derivatives(sum([diff(diff(ufl.replace(i.integrand(), replace_grad), - ref_grad[0]), ref_grad[1]) for i in integrals_J])) + alpha = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), + ref_grad[0]), ref_grad[1]) for i in integrals_J])) # get zero-th order coefficent ref_val = [ufl.variable(t) for t in args_J] @@ -1433,8 +1428,8 @@ def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_av else: replace_val = {t: s for t, s in zip(args_J, ref_val)} - beta = expand_derivatives(sum([diff(diff(ufl.replace(i.integrand(), replace_val), - ref_val[0]), ref_val[1]) for i in integrals_J])) + beta = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_val), + ref_val[0]), ref_val[1]) for i in integrals_J])) if Piola: beta = ufl.replace(beta, {dummy_Piola: Piola}) @@ -1457,7 +1452,7 @@ def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_av q = firedrake.TestFunction(Q) Gq = firedrake.Function(Q) coefficients["alpha"] = Gq - assembly_callables.append(partial(firedrake.assemble, inner(G, q)*dx, Gq)) + assembly_callables.append(partial(firedrake.assemble, ufl.inner(G, q)*dx, Gq)) # assemble zero-th order coefficient if not isinstance(beta, ufl.constantvalue.Zero): @@ -1472,7 +1467,7 @@ def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_av q = firedrake.TestFunction(Q) Bq = firedrake.Function(Q) coefficients["beta"] = Bq - assembly_callables.append(partial(firedrake.assemble, inner(beta, q)*dx, Bq)) + assembly_callables.append(partial(firedrake.assemble, ufl.inner(beta, q)*dx, Bq)) if Piola: # make DGT functions with the second order coefficient @@ -1483,8 +1478,8 @@ def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_av area = ufl.FacetArea(mesh) replace_grad = {ufl.grad(t): ufl.dot(dt, Finv) for t, dt in zip(args_J, ref_grad)} - alpha = expand_derivatives(sum([diff(diff(ufl.replace(i.integrand(), replace_grad), - ref_grad[0]), ref_grad[1]) for i in integrals_J])) + alpha = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), + ref_grad[0]), ref_grad[1]) for i in integrals_J])) vol = abs(ufl.JacobianDeterminant(mesh)) G = vol * alpha G = ufl.as_tensor([[[G[i, k, j, k] for i in range(G.ufl_shape[0])] for j in range(G.ufl_shape[2])] for k in range(G.ufl_shape[3])]) @@ -1493,14 +1488,14 @@ def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_av q = 
firedrake.TestFunction(Q) Gq_facet = firedrake.Function(Q) coefficients["Gq_facet"] = Gq_facet - assembly_callables.append(partial(firedrake.assemble, ((inner(q('+'), G('+')) + inner(q('-'), G('-')))/area)*dS_int, Gq_facet)) + assembly_callables.append(partial(firedrake.assemble, ((ufl.inner(q('+'), G('+')) + ufl.inner(q('-'), G('-')))/area)*dS_int, Gq_facet)) PT = Piola.T Q = firedrake.TensorFunctionSpace(mesh, ele, shape=PT.ufl_shape) q = firedrake.TestFunction(Q) PT_facet = firedrake.Function(Q) coefficients["PT_facet"] = PT_facet - assembly_callables.append(partial(firedrake.assemble, ((inner(q('+'), PT('+')) + inner(q('-'), PT('-')))/area)*dS_int, PT_facet)) + assembly_callables.append(partial(firedrake.assemble, ((ufl.inner(q('+'), PT('+')) + ufl.inner(q('-'), PT('-')))/area)*dS_int, PT_facet)) # make DGT functions with BC flags rvs = V.ufl_element().reference_value_shape() diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index d63c006f79..8446d5aa3e 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -1,14 +1,19 @@ from functools import partial, lru_cache from itertools import chain -from pyop2 import op2, PermutedMap +from firedrake.petsc import PETSc from firedrake.preconditioners.base import PCBase, SNESBase, PCSNESBase from firedrake.dmhooks import (attach_hooks, get_appctx, push_appctx, pop_appctx, add_hook, get_parent, push_parent, pop_parent, get_function_space, set_function_space) from firedrake.solving_utils import _SNESContext +from firedrake.nullspace import VectorSpaceBasis, MixedVectorSpaceBasis from firedrake.tsfc_interface import extract_numbered_coefficients from firedrake.utils import ScalarType_c, IntType_c, cached_property -from firedrake.petsc import PETSc +from pyop2 import op2, PermutedMap +from tsfc import compile_expression_dual_evaluation +from tsfc.finatinterface import create_element +from FIAT.reference_element import LINE + import firedrake import finat import ufl @@ -162,8 +167,6 @@ def destroy(self, obj): def coarsen(self, fdm, comm): # Coarsen the _SNESContext of a DM fdm # return the coarse DM cdm of the coarse _SNESContext - from firedrake.nullspace import VectorSpaceBasis, MixedVectorSpaceBasis - fctx = get_appctx(fdm) parent = get_parent(fdm) assert parent is not None @@ -517,8 +520,6 @@ def coarsen_bc_value(self, bc, cV): def prolongation_transfer_kernel_action(Vf, expr): - from tsfc import compile_expression_dual_evaluation - from tsfc.finatinterface import create_element to_element = create_element(Vf.ufl_element()) kernel = compile_expression_dual_evaluation(expr, to_element, Vf.ufl_element(), log=PETSc.Log.isActive()) coefficients = extract_numbered_coefficients(expr, kernel.coefficient_numbers) @@ -631,8 +632,6 @@ def get_permutation_to_line_elements(finat_element): permutations of the axes to form the element given by their shifts in list of `int` tuples """ - from FIAT.reference_element import LINE - expansion = expand_element(finat_element) if expansion.space_dimension() != finat_element.space_dimension(): raise ValueError("Failed to decompose %s into tensor products" % finat_element) From a2cd50c02dd02b56282d88fc188ced2f82d357d5 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 23 Mar 2023 09:27:49 +0000 Subject: [PATCH 41/75] use any instead of sum --- firedrake/preconditioners/fdm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index e1a3a12f0c..5f8e479b8f 100644 --- 
a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -833,8 +833,7 @@ def is_restricted(finat_element): tdim = finat_element.cell.get_spatial_dimension() entity_dofs = finat_element.entity_dofs() for edim in sorted(entity_dofs): - v = sum(list(entity_dofs[edim].values()), []) - if len(v): + if any(len(entity_dofs[edim][entity]) > 0 for entity in entity_dofs[edim]): try: edim = sum(edim) except TypeError: From b6f31530998173bd66e5d1519cbc2a7730e71981 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 23 Mar 2023 15:42:27 +0000 Subject: [PATCH 42/75] address some of Connor's comments format docstrings, dict() -> {} --- firedrake/preconditioners/fdm.py | 184 +++++++++++++++---------------- firedrake/preconditioners/pmg.py | 4 +- 2 files changed, 91 insertions(+), 97 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 5f8e479b8f..862b80038c 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -79,20 +79,20 @@ class FDMPC(PCBase): _cache = {} @staticmethod - def load_set_values(triu=False): + def setSubMatCSR(comm, triu=False): """ - Compile C code to insert sparse element matrices and store in class cache + Compile C code to insert sparse submatrices and store in class cache :arg triu: are we inserting onto the upper triangular part of the matrix? :returns: a python wrapper for the matrix insertion function """ + cache = FDMPC._cache.setdefault("setSubMatCSR", {}) key = triu - cache = FDMPC._cache.setdefault("load_set_values", {}) try: return cache[key] except KeyError: - return cache.setdefault(key, load_assemble_csr(PETSc.COMM_SELF, triu=triu)) + return cache.setdefault(key, load_setSubMatCSR(comm, triu)) @PETSc.Log.EventDecorator("FDMInit") def initialize(self, pc): @@ -123,8 +123,8 @@ def initialize(self, pc): bcs = tuple(ctx._problem.bcs) mat_type = ctx.mat_type - # For static condensation with SLATE, we might extract the form on the - # interface-interface block like this: + # TODO assemble Schur complements specified by a SLATE Tensor + # we might extract the form on the interface-interface block like this: # # if isinstance(J, firedrake.slate.TensorBase) and use_static_condensation: # J = J.children[0].form @@ -237,8 +237,8 @@ def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, pmat_type, use_st self.ises = tuple(PETSc.IS().createGeneral(indices, comm=PETSc.COMM_SELF) for indices in (idofs, fdofs)) self.submats = [None for _ in range(7)] - self.reference_tensor_on_diag = dict() - self.get_static_condensation = dict() + self.reference_tensor_on_diag = {} + self.get_static_condensation = {} if Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block self.reference_tensor_on_diag[Vfacet] = self.assemble_reference_tensor(Vbig) @@ -256,9 +256,9 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): return lgmap.apply(result, result=result) # Create data structures needed for assembly - self.cell_to_global = dict() - self.lgmaps = dict() - bc_rows = dict() + self.cell_to_global = {} + self.lgmaps = {} + bc_rows = {} for Vsub in V: lgmap = Vsub.local_to_global_map([bc.reconstruct(V=Vsub, g=0) for bc in bcs]) bsize = Vsub.dof_dset.layout_vec.getBlockSize() @@ -286,9 +286,9 @@ def get_coeffs(e, result=None): self.get_coeffs = get_coeffs self.nel = nel - self.work_mats = dict() + self.work_mats = {} - Pmats = dict() + Pmats = {} addv = PETSc.InsertMode.ADD_VALUES symmetric = 
pmat_type.endswith("sbaij") @@ -414,11 +414,11 @@ def RtAP(R, A, P, result=None): return R.transposeMatMult(RtAP.buff, result=result) RtAP.buff = None - set_values_csr = self.load_set_values(triu=triu) + set_submat = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) get_rindices = self.cell_to_global[Vrow] if Vrow == Vcol: get_cindices = lambda e, result=None: result - update_A = lambda Ae, rindices, cindices: set_values_csr(A, Ae, rindices, rindices, addv) + update_A = lambda Ae, rindices, cindices: set_submat(A, Ae, rindices, rindices, addv) # moments of orthogonalized basis against basis tabulation and derivative tabulation rtensor = self.reference_tensor_on_diag.get(Vrow) or self.assemble_reference_tensor(Vrow) # element matrix obtained via Equation (3.9) of Brubeck2022b @@ -426,7 +426,7 @@ def RtAP(R, A, P, result=None): condense_element_mat = self.get_static_condensation.get(Vrow) else: get_cindices = self.cell_to_global[Vcol] - update_A = lambda Ae, rindices, cindices: set_values_csr(A, Ae, rindices, cindices, addv) + update_A = lambda Ae, rindices, cindices: set_submat(A, Ae, rindices, cindices, addv) rtensor = self.assemble_reference_tensor(Vrow) ctensor = self.assemble_reference_tensor(Vcol) assemble_element_mat = lambda De, result=None: RtAP(rtensor, De, ctensor, result=result) @@ -583,7 +583,7 @@ def assemble_coef(self, J, form_compiler_parameters): # Transform the exterior derivative and the original arguments of J to arguments in Z args = (firedrake.TestFunctions(Z), firedrake.TrialFunctions(Z)) repargs = {t: v[0] for t, v in zip(args_J, args)} - repgrad = {ufl.grad(t): map_grad(v[1]) for t, v in zip(args_J, args)} if map_grad else dict() + repgrad = {ufl.grad(t): map_grad(v[1]) for t, v in zip(args_J, args)} if map_grad else {} Jcell = expand_indices(expand_derivatives(ufl.Form(J.integrals_by_type("cell")))) mixed_form = ufl.replace(ufl.replace(Jcell, repgrad), repargs) @@ -594,21 +594,21 @@ def assemble_coef(self, J, form_compiler_parameters): try: return cache[key] except KeyError: - if not block_diagonal or not V.shape: - tensor = firedrake.Function(Z) - coefficients = {"beta": tensor.sub(0), "alpha": tensor.sub(1)} - assembly_callables = [partial(firedrake.assemble, mixed_form, tensor=tensor, diagonal=True, - form_compiler_parameters=form_compiler_parameters)] - else: + if block_diagonal and V.shape: M = firedrake.assemble(mixed_form, mat_type="matfree", form_compiler_parameters=form_compiler_parameters) - coefficients = dict() + coefficients = {} assembly_callables = [] for iset, name in zip(Z.dof_dset.field_ises, ("beta", "alpha")): sub = M.petscmat.createSubMatrix(iset, iset) ctx = sub.getPythonContext() coefficients[name] = ctx._block_diagonal assembly_callables.append(ctx._assemble_block_diagonal) + else: + tensor = firedrake.Function(Z) + coefficients = {"beta": tensor.sub(0), "alpha": tensor.sub(1)} + assembly_callables = [partial(firedrake.assemble, mixed_form, tensor=tensor, diagonal=True, + form_compiler_parameters=form_compiler_parameters)] return cache.setdefault(key, (coefficients, assembly_callables)) @PETSc.Log.EventDecorator("FDMRefTensor") @@ -720,7 +720,7 @@ def factor_interior_mat(A00): @PETSc.Log.EventDecorator("FDMCondense") def condense_element_mat(A, i0, i1, submats): - # Return the Schur complement associated to indices in i1, condensing i0 out + """Return the Schur complement associated to indices in i1, condensing i0 out""" isrows = [i0, i0, i1, i1] iscols = [i0, i1, i0, i1] structure = PETSc.Mat.Structure.SUBSET if submats[6] else None @@ -736,7 
+736,7 @@ def condense_element_mat(A, i0, i1, submats): @PETSc.Log.EventDecorator("FDMCondense") def condense_element_pattern(A, i0, i1, submats): - # Add zeroes on the statically condensed pattern so that you can run ICC(0) + """Add zeroes on the statically condensed pattern so that you can run ICC(0)""" isrows = [i0, i0, i1] iscols = [i0, i1, i0] structure = PETSc.Mat.Structure.SUBSET if submats[6] else None @@ -760,7 +760,7 @@ def load_c_code(code, name, **kwargs): **kwargs) def get_pointer(obj): - if isinstance(obj, (PETSc.Mat, PETSc.Vec)): + if isinstance(obj, PETSc.Object): return obj.handle elif isinstance(obj, numpy.ndarray): return obj.ctypes.data @@ -772,9 +772,9 @@ def wrapper(*args): return wrapper -def load_assemble_csr(comm, triu=False): - # Insert one sparse matrix into another sparse matrix. - # Done in C for efficiency, since it loops over rows. +def load_setSubMatCSR(comm, triu=False): + """Insert one sparse matrix into another sparse matrix. + Done in C for efficiency, since it loops over rows.""" if triu: name = "setSubMatCSR_SBAIJ" select_cols = "icol < irow ? -1: icol" @@ -827,18 +827,14 @@ def load_assemble_csr(comm, triu=False): def is_restricted(finat_element): - # Determine if an element is a restriction onto interior or facets + """Determine if an element is a restriction onto interior or facets""" is_interior = True is_facet = True - tdim = finat_element.cell.get_spatial_dimension() + cell_dim = finat_element.cell.get_dimension() entity_dofs = finat_element.entity_dofs() - for edim in sorted(entity_dofs): - if any(len(entity_dofs[edim][entity]) > 0 for entity in entity_dofs[edim]): - try: - edim = sum(edim) - except TypeError: - pass - if edim == tdim: + for dim in sorted(entity_dofs): + if any(len(entity_dofs[dim][entity]) > 0 for entity in entity_dofs[dim]): + if dim == cell_dim: is_facet = False else: is_interior = False @@ -846,8 +842,8 @@ def is_restricted(finat_element): def sort_interior_dofs(idofs, A): - # Permute `idofs` to have A[idofs, idofs] with square blocks of - # increasing dimension along its diagonal. 
+ """Permute `idofs` to have A[idofs, idofs] with square blocks of + increasing dimension along its diagonal.""" Aii = A.createSubMatrix(idofs, idofs) indptr, indices, _ = Aii.getValuesCSR() n = idofs.getSize() @@ -867,19 +863,20 @@ def sort_interior_dofs(idofs, A): def petsc_sparse(A_numpy, rtol=1E-10, comm=None): - # Convert dense numpy matrix into a sparse PETSc matrix - Amax = max(A_numpy.min(), A_numpy.max(), key=abs) - atol = rtol*Amax - nnz = numpy.count_nonzero(abs(A_numpy) > atol, axis=1).astype(PETSc.IntType) + """Convert dense numpy matrix into a sparse PETSc matrix""" + atol = rtol * max(A_numpy.min(), A_numpy.max(), key=abs) + sparsity = abs(A_numpy) > atol + nnz = numpy.count_nonzero(sparsity, axis=1).astype(PETSc.IntType) A = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) - for row, Arow in enumerate(A_numpy): - cols = numpy.argwhere(abs(Arow) > atol).astype(PETSc.IntType).flat + for row, (Arow, Srow) in enumerate(zip(A_numpy, sparsity)): + cols = numpy.argwhere(Srow).astype(PETSc.IntType).flat A.setValues(row, cols, Arow[cols], PETSc.InsertMode.INSERT) A.assemble() return A def kron3(A, B, C, scale=None): + """Returns scale * kron(A, kron(B, C))""" temp = B.kron(C) if scale is not None: temp.scale(scale) @@ -889,8 +886,8 @@ def kron3(A, B, C, scale=None): def block_mat(A_blocks, destroy=False): - # Return a concrete Mat corresponding to a block matrix given as a list of lists - # Optionally, destroys the input Mats if a new Mat is created + """Return a concrete Mat corresponding to a block matrix given as a list of lists. + Optionally, destroys the input Mats if a new Mat is created.""" if len(A_blocks) == 1: if len(A_blocks[0]) == 1: return A_blocks[0][0] @@ -906,8 +903,8 @@ def block_mat(A_blocks, destroy=False): def mass_blocks(tdim, formdegree, B00, B11, comm=None): - # Construct mass block matrix on reference cell from 1D mass matrices B00 and B11. - # The 1D matrices may come with different test and trial spaces. + """Construct mass block matrix on reference cell from 1D mass matrices B00 and B11. + The 1D matrices may come with different test and trial spaces.""" if comm is None: comm = PETSc.COMM_SELF if tdim == 1: @@ -944,9 +941,9 @@ def mass_blocks(tdim, formdegree, B00, B11, comm=None): def diff_blocks(tdim, formdegree, A00, A11, A10, comm=None): - # Construct exterior derivative block matrix on reference cell from 1D mass matrices A00 and A11, - # and exterior derivative moments A10. - # The 1D matrices may come with different test and trial spaces. + """Construct exterior derivative block matrix on reference cell from 1D + mass matrices A00 and A11, and exterior derivative moments A10. 
+ The 1D matrices may come with different test and trial spaces.""" if comm is None: comm = PETSc.COMM_SELF if formdegree == tdim: @@ -1001,23 +998,24 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[]): e0, e1 = elements[::len(elements)-1] degree = e0.degree() + tdim = Vc.mesh().topological_dimension() A11 = numpy.eye(degree, dtype=PETSc.RealType) A00 = numpy.eye(degree+1, dtype=PETSc.RealType) A10 = fiat_reference_prolongator(e0, e1, derivative=True) - - tdim = Vc.mesh().topological_dimension() Dhat = block_mat(diff_blocks(tdim, ec.formdegree, A00, A11, A10), destroy=True) - scalar_element = lambda e: e._sub_element if isinstance(e, (ufl.TensorElement, ufl.VectorElement)) else e - fdofs = restricted_dofs(ef, create_element(unrestrict_element(scalar_element(Vf.ufl_element())))) - cdofs = restricted_dofs(ec, create_element(unrestrict_element(scalar_element(Vc.ufl_element())))) - fises = PETSc.IS().createGeneral(fdofs, comm=PETSc.COMM_SELF) - cises = PETSc.IS().createGeneral(cdofs, comm=PETSc.COMM_SELF) - temp = Dhat - Dhat = temp.createSubMatrix(fises, cises) - fises.destroy() - cises.destroy() - temp.destroy() + if any(is_restricted(ec)) or any(is_restricted(ef)): + scalar_element = lambda e: e._sub_element if isinstance(e, (ufl.TensorElement, ufl.VectorElement)) else e + fdofs = restricted_dofs(ef, create_element(unrestrict_element(scalar_element(Vf.ufl_element())))) + cdofs = restricted_dofs(ec, create_element(unrestrict_element(scalar_element(Vc.ufl_element())))) + fises = PETSc.IS().createGeneral(fdofs, comm=PETSc.COMM_SELF) + cises = PETSc.IS().createGeneral(cdofs, comm=PETSc.COMM_SELF) + temp = Dhat + Dhat = temp.createSubMatrix(fises, cises) + temp.destroy() + fises.destroy() + cises.destroy() + if Vf.value_size > 1: temp = Dhat eye = petsc_sparse(numpy.eye(Vf.value_size, dtype=PETSc.RealType)) @@ -1035,7 +1033,7 @@ def cell_to_global(lgmap, cell_to_local, e, result=None): return lgmap.apply(result, result=result) imode = PETSc.InsertMode.INSERT - update_Dmat = FDMPC.load_set_values() + update_Dmat = FDMPC.setSubMatCSR(PETSc.COMM_SELF, triu=False) sizes = tuple(V.dof_dset.layout_vec.getSizes() for V in (Vf, Vc)) block_size = Vf.dof_dset.layout_vec.getBlockSize() @@ -1068,7 +1066,8 @@ def cell_to_global(lgmap, cell_to_local, e, result=None): def unrestrict_element(ele): - # Get an element that might or might not be restricted and return the parent unrestricted element. + """Get an element that might or might not be restricted and + return the parent unrestricted element.""" if isinstance(ele, ufl.VectorElement): return type(ele)(unrestrict_element(ele._sub_element), dim=ele.num_sub_elements()) elif isinstance(ele, ufl.TensorElement): @@ -1136,7 +1135,7 @@ def assemble_reference_tensor(self, V): raise ValueError("FDMPC does not support the element %s" % V.ufl_element()) line_elements, = line_elements - self.axes_shifts, = shifts + axes_shifts, = shifts degree = max(e.degree() for e in line_elements) eta = float(self.appctx.get("eta", degree*(degree+1))) @@ -1151,7 +1150,7 @@ def assemble_reference_tensor(self, V): if not is_dg and e.degree() == degree: # do not apply SIPG along continuous directions Dfdm[0] = None - return Afdm, Dfdm, bdof + return Afdm, Dfdm, bdof, axes_shifts @PETSc.Log.EventDecorator("FDMSetValues") def set_values(self, A, Vrow, Vcol, addv, triu=False): @@ -1164,14 +1163,16 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): :arg addv: a `PETSc.Mat.InsertMode` :arg triu: are we assembling only the upper triangular part? 
""" - set_values_csr = self.load_set_values(triu=triu) - update_A = lambda A, Ae, rindices: set_values_csr(A, Ae, rindices, rindices, addv) + set_submat = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) + update_A = lambda A, Ae, rindices: set_submat(A, Ae, rindices, rindices, addv) condense_element_mat = lambda x: x get_rindices = self.cell_to_global[Vrow] - rtensor = self.reference_tensor_on_diag.get(Vrow) or self.assemble_reference_tensor(Vrow) - self.reference_tensor_on_diag[Vrow] = rtensor - Afdm, Dfdm, bdof = rtensor + try: + rtensor = self.reference_tensor_on_diag[Vrow] + except KeyError: + rtensor = self.reference_tensor_on_diag.setdefault(Vrow, self.assemble_reference_tensor(Vrow)) + Afdm, Dfdm, bdof, axes_shifts = rtensor Gq = self.coefficients.get("alpha") Bq = self.coefficients.get("beta") @@ -1184,7 +1185,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): ncomp = V.ufl_element().reference_value_size() sdim = (V.finat_element.space_dimension() * bsize) // ncomp # dimension of a single component tdim = V.mesh().topological_dimension() - shift = self.axes_shifts * bsize + shift = axes_shifts * bsize index_coef, _ = extrude_node_map((Gq or Bq).cell_node_map()) index_bc, _ = extrude_node_map(bcflags.cell_node_map()) @@ -1322,8 +1323,8 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): continue if PT_facet: - k0 = iord0[k] if shift != 1 else tdim-1-iord0[-k-1] - k1 = iord1[k] if shift != 1 else tdim-1-iord1[-k-1] + k0 = iord0[k] if shift[1] != 1 else tdim-1-iord0[-k-1] + k1 = iord1[k] if shift[1] != 1 else tdim-1-iord1[-k-1] Piola = Pfacet[[0, 1], [k0, k1]] mu = Gfacet[[0, 1], idir] else: @@ -1377,7 +1378,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): Ae.destroy() @PETSc.Log.EventDecorator("FDMCoefficients") - def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_average=True): + def assemble_coef(self, J, form_compiler_parameters): coefficients = {} assembly_callables = [] @@ -1396,12 +1397,8 @@ def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_av quad_deg = (form_compiler_parameters or {}).get("degree", quad_deg) dx = firedrake.dx(degree=quad_deg) - if cell_average: - family = "Discontinuous Lagrange" if tdim == 1 else "DQ" - degree = 0 - else: - family = "Quadrature" - degree = quad_deg + family = "Discontinuous Lagrange" if tdim == 1 else "DQ" + degree = 0 # extract coefficients directly from the bilinear form integrals_J = J.integrals_by_type("cell") @@ -1432,18 +1429,15 @@ def assemble_coef(self, J, form_compiler_parameters, discard_mixed=True, cell_av if Piola: beta = ufl.replace(beta, {dummy_Piola: Piola}) + # discard mixed derivatives and mixed components G = alpha - if discard_mixed: - # discard mixed derivatives and mixed components - if len(G.ufl_shape) == 2: - G = ufl.diag_vector(G) - else: - Gshape = G.ufl_shape - Gshape = Gshape[:len(Gshape)//2] - G = ufl.as_tensor(numpy.reshape([G[i+i] for i in numpy.ndindex(Gshape)], (Gshape[0], -1))) - Qe = ufl.TensorElement(family, mesh.ufl_cell(), degree=degree, quad_scheme="default", shape=G.ufl_shape) + if len(G.ufl_shape) == 2: + G = ufl.diag_vector(G) else: - Qe = ufl.TensorElement(family, mesh.ufl_cell(), degree=degree, quad_scheme="default", shape=G.ufl_shape, symmetry=True) + Gshape = G.ufl_shape + Gshape = Gshape[:len(Gshape)//2] + G = ufl.as_tensor(numpy.reshape([G[i+i] for i in numpy.ndindex(Gshape)], (Gshape[0], -1))) + Qe = ufl.TensorElement(family, mesh.ufl_cell(), degree=degree, quad_scheme="default", shape=G.ufl_shape) # assemble second 
order coefficient if not isinstance(alpha, ufl.constantvalue.Zero): diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 8446d5aa3e..1fb2e267da 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -1481,8 +1481,8 @@ def _weight(self): def _standalones(self): standalones = [] for i, (uc_sub, uf_sub) in enumerate(zip(self.uc.subfunctions, self.uf.subfunctions)): - Vc_sub_bcs = [bc for bc in self.Vc_bcs if bc.function_space().index == i] - Vf_sub_bcs = [bc for bc in self.Vf_bcs if bc.function_space().index == i] + Vc_sub_bcs = tuple(bc for bc in self.Vc_bcs if bc.function_space().index == i) + Vf_sub_bcs = tuple(bc for bc in self.Vf_bcs if bc.function_space().index == i) standalone = StandaloneInterpolationMatrix(uc_sub, uf_sub, Vc_sub_bcs, Vf_sub_bcs) standalones.append(standalone) return standalones From 1d754e53b8b5392c7558a3ffee66e2dd99752b7a Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 23 Mar 2023 16:05:20 +0000 Subject: [PATCH 43/75] more careful imports from firedrake --- firedrake/preconditioners/fdm.py | 69 +++++++++++++++++--------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 862b80038c..dcb6f9aedb 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -8,6 +8,10 @@ get_permutation_to_line_elements) from firedrake.preconditioners.facet_split import split_dofs, restricted_dofs from firedrake.formmanipulation import ExtractSubBlock +from firedrake.function import Function +from firedrake.functionspace import FunctionSpace +from firedrake.ufl_expr import TestFunction, TestFunctions, TrialFunctions + from firedrake_citations import Citations from pyop2.compilation import load from pyop2.utils import get_petsc_dir @@ -17,7 +21,6 @@ from ufl.algorithms.expand_indices import expand_indices import firedrake.dmhooks as dmhooks -import firedrake import ctypes import numpy import ufl @@ -140,7 +143,7 @@ def initialize(self, pc): V_fdm, J_fdm, bcs_fdm = (V, J, bcs) else: # Reconstruct Jacobian and bcs with variant element - V_fdm = firedrake.FunctionSpace(V.mesh(), e_fdm) + V_fdm = FunctionSpace(V.mesh(), e_fdm) J_fdm = J(*[t.reconstruct(function_space=V_fdm) for t in J.arguments()], coefficients={}) bcs_fdm = [] for bc in bcs: @@ -221,7 +224,7 @@ def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, pmat_type, use_st elif len(ifacet) == 1: Vfacet = V[ifacet[0]] ebig, = set(unrestrict_element(Vsub.ufl_element()) for Vsub in V) - Vbig = firedrake.FunctionSpace(V.mesh(), ebig) + Vbig = FunctionSpace(V.mesh(), ebig) if len(V) > 1: dims = [Vsub.finat_element.space_dimension() for Vsub in V] assert sum(dims) == Vbig.finat_element.space_dimension() @@ -578,10 +581,10 @@ def assemble_coef(self, J, form_compiler_parameters): elements = list(map(ufl.BrokenElement, elements)) if V.shape: elements = [ufl.TensorElement(ele, shape=V.shape) for ele in elements] - Z = firedrake.FunctionSpace(mesh, ufl.MixedElement(elements)) + Z = FunctionSpace(mesh, ufl.MixedElement(elements)) # Transform the exterior derivative and the original arguments of J to arguments in Z - args = (firedrake.TestFunctions(Z), firedrake.TrialFunctions(Z)) + args = (TestFunctions(Z), TrialFunctions(Z)) repargs = {t: v[0] for t, v in zip(args_J, args)} repgrad = {ufl.grad(t): map_grad(v[1]) for t, v in zip(args_J, args)} if map_grad else {} Jcell = expand_indices(expand_derivatives(ufl.Form(J.integrals_by_type("cell")))) 
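The gradient-replacement trick above (and the ufl.diff variant used by PoissonFDMPC) can be exercised in isolation with plain UFL. The sketch below uses the same legacy UFL element classes as this patch, ignores the geometric factors (Finv, Piola) that the real coefficient assembly folds in, and the mesh, space and coefficient names are made up for the example.

import ufl
from ufl.algorithms.ad import expand_derivatives

cell = ufl.quadrilateral
mesh = ufl.Mesh(ufl.VectorElement("Lagrange", cell, 1))
V = ufl.FunctionSpace(mesh, ufl.FiniteElement("Lagrange", cell, 3))
Q0 = ufl.FunctionSpace(mesh, ufl.FiniteElement("DQ", cell, 0))

v = ufl.TestFunction(V)
u = ufl.TrialFunction(V)
mu = ufl.Coefficient(Q0)                       # made-up scalar diffusivity
J = mu * ufl.inner(ufl.grad(v), ufl.grad(u)) * ufl.dx

# Replace grad(v), grad(u) by variables and differentiate the integrand twice:
# the result is the second-order coefficient alpha (here mu times the identity).
args = J.arguments()
ref_grad = [ufl.variable(ufl.grad(t)) for t in args]
replace_grad = {ufl.grad(t): g for t, g in zip(args, ref_grad)}
integrand = ufl.replace(J.integrals()[0].integrand(), replace_grad)
alpha = expand_derivatives(ufl.diff(ufl.diff(integrand, ref_grad[0]), ref_grad[1]))
assert alpha.ufl_shape == (2, 2)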
@@ -594,9 +597,10 @@ def assemble_coef(self, J, form_compiler_parameters): try: return cache[key] except KeyError: + from firedrake.assemble import assemble if block_diagonal and V.shape: - M = firedrake.assemble(mixed_form, mat_type="matfree", - form_compiler_parameters=form_compiler_parameters) + M = assemble(mixed_form, mat_type="matfree", + form_compiler_parameters=form_compiler_parameters) coefficients = {} assembly_callables = [] for iset, name in zip(Z.dof_dset.field_ises, ("beta", "alpha")): @@ -605,9 +609,9 @@ def assemble_coef(self, J, form_compiler_parameters): coefficients[name] = ctx._block_diagonal assembly_callables.append(ctx._assemble_block_diagonal) else: - tensor = firedrake.Function(Z) + tensor = Function(Z) coefficients = {"beta": tensor.sub(0), "alpha": tensor.sub(1)} - assembly_callables = [partial(firedrake.assemble, mixed_form, tensor=tensor, diagonal=True, + assembly_callables = [partial(assemble, mixed_form, tensor=tensor, diagonal=True, form_compiler_parameters=form_compiler_parameters)] return cache.setdefault(key, (coefficients, assembly_callables)) @@ -1379,6 +1383,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coef(self, J, form_compiler_parameters): + from firedrake.assemble import assemble coefficients = {} assembly_callables = [] @@ -1395,7 +1400,7 @@ def assemble_coef(self, J, form_compiler_parameters): pass quad_deg = 2*degree+1 quad_deg = (form_compiler_parameters or {}).get("degree", quad_deg) - dx = firedrake.dx(degree=quad_deg) + dx = ufl.dx(degree=quad_deg) family = "Discontinuous Lagrange" if tdim == 1 else "DQ" degree = 0 @@ -1441,11 +1446,11 @@ def assemble_coef(self, J, form_compiler_parameters): # assemble second order coefficient if not isinstance(alpha, ufl.constantvalue.Zero): - Q = firedrake.FunctionSpace(mesh, Qe) - q = firedrake.TestFunction(Q) - Gq = firedrake.Function(Q) + Q = FunctionSpace(mesh, Qe) + q = TestFunction(Q) + Gq = Function(Q) coefficients["alpha"] = Gq - assembly_callables.append(partial(firedrake.assemble, ufl.inner(G, q)*dx, Gq)) + assembly_callables.append(partial(assemble, ufl.inner(G, q)*dx, Gq)) # assemble zero-th order coefficient if not isinstance(beta, ufl.constantvalue.Zero): @@ -1456,17 +1461,17 @@ def assemble_coef(self, J, form_compiler_parameters): Qe = ufl.FiniteElement(family, mesh.ufl_cell(), degree=degree, quad_scheme="default") if shape: Qe = ufl.TensorElement(Qe, shape=shape) - Q = firedrake.FunctionSpace(mesh, Qe) - q = firedrake.TestFunction(Q) - Bq = firedrake.Function(Q) + Q = FunctionSpace(mesh, Qe) + q = TestFunction(Q) + Bq = Function(Q) coefficients["beta"] = Bq - assembly_callables.append(partial(firedrake.assemble, ufl.inner(beta, q)*dx, Bq)) + assembly_callables.append(partial(assemble, ufl.inner(beta, q)*dx, Bq)) if Piola: # make DGT functions with the second order coefficient # and the Piola tensor for each side of each facet extruded = mesh.cell_set._extruded - dS_int = firedrake.dS_h(degree=quad_deg) + firedrake.dS_v(degree=quad_deg) if extruded else firedrake.dS(degree=quad_deg) + dS_int = ufl.dS_h(degree=quad_deg) + ufl.dS_v(degree=quad_deg) if extruded else ufl.dS(degree=quad_deg) ele = ufl.BrokenElement(ufl.FiniteElement("DGT", mesh.ufl_cell(), 0)) area = ufl.FacetArea(mesh) @@ -1477,18 +1482,18 @@ def assemble_coef(self, J, form_compiler_parameters): G = vol * alpha G = ufl.as_tensor([[[G[i, k, j, k] for i in range(G.ufl_shape[0])] for j in range(G.ufl_shape[2])] for k in range(G.ufl_shape[3])]) - Q = 
firedrake.TensorFunctionSpace(mesh, ele, shape=G.ufl_shape) - q = firedrake.TestFunction(Q) - Gq_facet = firedrake.Function(Q) + Q = FunctionSpace(mesh, ufl.TensorElement(ele, shape=G.ufl_shape)) + q = TestFunction(Q) + Gq_facet = Function(Q) coefficients["Gq_facet"] = Gq_facet - assembly_callables.append(partial(firedrake.assemble, ((ufl.inner(q('+'), G('+')) + ufl.inner(q('-'), G('-')))/area)*dS_int, Gq_facet)) + assembly_callables.append(partial(assemble, ((ufl.inner(q('+'), G('+')) + ufl.inner(q('-'), G('-')))/area)*dS_int, Gq_facet)) PT = Piola.T - Q = firedrake.TensorFunctionSpace(mesh, ele, shape=PT.ufl_shape) - q = firedrake.TestFunction(Q) - PT_facet = firedrake.Function(Q) + Q = FunctionSpace(mesh, ufl.TensorElement(ele, shape=PT.ufl_shape)) + q = TestFunction(Q) + PT_facet = Function(Q) coefficients["PT_facet"] = PT_facet - assembly_callables.append(partial(firedrake.assemble, ((ufl.inner(q('+'), PT('+')) + ufl.inner(q('-'), PT('-')))/area)*dS_int, PT_facet)) + assembly_callables.append(partial(assemble, ((ufl.inner(q('+'), PT('+')) + ufl.inner(q('-'), PT('-')))/area)*dS_int, PT_facet)) # make DGT functions with BC flags rvs = V.ufl_element().reference_value_shape() @@ -1498,9 +1503,9 @@ def assemble_coef(self, J, form_compiler_parameters): Qe = ufl.FiniteElement(family, cell=cell, degree=degree) if rvs: Qe = ufl.TensorElement(Qe, shape=rvs) - Q = firedrake.FunctionSpace(mesh, Qe) - q = firedrake.TestFunction(Q) - bcflags = firedrake.Function(Q) + Q = FunctionSpace(mesh, Qe) + q = TestFunction(Q) + bcflags = Function(Q) ref_args = [ufl.variable(t) for t in args_J] replace_args = {t: s for t, s in zip(args_J, ref_args)} @@ -1520,7 +1525,7 @@ def assemble_coef(self, J, form_compiler_parameters): if len(forms): form = sum(forms) if len(form.arguments()) == 1: - assembly_callables.append(partial(firedrake.assemble, form, bcflags)) + assembly_callables.append(partial(assemble, form, bcflags)) coefficients["bcflags"] = bcflags # set arbitrary non-zero coefficients for preallocation @@ -1643,7 +1648,7 @@ def get_interior_facet_maps(V): local_facet_data_fun: maps interior facets to the local facet numbering in the two cells sharing it, nfacets: the total number of interior facets owned by this process """ - if isinstance(V, firedrake.Function): + if isinstance(V, Function): V = V.function_space() mesh = V.mesh() intfacets = mesh.interior_facets From a9dbd8a44eb28baf2c0a2036800ea438db74c4fc Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 23 Mar 2023 18:21:35 +0000 Subject: [PATCH 44/75] mantain old code --- firedrake/preconditioners/fdm.py | 79 ++++++++++++++------------------ 1 file changed, 34 insertions(+), 45 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index dcb6f9aedb..09c7cc730a 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -111,7 +111,7 @@ def initialize(self, pc): pmat_type = options.getString("mat_type", PETSc.Mat.Type.AIJ) appctx = self.get_appctx(pc) - fcp = appctx.get("form_compiler_parameters") + fcp = appctx.get("form_compiler_parameters") or {} self.appctx = appctx # Get original Jacobian form and bcs @@ -203,14 +203,14 @@ def initialize(self, pc): fdmpc.setFromOptions() @PETSc.Log.EventDecorator("FDMPrealloc") - def assemble_fdm_op(self, V, J, bcs, form_compiler_parameters, pmat_type, use_static_condensation): + def assemble_fdm_op(self, V, J, bcs, fcp, pmat_type, use_static_condensation): """ Assemble the sparse preconditioner from diagonal mass matrices. 
:arg V: the :class:`.FunctionSpace` of the form arguments :arg J: the Jacobian bilinear form :arg bcs: an iterable of boundary conditions on V - :arg form_compiler_parameters: parameters to assemble diagonal factors + :arg fcp: form compiler parameters to assemble coefficients :arg pmat_type: the preconditioner `PETSc.Mat.Type` :arg use_static_condensation: are we assembling the statically-condensed Schur complement on facets? @@ -273,7 +273,7 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): bdofs = numpy.nonzero(lgmap.indices[:own] < 0)[0].astype(PETSc.IntType) bc_rows[Vsub] = Vsub.dof_dset.lgmap.apply(bdofs, result=bdofs) - coefficients, assembly_callables = self.assemble_coef(J, form_compiler_parameters) + coefficients, assembly_callables = self.assemble_coefficients(J, fcp) coeffs = [coefficients.get(k) for k in ("beta", "alpha")] cmaps = [extrude_node_map(ck.cell_node_map())[0] for ck in coeffs] @@ -508,14 +508,14 @@ def update_De(data): RtAP.buff.destroy() @PETSc.Log.EventDecorator("FDMCoefficients") - def assemble_coef(self, J, form_compiler_parameters): + def assemble_coefficients(self, J, fcp): """ Obtain coefficients for the auxiliary operator as the diagonal of a weighted mass matrix in broken(V^k) * broken(V^{k+1}). See Section 3.2 of Brubeck2022b. :arg J: the Jacobian bilinear :class:`ufl.Form`, - :arg form_compiler_parameters: a `dict` with tsfc parameters. + :arg fcp: form compiler parameters to assemble the diagonal matrices. :returns: a 2-tuple of a `dict` with the zero-th order and second order coefficients keyed on ``"beta"`` and ``"alpha"``, @@ -600,7 +600,7 @@ def assemble_coef(self, J, form_compiler_parameters): from firedrake.assemble import assemble if block_diagonal and V.shape: M = assemble(mixed_form, mat_type="matfree", - form_compiler_parameters=form_compiler_parameters) + form_compiler_parameters=fcp) coefficients = {} assembly_callables = [] for iset, name in zip(Z.dof_dset.field_ises, ("beta", "alpha")): @@ -612,7 +612,7 @@ def assemble_coef(self, J, form_compiler_parameters): tensor = Function(Z) coefficients = {"beta": tensor.sub(0), "alpha": tensor.sub(1)} assembly_callables = [partial(assemble, mixed_form, tensor=tensor, diagonal=True, - form_compiler_parameters=form_compiler_parameters)] + form_compiler_parameters=fcp)] return cache.setdefault(key, (coefficients, assembly_callables)) @PETSc.Log.EventDecorator("FDMRefTensor") @@ -1207,7 +1207,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): # assemble zero-th order term separately, including off-diagonals (mixed components) # I cannot do this for hdiv elements as off-diagonals are not sparse, this is because - # the FDM eigenbases for CG(k) and DG(k-1) are not orthogonal to each other + # the FDM eigenbases for CG(k) and CG(k-1) are not orthogonal to each other rindices = None use_diag_Bq = Bq is None or len(Bq.ufl_shape) != 2 or static_condensation if not use_diag_Bq: @@ -1246,10 +1246,10 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): # get second order coefficient on this cell if Gq is not None: - mue.flat[:] = numpy.sum(Gq.dat.data_ro[je], axis=0) + numpy.sum(Gq.dat.data_ro[je], axis=0, out=mue) # get zero-th order coefficient on this cell if Bq is not None: - bqe.flat[:] = numpy.sum(Bq.dat.data_ro[je], axis=0) + numpy.sum(Bq.dat.data_ro[je], axis=0, out=bqe) for k in range(ncomp): # permutation of axes with respect to the first vector component @@ -1382,7 +1382,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): Ae.destroy() 
@PETSc.Log.EventDecorator("FDMCoefficients") - def assemble_coef(self, J, form_compiler_parameters): + def assemble_coefficients(self, J, fcp): from firedrake.assemble import assemble coefficients = {} assembly_callables = [] @@ -1399,11 +1399,9 @@ def assemble_coef(self, J, form_compiler_parameters): except TypeError: pass quad_deg = 2*degree+1 - quad_deg = (form_compiler_parameters or {}).get("degree", quad_deg) - dx = ufl.dx(degree=quad_deg) - + quad_deg = fcp.get("degree", quad_deg) + dx = ufl.dx(degree=quad_deg, domain=mesh) family = "Discontinuous Lagrange" if tdim == 1 else "DQ" - degree = 0 # extract coefficients directly from the bilinear form integrals_J = J.integrals_by_type("cell") @@ -1435,22 +1433,19 @@ def assemble_coef(self, J, form_compiler_parameters): beta = ufl.replace(beta, {dummy_Piola: Piola}) # discard mixed derivatives and mixed components - G = alpha - if len(G.ufl_shape) == 2: - G = ufl.diag_vector(G) + if len(alpha.ufl_shape) == 2: + alpha = ufl.diag_vector(alpha) else: - Gshape = G.ufl_shape - Gshape = Gshape[:len(Gshape)//2] - G = ufl.as_tensor(numpy.reshape([G[i+i] for i in numpy.ndindex(Gshape)], (Gshape[0], -1))) - Qe = ufl.TensorElement(family, mesh.ufl_cell(), degree=degree, quad_scheme="default", shape=G.ufl_shape) + ashape = alpha.ufl_shape + ashape = ashape[:len(ashape)//2] + alpha = ufl.as_tensor(numpy.reshape([alpha[i+i] for i in numpy.ndindex(ashape)], (ashape[0], -1))) + Qe = ufl.TensorElement(family, mesh.ufl_cell(), degree=0, shape=alpha.ufl_shape) # assemble second order coefficient if not isinstance(alpha, ufl.constantvalue.Zero): Q = FunctionSpace(mesh, Qe) - q = TestFunction(Q) - Gq = Function(Q) - coefficients["alpha"] = Gq - assembly_callables.append(partial(assemble, ufl.inner(G, q)*dx, Gq)) + tensor = coefficients.setdefault("alpha", Function(Q)) + assembly_callables.append(partial(assemble, ufl.inner(TestFunction(Q), alpha)*dx, tensor)) # assemble zero-th order coefficient if not isinstance(beta, ufl.constantvalue.Zero): @@ -1458,42 +1453,36 @@ def assemble_coef(self, J, form_compiler_parameters): # keep diagonal beta = ufl.diag_vector(beta) shape = beta.ufl_shape - Qe = ufl.FiniteElement(family, mesh.ufl_cell(), degree=degree, quad_scheme="default") + Qe = ufl.FiniteElement(family, mesh.ufl_cell(), degree=0) if shape: Qe = ufl.TensorElement(Qe, shape=shape) Q = FunctionSpace(mesh, Qe) - q = TestFunction(Q) - Bq = Function(Q) - coefficients["beta"] = Bq - assembly_callables.append(partial(assemble, ufl.inner(beta, q)*dx, Bq)) + tensor = coefficients.setdefault("beta", Function(Q)) + assembly_callables.append(partial(assemble, ufl.inner(TestFunction(Q), beta)*dx, tensor)) if Piola: # make DGT functions with the second order coefficient # and the Piola tensor for each side of each facet extruded = mesh.cell_set._extruded dS_int = ufl.dS_h(degree=quad_deg) + ufl.dS_v(degree=quad_deg) if extruded else ufl.dS(degree=quad_deg) - ele = ufl.BrokenElement(ufl.FiniteElement("DGT", mesh.ufl_cell(), 0)) - area = ufl.FacetArea(mesh) + ifacet_inner = lambda v, u: ((ufl.inner(v('+'), u('+')) + ufl.inner(v('-'), u('-')))/ufl.FacetArea(mesh))*dS_int replace_grad = {ufl.grad(t): ufl.dot(dt, Finv) for t, dt in zip(args_J, ref_grad)} alpha = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), ref_grad[0]), ref_grad[1]) for i in integrals_J])) - vol = abs(ufl.JacobianDeterminant(mesh)) - G = vol * alpha + G = alpha G = ufl.as_tensor([[[G[i, k, j, k] for i in range(G.ufl_shape[0])] for j in range(G.ufl_shape[2])] for k in 
range(G.ufl_shape[3])]) + G = G * abs(ufl.JacobianDeterminant(mesh)) + ele = ufl.BrokenElement(ufl.FiniteElement("DGT", cell=mesh.ufl_cell(), degree=0)) Q = FunctionSpace(mesh, ufl.TensorElement(ele, shape=G.ufl_shape)) - q = TestFunction(Q) - Gq_facet = Function(Q) - coefficients["Gq_facet"] = Gq_facet - assembly_callables.append(partial(assemble, ((ufl.inner(q('+'), G('+')) + ufl.inner(q('-'), G('-')))/area)*dS_int, Gq_facet)) + tensor = coefficients.setdefault("Gq_facet", Function(Q)) + assembly_callables.append(partial(assemble, ifacet_inner(TestFunction(Q), G), tensor)) PT = Piola.T Q = FunctionSpace(mesh, ufl.TensorElement(ele, shape=PT.ufl_shape)) - q = TestFunction(Q) - PT_facet = Function(Q) - coefficients["PT_facet"] = PT_facet - assembly_callables.append(partial(assemble, ((ufl.inner(q('+'), PT('+')) + ufl.inner(q('-'), PT('-')))/area)*dS_int, PT_facet)) + tensor = coefficients.setdefault("PT_facet", Function(Q)) + assembly_callables.append(partial(assemble, ifacet_inner(TestFunction(Q), PT), tensor)) # make DGT functions with BC flags rvs = V.ufl_element().reference_value_shape() @@ -1525,8 +1514,8 @@ def assemble_coef(self, J, form_compiler_parameters): if len(forms): form = sum(forms) if len(form.arguments()) == 1: - assembly_callables.append(partial(assemble, form, bcflags)) coefficients["bcflags"] = bcflags + assembly_callables.append(partial(assemble, form, bcflags)) # set arbitrary non-zero coefficients for preallocation for coef in coefficients.values(): From ab144a25e72886dda15ff4c4af7a49412bce4492 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Sat, 25 Mar 2023 09:28:54 +0000 Subject: [PATCH 45/75] use OneFormAssembler, compute reference tensor via dual evaluation --- firedrake/preconditioners/fdm.py | 50 ++++++++++++++------------------ 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 09c7cc730a..7e410a6411 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -508,14 +508,15 @@ def update_De(data): RtAP.buff.destroy() @PETSc.Log.EventDecorator("FDMCoefficients") - def assemble_coefficients(self, J, fcp): + def assemble_coefficients(self, J, fcp, block_diagonal=True): """ Obtain coefficients for the auxiliary operator as the diagonal of a weighted mass matrix in broken(V^k) * broken(V^{k+1}). See Section 3.2 of Brubeck2022b. :arg J: the Jacobian bilinear :class:`ufl.Form`, - :arg fcp: form compiler parameters to assemble the diagonal matrices. + :arg fcp: form compiler parameters to assemble the diagonal of the mass matrices. + :arg block_diagonal: are we assembling the block diagonal of the mass matrices? 
:returns: a 2-tuple of a `dict` with the zero-th order and second order coefficients keyed on ``"beta"`` and ``"alpha"``, @@ -593,12 +594,11 @@ def assemble_coefficients(self, J, fcp): # Return coefficients and assembly callables, and cache them class key = (mixed_form.signature(), mesh) cache = self._cache.setdefault("coefficients", {}) - block_diagonal = True try: return cache[key] except KeyError: - from firedrake.assemble import assemble if block_diagonal and V.shape: + from firedrake.assemble import assemble M = assemble(mixed_form, mat_type="matfree", form_compiler_parameters=fcp) coefficients = {} @@ -609,10 +609,11 @@ def assemble_coefficients(self, J, fcp): coefficients[name] = ctx._block_diagonal assembly_callables.append(ctx._assemble_block_diagonal) else: + from firedrake.assemble import OneFormAssembler tensor = Function(Z) coefficients = {"beta": tensor.sub(0), "alpha": tensor.sub(1)} - assembly_callables = [partial(assemble, mixed_form, tensor=tensor, diagonal=True, - form_compiler_parameters=fcp)] + assembly_callables = [OneFormAssembler(mixed_form, tensor=tensor, diagonal=True, + form_compiler_parameters=fcp).assemble] return cache.setdefault(key, (coefficients, assembly_callables)) @PETSc.Log.EventDecorator("FDMRefTensor") @@ -657,24 +658,10 @@ def assemble_reference_tensor(self, V): e1 = elements[-1] if elements[-1].formdegree == 1 else FIAT.FDMDiscontinuousLagrange(ref_el, degree-1) if is_interior: e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") - if hasattr(eq.dual, "rule"): - rule = eq.dual.rule - else: - rule = FIAT.quadrature.make_quadrature(ref_el, degree+1) - - pts = rule.get_points() - wts = rule.get_weights() - - phiq = eq.tabulate(0, pts) - phi1 = e1.tabulate(0, pts) - phi0 = e0.tabulate(1, pts) - moments = lambda v, u: numpy.dot(numpy.multiply(v, wts), u.T) - A00 = moments(phiq[(0, )], phi0[(0, )]) - A11 = moments(phi1[(0, )], phi1[(0, )]) - A10 = moments(phi1[(0, )], phi0[(1, )]) - A10 = numpy.linalg.solve(A11, A10) - A11 = numpy.eye(A11.shape[0]) + A00 = fiat_reference_prolongator(e0, eq) + A10 = fiat_reference_prolongator(e0, e1, derivative=True) + A11 = numpy.eye(e1.space_dimension(), dtype=A00.dtype) B_blocks = mass_blocks(tdim, formdegree, A00, A11) A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) @@ -1383,7 +1370,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coefficients(self, J, fcp): - from firedrake.assemble import assemble + from firedrake.assemble import OneFormAssembler coefficients = {} assembly_callables = [] @@ -1445,7 +1432,8 @@ def assemble_coefficients(self, J, fcp): if not isinstance(alpha, ufl.constantvalue.Zero): Q = FunctionSpace(mesh, Qe) tensor = coefficients.setdefault("alpha", Function(Q)) - assembly_callables.append(partial(assemble, ufl.inner(TestFunction(Q), alpha)*dx, tensor)) + assembly_callables.append(OneFormAssembler(ufl.inner(TestFunction(Q), alpha)*dx, tensor=tensor, + form_compiler_parameters=fcp).assemble) # assemble zero-th order coefficient if not isinstance(beta, ufl.constantvalue.Zero): @@ -1458,7 +1446,8 @@ def assemble_coefficients(self, J, fcp): Qe = ufl.TensorElement(Qe, shape=shape) Q = FunctionSpace(mesh, Qe) tensor = coefficients.setdefault("beta", Function(Q)) - assembly_callables.append(partial(assemble, ufl.inner(TestFunction(Q), beta)*dx, tensor)) + assembly_callables.append(OneFormAssembler(ufl.inner(TestFunction(Q), beta)*dx, tensor=tensor, + form_compiler_parameters=fcp).assemble) if Piola: # make DGT functions 
with the second order coefficient @@ -1477,12 +1466,14 @@ def assemble_coefficients(self, J, fcp): ele = ufl.BrokenElement(ufl.FiniteElement("DGT", cell=mesh.ufl_cell(), degree=0)) Q = FunctionSpace(mesh, ufl.TensorElement(ele, shape=G.ufl_shape)) tensor = coefficients.setdefault("Gq_facet", Function(Q)) - assembly_callables.append(partial(assemble, ifacet_inner(TestFunction(Q), G), tensor)) + assembly_callables.append(OneFormAssembler(ifacet_inner(TestFunction(Q), G), tensor=tensor, + form_compiler_parameters=fcp).assemble) PT = Piola.T Q = FunctionSpace(mesh, ufl.TensorElement(ele, shape=PT.ufl_shape)) tensor = coefficients.setdefault("PT_facet", Function(Q)) - assembly_callables.append(partial(assemble, ifacet_inner(TestFunction(Q), PT), tensor)) + assembly_callables.append(OneFormAssembler(ifacet_inner(TestFunction(Q), PT), tensor=tensor, + form_compiler_parameters=fcp).assemble) # make DGT functions with BC flags rvs = V.ufl_element().reference_value_shape() @@ -1515,7 +1506,8 @@ def assemble_coefficients(self, J, fcp): form = sum(forms) if len(form.arguments()) == 1: coefficients["bcflags"] = bcflags - assembly_callables.append(partial(assemble, form, bcflags)) + assembly_callables.append(OneFormAssembler(form, tensor=bcflags, + form_compiler_parameters=fcp).assemble) # set arbitrary non-zero coefficients for preallocation for coef in coefficients.values(): From bfcdbb885584f788f1c0efad05dcaf4c7ea6d565 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Sat, 25 Mar 2023 13:05:51 +0000 Subject: [PATCH 46/75] optimise assembly --- firedrake/preconditioners/fdm.py | 88 +++++++++++++++++++------------- 1 file changed, 52 insertions(+), 36 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 7e410a6411..8aa109c225 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -238,14 +238,17 @@ def assemble_fdm_op(self, V, J, bcs, fcp, pmat_type, use_static_condensation): dofs = numpy.arange(value_size * Vbig.finat_element.space_dimension(), dtype=fdofs.dtype) idofs = numpy.setdiff1d(dofs, fdofs, assume_unique=True) self.ises = tuple(PETSc.IS().createGeneral(indices, comm=PETSc.COMM_SELF) for indices in (idofs, fdofs)) - self.submats = [None for _ in range(7)] + self.submats = [None for _ in range(8)] self.reference_tensor_on_diag = {} self.get_static_condensation = {} if Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block + diagonal_interior = Vfacet.finat_element.formdegree == 0 and value_size == 1 + factor = factor_diagonal_mat if diagonal_interior else factor_block_diagonal_mat self.reference_tensor_on_diag[Vfacet] = self.assemble_reference_tensor(Vbig) - self.get_static_condensation[Vfacet] = lambda A: condense_element_mat(A, self.ises[0], self.ises[1], self.submats) + self.get_static_condensation[Vfacet] = lambda A: condense_element_mat(A, self.ises[0], self.ises[1], + self.submats, factor) elif len(fdofs) and V.finat_element.formdegree == 0: # If we are in H(grad), we just pad with zeros on the statically-condensed pattern @@ -272,23 +275,24 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): own = Vsub.dof_dset.layout_vec.getLocalSize() bdofs = numpy.nonzero(lgmap.indices[:own] < 0)[0].astype(PETSc.IntType) bc_rows[Vsub] = Vsub.dof_dset.lgmap.apply(bdofs, result=bdofs) + self.nel = nel coefficients, assembly_callables = self.assemble_coefficients(J, fcp) - coeffs = [coefficients.get(k) for k in ("beta", "alpha")] - cmaps = 
[extrude_node_map(ck.cell_node_map())[0] for ck in coeffs] + coeffs = [coefficients.get(name) for name in ("beta", "alpha")] + cdata = [c.dat.data_ro for c in coeffs] + cmaps = [extrude_node_map(c.cell_node_map())[0] for c in coeffs] + cindices = [cmap(0) if self.nel else None for cmap in cmaps] @PETSc.Log.EventDecorator("FDMGetCoeffs") def get_coeffs(e, result=None): # Get vector for betas and alphas on a cell - vals = [] - for k, (coeff, cmap) in enumerate(zip(coeffs, cmaps)): - get_coeffs.indices[k] = cmap(e, result=get_coeffs.indices[k]) - vals.append(coeff.dat.data_ro[get_coeffs.indices[k]]) - return numpy.concatenate(vals, out=result) - get_coeffs.indices = [None for _ in range(len(coeffs))] - self.get_coeffs = get_coeffs + if result is None: + return numpy.concatenate([c[cmap(e, result=idx)] for c, cmap, idx in zip(cdata, cmaps, cindices)], out=result) + numpy.take(cdata[0], cmaps[0](e, result=cindices[0]), axis=0, out=result[:cindices[0].size]) + numpy.take(cdata[1], cmaps[1](e, result=cindices[1]), axis=0, out=result[cindices[0].size:]) + return result - self.nel = nel + self.get_coeffs = get_coeffs self.work_mats = {} Pmats = {} @@ -446,28 +450,28 @@ def RtAP(R, A, P, result=None): if A.getType() != PETSc.Mat.Type.PREALLOCATOR: Ae = self.work_mats[Vrow, Vcol] De = self.work_mats[common_key] - data = self.work_csr[2] insert = PETSc.InsertMode.INSERT work_vec = De.getDiagonal() + data = self.work_csr[2] if len(data.shape) == 3: @PETSc.Log.EventDecorator("FDMUpdateDiag") - def update_De(data): + def update_De(): De.setValuesCSR(*self.work_csr, addv=insert) De.assemble() return De else: @PETSc.Log.EventDecorator("FDMUpdateDiag") - def update_De(data): - work_vec.setArray(data) + def update_De(): De.setDiagonal(work_vec, addv=insert) return De + data = work_vec.array_w # Core assembly loop for e in range(self.nel): rindices = get_rindices(e, result=rindices) cindices = get_cindices(e, result=cindices) data = self.get_coeffs(e, result=data) - Ae = assemble_element_mat(update_De(data), result=Ae) + Ae = assemble_element_mat(update_De(), result=Ae) update_A(condense_element_mat(Ae), rindices, cindices) work_vec.destroy() @@ -682,13 +686,26 @@ def assemble_reference_tensor(self, V): return cache.setdefault(key, result) -def factor_interior_mat(A00): +@PETSc.Log.EventDecorator("FDMFactor") +def factor_diagonal_mat(A, work_vec=None): """ - Used in static condensation. Take in A00 on a cell, return its Cholesky + Used in static condensation. Take in A on a cell, return its Cholesky + factorisation. + """ + work_vec = A.getDiagonal(result=work_vec) + work_vec.reciprocal() + work_vec.sqrtabs() + A.setDiagonal(work_vec) + + +@PETSc.Log.EventDecorator("FDMFactor") +def factor_block_diagonal_mat(A, work_vec=None): + """ + Used in static condensation. Take in A on a cell, return its Cholesky factorisation. Assumes that interior DOF have been reordered to make A00 block diagonal with blocks of increasing dimension. 
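+
+    Roughly, each diagonal block is overwritten in place by the inverse of its
+    Cholesky factor (dense sketch of the CSR loop below, small blocks assumed)::
+
+        block[:] = numpy.linalg.inv(numpy.linalg.cholesky(block))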
""" - indptr, indices, data = A00.getValuesCSR() + indptr, indices, data = A.getValuesCSR() degree = numpy.diff(indptr) # TODO handle non-symmetric case with LU, requires scipy @@ -703,26 +720,25 @@ def factor_interior_mat(A00): zlice = slice(zlice.stop, zlice.stop + k*nblocks) data[zlice] = invchol(data[zlice].reshape((-1, k, k))).reshape((-1,)) flops += nblocks * (((k+1)**3 + 5*(k+1)-12)//3 + k**3) - + A.setValuesCSR(indptr, indices, data) + A.assemble() PETSc.Log.logFlops(flops) - A00.setValuesCSR(indptr, indices, data) - A00.assemble() @PETSc.Log.EventDecorator("FDMCondense") -def condense_element_mat(A, i0, i1, submats): +def condense_element_mat(A, i0, i1, submats, factor): """Return the Schur complement associated to indices in i1, condensing i0 out""" isrows = [i0, i0, i1, i1] iscols = [i0, i1, i0, i1] - structure = PETSc.Mat.Structure.SUBSET if submats[6] else None + structure = PETSc.Mat.Structure.SUBSET if submats[7] else None submats[:4] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:4] if submats[0] else None) A00, A01, A10, A11 = submats[:4] - factor_interior_mat(A00) - submats[4] = A00.matMult(A01, result=submats[4]) - submats[5] = A10.matTransposeMult(A00, result=submats[5]) - submats[6] = submats[5].matMult(submats[4], result=submats[6]) - submats[6].aypx(-1.0, A11, structure=structure) - return submats[6] + factor(A00, submats[4]) + submats[5] = A00.matMult(A01, result=submats[5]) + submats[6] = A10.matTransposeMult(A00, result=submats[6]) + submats[7] = submats[6].matMult(submats[5], result=submats[7]) + submats[7].aypx(-1.0, A11, structure=structure) + return submats[7] @PETSc.Log.EventDecorator("FDMCondense") @@ -730,14 +746,14 @@ def condense_element_pattern(A, i0, i1, submats): """Add zeroes on the statically condensed pattern so that you can run ICC(0)""" isrows = [i0, i0, i1] iscols = [i0, i1, i0] - structure = PETSc.Mat.Structure.SUBSET if submats[6] else None + structure = PETSc.Mat.Structure.SUBSET if submats[7] else None submats[:3] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:3] if submats[0] else None) A00, A01, A10 = submats[:3] - submats[4] = A10.matTransposeMult(A00, result=submats[4]) submats[5] = A00.matMult(A01, result=submats[5]) - submats[6] = submats[4].matMult(submats[5], result=submats[6]) - submats[6].aypx(0.0, A, structure=structure) - return submats[6] + submats[6] = A10.matTransposeMult(A00, result=submats[6]) + submats[7] = submats[6].matMult(submats[5], result=submats[7]) + submats[7].aypx(0.0, A, structure=structure) + return submats[7] @PETSc.Log.EventDecorator("LoadCode") From 1ff89011da797ef6f06ca8f84b1a2731d26d76a5 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Sun, 26 Mar 2023 17:05:23 +0100 Subject: [PATCH 47/75] compute Schur compelment via block QR and block SVD --- firedrake/preconditioners/fdm.py | 175 ++++++++++++++++++++++++------- 1 file changed, 138 insertions(+), 37 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 8aa109c225..73e5dfc0e3 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -245,10 +245,10 @@ def assemble_fdm_op(self, V, J, bcs, fcp, pmat_type, use_static_condensation): if Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block diagonal_interior = Vfacet.finat_element.formdegree == 0 and value_size == 1 - factor = factor_diagonal_mat if diagonal_interior else factor_block_diagonal_mat + get_schur = schur_complement_diagonal if 
diagonal_interior else schur_complement_block_qr self.reference_tensor_on_diag[Vfacet] = self.assemble_reference_tensor(Vbig) self.get_static_condensation[Vfacet] = lambda A: condense_element_mat(A, self.ises[0], self.ises[1], - self.submats, factor) + self.submats, get_schur) elif len(fdofs) and V.finat_element.formdegree == 0: # If we are in H(grad), we just pad with zeros on the statically-condensed pattern @@ -426,7 +426,7 @@ def RtAP(R, A, P, result=None): if Vrow == Vcol: get_cindices = lambda e, result=None: result update_A = lambda Ae, rindices, cindices: set_submat(A, Ae, rindices, rindices, addv) - # moments of orthogonalized basis against basis tabulation and derivative tabulation + # interpolator of basis and exterior derivative onto broken spaces rtensor = self.reference_tensor_on_diag.get(Vrow) or self.assemble_reference_tensor(Vrow) # element matrix obtained via Equation (3.9) of Brubeck2022b assemble_element_mat = lambda De, result=None: De.PtAP(rtensor, result=result) @@ -628,8 +628,8 @@ def assemble_reference_tensor(self, V): :arg V: a :class:`.FunctionSpace` - :returns: a :class:`PETSc.Mat` with the moments of orthogonalized bases - against the basis and its exterior derivative. + :returns: a :class:`PETSc.Mat` interpolating V^k * d(V^k) onto + broken(V^k) * broken(V^{k+1}) on the reference element. """ tdim = V.mesh().topological_dimension() value_size = V.value_size @@ -686,59 +686,160 @@ def assemble_reference_tensor(self, V): return cache.setdefault(key, result) -@PETSc.Log.EventDecorator("FDMFactor") -def factor_diagonal_mat(A, work_vec=None): +@PETSc.Log.EventDecorator("FDMGetSchur") +def schur_complement_diagonal(submats): """ - Used in static condensation. Take in A on a cell, return its Cholesky - factorisation. + Used in static condensation. Take in blocks A00, A01, A10, A11, + return the Schur complement A11 - A10 * inv(A00) * A01. + + Assumes A00 is diagonal. """ - work_vec = A.getDiagonal(result=work_vec) - work_vec.reciprocal() - work_vec.sqrtabs() - A.setDiagonal(work_vec) + structure = PETSc.Mat.Structure.SUBSET if submats[-1] else None + A00, A01, A10, A11 = submats[:4] + submats[4] = A00.getDiagonal(result=submats[4]) + submats[4].reciprocal() + submats[4].scale(-1) + A01.diagonalScale(L=submats[4]) + submats[-1] = A10.matMult(A01, result=submats[-1]) + submats[-1].axpy(1.0, A11, structure=structure) + return submats[-1] -@PETSc.Log.EventDecorator("FDMFactor") -def factor_block_diagonal_mat(A, work_vec=None): +@PETSc.Log.EventDecorator("FDMGetSchur") +def schur_complement_block_cholesky(submats): """ - Used in static condensation. Take in A on a cell, return its Cholesky - factorisation. Assumes that interior DOF have been reordered to make A00 + Used in static condensation. Take in blocks A00, A01, A10, A11, + return A11 - A10 * inv(A00) * A01. + + Assumes that interior DOFs have been reordered to make A00 block diagonal with blocks of increasing dimension. 
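+
+    A dense sketch of the update performed here (illustration only, small
+    symmetric positive-definite blocks assumed)::
+
+        R = numpy.linalg.inv(numpy.linalg.cholesky(A00))
+        S = A11 - (A10 @ R.T) @ (R @ A01)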
""" - indptr, indices, data = A.getValuesCSR() + structure = PETSc.Mat.Structure.SUBSET if submats[-1] else None + A00, A01, A10, A11 = submats[:4] + indptr, indices, R = A00.getValuesCSR() degree = numpy.diff(indptr) - # TODO handle non-symmetric case with LU, requires scipy - invchol = lambda X: numpy.linalg.inv(numpy.linalg.cholesky(X)) nblocks = numpy.count_nonzero(degree == 1) zlice = slice(0, nblocks) - numpy.sqrt(data[zlice], out=data[zlice]) - numpy.reciprocal(data[zlice], out=data[zlice]) - flops = nblocks * 2 + numpy.sqrt(R[zlice], out=R[zlice]) + numpy.reciprocal(R[zlice], out=R[zlice]) + flops = 2*nblocks for k in range(2, degree[-1]+1): nblocks = numpy.count_nonzero(degree == k) zlice = slice(zlice.stop, zlice.stop + k*nblocks) - data[zlice] = invchol(data[zlice].reshape((-1, k, k))).reshape((-1,)) - flops += nblocks * (((k+1)**3 + 5*(k+1)-12)//3 + k**3) - A.setValuesCSR(indptr, indices, data) - A.assemble() + A = R[zlice].reshape((-1, k, k)) + R[zlice] = numpy.linalg.inv(numpy.linalg.cholesky(A)).reshape((-1)) + flops += nblocks * ((k**3)//3 + k**3) + PETSc.Log.logFlops(flops) + A00.setValuesCSR(indptr, indices, R) + A00.assemble() + submats[4] = A10.matTransposeMult(A00, result=submats[4]) + submats[5] = A00.matMult(A01, result=submats[5]) + submats[-1] = submats[4].matMult(submats[5], result=submats[-1]) + submats[-1].aypx(-1.0, A11, structure=structure) + return submats[-1] + + +@PETSc.Log.EventDecorator("FDMGetSchur") +def schur_complement_block_qr(submats): + """ + Used in static condensation. Take in blocks A00, A01, A10, A11, + return A11 - A10 * inv(A00) * A01. + + Assumes that interior DOFs have been reordered to make A00 + block diagonal with blocks of increasing dimension. + """ + structure = PETSc.Mat.Structure.SUBSET if submats[-1] else None + A00, A01, A10, A11 = submats[:4] + indptr, indices, R = A00.getValuesCSR() + degree = numpy.diff(indptr) + Q = numpy.ones(R.shape, dtype=R.dtype) + + nblocks = numpy.count_nonzero(degree == 1) + zlice = slice(0, nblocks) + numpy.reciprocal(R[zlice], out=R[zlice]) + flops = nblocks + for k in range(2, degree[-1]+1): + nblocks = numpy.count_nonzero(degree == k) + zlice = slice(zlice.stop, zlice.stop + k*nblocks) + A = R[zlice].reshape((-1, k, k)) + q, r = numpy.linalg.qr(A, mode="complete") + R[zlice] = numpy.linalg.inv(r).reshape((-1,)) + Q[zlice] = q.reshape((-1,)) + flops += nblocks * ((4*k**3)//3 + k**3) + + PETSc.Log.logFlops(flops) + A00.setValuesCSR(indptr, indices, R) + A00.assemble() + submats[4] = A10.matMult(A00, result=submats[4]) + A00.setValuesCSR(indptr, indices, Q) + A00.assemble() + submats[5] = A00.transposeMatMult(A01, result=submats[5]) + submats[-1] = submats[4].matMult(submats[5], result=submats[-1]) + submats[-1].aypx(-1.0, A11, structure=structure) + return submats[-1] + + +@PETSc.Log.EventDecorator("FDMGetSchur") +def schur_complement_block_svd(submats): + """ + Used in static condensation. Take in blocks A00, A01, A10, A11, + return A11 - A10 * inv(A00) * A01. + + Assumes that interior DOFs have been reordered to make A00 + block diagonal with blocks of increasing dimension. 
+ """ + structure = PETSc.Mat.Structure.SUBSET if submats[-1] else None + A00, A01, A10, A11 = submats[:4] + indptr, indices, U = A00.getValuesCSR() + degree = numpy.diff(indptr) + V = numpy.ones(U.shape, dtype=U.dtype) + submats[4] = A00.getDiagonal(result=submats[4]) + D = submats[4] + + nblocks = numpy.count_nonzero(degree == 1) + bslice = slice(0, nblocks) + dslice = slice(0, nblocks) + numpy.sign(D.array_r[dslice], out=U[bslice]) + + flops = nblocks + for k in range(2, degree[-1]+1): + nblocks = numpy.count_nonzero(degree == k) + bslice = slice(bslice.stop, bslice.stop + k*nblocks) + dslice = slice(dslice.stop, dslice.stop + nblocks) + A = U[bslice].reshape((-1, k, k)) + + u, s, v = numpy.linalg.svd(A, full_matrices=False) + D.array_w[dslice] = s.reshape((-1,)) + U[bslice] = u.reshape((-1,)) + V[bslice] = v.reshape((-1,)) + flops += nblocks * ((4*k**3)//3 + 4*k**3) + + PETSc.Log.logFlops(flops) + + A00.setValuesCSR(indptr, indices, V) + A00.assemble() + D.sqrtabs() + D.reciprocal() + A00.diagonalScale(L=D) + submats[5] = A10.matTransposeMult(A00, result=submats[5]) + A00.setValuesCSR(indptr, indices, U) + A00.assemble() + A00.diagonalScale(R=D) + submats[6] = A00.transposeMatMult(A01, result=submats[6]) + submats[-1] = submats[5].matMult(submats[6], result=submats[-1]) + submats[-1].aypx(-1.0, A11, structure=structure) + return submats[-1] @PETSc.Log.EventDecorator("FDMCondense") -def condense_element_mat(A, i0, i1, submats, factor): +def condense_element_mat(A, i0, i1, submats, get_schur_complement): """Return the Schur complement associated to indices in i1, condensing i0 out""" isrows = [i0, i0, i1, i1] iscols = [i0, i1, i0, i1] - structure = PETSc.Mat.Structure.SUBSET if submats[7] else None submats[:4] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:4] if submats[0] else None) - A00, A01, A10, A11 = submats[:4] - factor(A00, submats[4]) - submats[5] = A00.matMult(A01, result=submats[5]) - submats[6] = A10.matTransposeMult(A00, result=submats[6]) - submats[7] = submats[6].matMult(submats[5], result=submats[7]) - submats[7].aypx(-1.0, A11, structure=structure) - return submats[7] + return get_schur_complement(submats) @PETSc.Log.EventDecorator("FDMCondense") @@ -750,7 +851,7 @@ def condense_element_pattern(A, i0, i1, submats): submats[:3] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:3] if submats[0] else None) A00, A01, A10 = submats[:3] submats[5] = A00.matMult(A01, result=submats[5]) - submats[6] = A10.matTransposeMult(A00, result=submats[6]) + submats[6] = A10.matMult(A00, result=submats[6]) submats[7] = submats[6].matMult(submats[5], result=submats[7]) submats[7].aypx(0.0, A, structure=structure) return submats[7] From dd7b2de8b4a50dffe96afb6e9941e84651405c77 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 28 Mar 2023 12:43:55 +0100 Subject: [PATCH 48/75] use triple matrix product --- firedrake/preconditioners/fdm.py | 123 ++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 45 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 73e5dfc0e3..271a392184 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -416,28 +416,24 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): :arg addv: a `PETSc.Mat.InsertMode` :arg triu: are we assembling only the upper triangular part? 
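+
+        On each cell the element matrix is formed as R^T * D * C, where D
+        carries the cell coefficients and R, C are the row and column
+        reference tensors (Equation (3.9) of Brubeck2022b); it is then
+        optionally condensed before being added into A.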
""" - def RtAP(R, A, P, result=None): - RtAP.buff = A.matMult(P, result=RtAP.buff) - return R.transposeMatMult(RtAP.buff, result=result) - RtAP.buff = None - set_submat = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) get_rindices = self.cell_to_global[Vrow] if Vrow == Vcol: + condense_element_mat = self.get_static_condensation.get(Vrow) get_cindices = lambda e, result=None: result update_A = lambda Ae, rindices, cindices: set_submat(A, Ae, rindices, rindices, addv) - # interpolator of basis and exterior derivative onto broken spaces - rtensor = self.reference_tensor_on_diag.get(Vrow) or self.assemble_reference_tensor(Vrow) - # element matrix obtained via Equation (3.9) of Brubeck2022b - assemble_element_mat = lambda De, result=None: De.PtAP(rtensor, result=result) - condense_element_mat = self.get_static_condensation.get(Vrow) + # interpolators of basis and exterior derivative onto broken spaces + ctensor = self.reference_tensor_on_diag.get(Vrow) or self.assemble_reference_tensor(Vrow) + rtensor = PETSc.Mat().createTranspose(ctensor).convert(ctensor.getType()) else: + condense_element_mat = None get_cindices = self.cell_to_global[Vcol] update_A = lambda Ae, rindices, cindices: set_submat(A, Ae, rindices, cindices, addv) - rtensor = self.assemble_reference_tensor(Vrow) ctensor = self.assemble_reference_tensor(Vcol) - assemble_element_mat = lambda De, result=None: RtAP(rtensor, De, ctensor, result=result) - condense_element_mat = None + rtensor = self.assemble_reference_tensor(Vrow, transpose=True) + + # element matrix obtained via Equation (3.9) of Brubeck2022b + assemble_element_mat = lambda De, result=None: rtensor.matMatMult(De, ctensor, result=result) do_sort = True if condense_element_mat is None: @@ -508,8 +504,8 @@ def update_De(): self.work_csr = (None, None, None) self.work_mats[common_key] = None self.work_mats[Vrow, Vcol] = None - if RtAP.buff: - RtAP.buff.destroy() + if Vcol == Vrow: + rtensor.destroy() @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coefficients(self, J, fcp, block_diagonal=True): @@ -621,7 +617,7 @@ def assemble_coefficients(self, J, fcp, block_diagonal=True): return cache.setdefault(key, (coefficients, assembly_callables)) @PETSc.Log.EventDecorator("FDMRefTensor") - def assemble_reference_tensor(self, V): + def assemble_reference_tensor(self, V, transpose=False): """ Return the reference tensor used in the diagonal factorisation of the sparse cell matrices. See Section 3.2 of Brubeck2022b. 
@@ -642,12 +638,17 @@ def assemble_reference_tensor(self, V): if formdegree == tdim: degree = degree + 1 is_interior, is_facet = is_restricted(V.finat_element) - key = (degree, tdim, formdegree, value_size, is_interior, is_facet) + key = (degree, tdim, formdegree, value_size, is_interior, is_facet, transpose) cache = self._cache.setdefault("reference_tensor", {}) try: return cache[key] except KeyError: - full_key = (degree, tdim, formdegree, value_size, False, False) + if transpose: + result = self.assemble_reference_tensor(V, transpose=False) + result = PETSc.Mat().createTranspose(result).convert(result.getType()) + return cache.setdefault(key, result) + + full_key = (degree, tdim, formdegree, value_size, False, False, False) if is_facet and full_key in cache: result = cache[full_key] noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.comm) @@ -705,6 +706,40 @@ def schur_complement_diagonal(submats): return submats[-1] +@PETSc.Log.EventDecorator("FDMGetSchur") +def schur_complement_block_inv(submats): + """ + Used in static condensation. Take in blocks A00, A01, A10, A11, + return A11 - A10 * inv(A00) * A01. + + Assumes that interior DOFs have been reordered to make A00 + block diagonal with blocks of increasing dimension. + """ + structure = PETSc.Mat.Structure.SUBSET if submats[-1] else None + A00, A01, A10, A11 = submats[:4] + indptr, indices, R = A00.getValuesCSR() + degree = numpy.diff(indptr) + + nblocks = numpy.count_nonzero(degree == 1) + zlice = slice(0, nblocks) + numpy.reciprocal(R[zlice], out=R[zlice]) + flops = nblocks + for k in range(2, degree[-1]+1): + nblocks = numpy.count_nonzero(degree == k) + zlice = slice(zlice.stop, zlice.stop + k*nblocks) + A = R[zlice].reshape((-1, k, k)) + R[zlice] = numpy.linalg.inv(A).reshape((-1,)) + flops += nblocks * (k**3) + + PETSc.Log.logFlops(flops) + A00.setValuesCSR(indptr, indices, R) + A00.assemble() + A00.scale(-1.0) + submats[-1] = A10.matMatMult(A00, A01, result=submats[-1]) + submats[-1].axpy(1.0, A11, structure=structure) + return submats[-1] + + @PETSc.Log.EventDecorator("FDMGetSchur") def schur_complement_block_cholesky(submats): """ @@ -735,9 +770,9 @@ def schur_complement_block_cholesky(submats): A00.setValuesCSR(indptr, indices, R) A00.assemble() submats[4] = A10.matTransposeMult(A00, result=submats[4]) - submats[5] = A00.matMult(A01, result=submats[5]) - submats[-1] = submats[4].matMult(submats[5], result=submats[-1]) - submats[-1].aypx(-1.0, A11, structure=structure) + A00.scale(-1.0) + submats[-1] = submats[4].matMatMult(A00, A01, result=submats[-1]) + submats[-1].axpy(1.0, A11, structure=structure) return submats[-1] @@ -765,19 +800,19 @@ def schur_complement_block_qr(submats): zlice = slice(zlice.stop, zlice.stop + k*nblocks) A = R[zlice].reshape((-1, k, k)) q, r = numpy.linalg.qr(A, mode="complete") - R[zlice] = numpy.linalg.inv(r).reshape((-1,)) Q[zlice] = q.reshape((-1,)) + R[zlice] = numpy.linalg.inv(r).reshape((-1,)) flops += nblocks * ((4*k**3)//3 + k**3) PETSc.Log.logFlops(flops) - A00.setValuesCSR(indptr, indices, R) - A00.assemble() - submats[4] = A10.matMult(A00, result=submats[4]) A00.setValuesCSR(indptr, indices, Q) A00.assemble() - submats[5] = A00.transposeMatMult(A01, result=submats[5]) - submats[-1] = submats[4].matMult(submats[5], result=submats[-1]) - submats[-1].aypx(-1.0, A11, structure=structure) + submats[4] = A00.transposeMatMult(A01, result=submats[4]) + A00.setValuesCSR(indptr, indices, R) + A00.assemble() + A00.scale(-1.0) + submats[-1] = 
A10.matMatMult(A00, submats[4], result=submats[-1]) + submats[-1].axpy(1.0, A11, structure=structure) return submats[-1] @@ -812,24 +847,23 @@ def schur_complement_block_svd(submats): u, s, v = numpy.linalg.svd(A, full_matrices=False) D.array_w[dslice] = s.reshape((-1,)) - U[bslice] = u.reshape((-1,)) - V[bslice] = v.reshape((-1,)) + U[bslice] = numpy.transpose(u, axes=(0, 2, 1)).reshape((-1,)) + V[bslice] = numpy.transpose(v, axes=(0, 2, 1)).reshape((-1,)) flops += nblocks * ((4*k**3)//3 + 4*k**3) PETSc.Log.logFlops(flops) - - A00.setValuesCSR(indptr, indices, V) - A00.assemble() D.sqrtabs() D.reciprocal() - A00.diagonalScale(L=D) - submats[5] = A10.matTransposeMult(A00, result=submats[5]) - A00.setValuesCSR(indptr, indices, U) + A00.setValuesCSR(indptr, indices, V) A00.assemble() A00.diagonalScale(R=D) - submats[6] = A00.transposeMatMult(A01, result=submats[6]) - submats[-1] = submats[5].matMult(submats[6], result=submats[-1]) - submats[-1].aypx(-1.0, A11, structure=structure) + submats[5] = A10.matMult(A00, result=submats[5]) + D.scale(-1.0) + A00.setValuesCSR(indptr, indices, U) + A00.assemble() + A00.diagonalScale(L=D) + submats[-1] = submats[5].matMatMult(A00, A01, result=submats[-1]) + submats[-1].axpy(1.0, A11, structure=structure) return submats[-1] @@ -847,14 +881,13 @@ def condense_element_pattern(A, i0, i1, submats): """Add zeroes on the statically condensed pattern so that you can run ICC(0)""" isrows = [i0, i0, i1] iscols = [i0, i1, i0] - structure = PETSc.Mat.Structure.SUBSET if submats[7] else None + structure = PETSc.Mat.Structure.SUBSET if submats[3] else None submats[:3] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:3] if submats[0] else None) A00, A01, A10 = submats[:3] - submats[5] = A00.matMult(A01, result=submats[5]) - submats[6] = A10.matMult(A00, result=submats[6]) - submats[7] = submats[6].matMult(submats[5], result=submats[7]) - submats[7].aypx(0.0, A, structure=structure) - return submats[7] + A00.scale(0.0) + submats[3] = A10.matMatMult(A00, A01, result=submats[3]) + submats[3].axpy(1.0, A, structure=structure) + return submats[3] @PETSc.Log.EventDecorator("LoadCode") From 20f2db55a2b3070b3fd4f6eeaa834e920659d2cf Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 28 Mar 2023 17:52:31 +0100 Subject: [PATCH 49/75] remove unnecessary lru_caches in fdm.py --- firedrake/preconditioners/fdm.py | 51 +++++++++++++++----------------- firedrake/preconditioners/pmg.py | 17 +++++------ 2 files changed, 32 insertions(+), 36 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 271a392184..77c55cf1c1 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1,4 +1,4 @@ -from functools import partial, lru_cache +from functools import partial from itertools import product from firedrake.petsc import PETSc from firedrake.preconditioners.base import PCBase @@ -11,14 +11,13 @@ from firedrake.function import Function from firedrake.functionspace import FunctionSpace from firedrake.ufl_expr import TestFunction, TestFunctions, TrialFunctions - from firedrake_citations import Citations -from pyop2.compilation import load -from pyop2.utils import get_petsc_dir -from pyop2.sparsity import get_preallocation -from tsfc.finatinterface import create_element from ufl.algorithms.ad import expand_derivatives from ufl.algorithms.expand_indices import expand_indices +from tsfc.finatinterface import create_element +from pyop2.compilation import load +from pyop2.sparsity import get_preallocation 
+from pyop2.utils import get_petsc_dir import firedrake.dmhooks as dmhooks import ctypes @@ -464,8 +463,8 @@ def update_De(): # Core assembly loop for e in range(self.nel): - rindices = get_rindices(e, result=rindices) cindices = get_cindices(e, result=cindices) + rindices = get_rindices(e, result=rindices) data = self.get_coeffs(e, result=data) Ae = assemble_element_mat(update_De(), result=Ae) update_A(condense_element_mat(Ae), rindices, cindices) @@ -497,8 +496,8 @@ def update_De(): Se = condense_element_mat(Ae) for e in range(self.nel): - rindices = get_rindices(e, result=rindices) cindices = get_cindices(e, result=cindices) + rindices = get_rindices(e, result=rindices) update_A(Se, rindices, cindices) else: self.work_csr = (None, None, None) @@ -1286,8 +1285,14 @@ def assemble_reference_tensor(self, V): Afdm = [] # sparse interval mass and stiffness matrices for each direction Dfdm = [] # tabulation of normal derivatives at the boundary for each direction bdof = [] # indices of point evaluation dofs for each direction + cache = {} for e in line_elements: - Afdm[:0], Dfdm[:0], bdof[:0] = tuple(zip(fdm_setup_ipdg(e, eta))) + key = e.degree() + try: + rtensor = cache[key] + except KeyError: + rtensor = cache.setdefault(key, fdm_setup_ipdg(e, eta, comm=PETSc.COMM_SELF)) + Afdm[:0], Dfdm[:0], bdof[:0] = tuple(zip(rtensor)) if not is_dg and e.degree() == degree: # do not apply SIPG along continuous directions Dfdm[0] = None @@ -1436,8 +1441,8 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): eta = float(self.appctx.get("eta")) lgmap = self.lgmaps[V] - index_facet, local_facet_data, nfacets = get_interior_facet_maps(V) - index_coef, _, _ = get_interior_facet_maps(Gq_facet or Gq) + index_facet, local_facet_data, nfacets = extrude_interior_facet_maps(V) + index_coef, _, _ = extrude_interior_facet_maps(Gq_facet or Gq) rows = numpy.zeros((2, sdim), dtype=PETSc.IntType) for e in range(nfacets): @@ -1685,7 +1690,7 @@ def pull_axis(x, pshape, idir): return numpy.reshape(numpy.moveaxis(numpy.reshape(x.copy(), pshape), idir, 0), x.shape) -def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False): +def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False, comm=None): """ Create a SeqAIJ Mat from a dense matrix using the diagonal and a subset of rows and columns. If dense_indices is empty, then also include the off-diagonal corners of the matrix. @@ -1696,8 +1701,7 @@ def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False): nnz[dense_indices] = len(dense_indices) if block else n imode = PETSc.InsertMode.INSERT - A_petsc = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=PETSc.COMM_SELF) - + A_petsc = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) idx = numpy.arange(n, dtype=PETSc.IntType) if block: values = A_numpy[dense_indices, :][:, dense_indices] @@ -1706,18 +1710,15 @@ def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False): for j in dense_indices: A_petsc.setValues(j, idx, A_numpy[j, :], imode) A_petsc.setValues(idx, j, A_numpy[:, j], imode) - if diag: idx = idx[:, None] values = A_numpy.diagonal()[:, None] A_petsc.setValuesRCV(idx, idx, values, imode) - A_petsc.assemble() return A_petsc -@lru_cache(maxsize=10) -def fdm_setup_ipdg(fdm_element, eta): +def fdm_setup_ipdg(fdm_element, eta, comm=None): """ Setup for the fast diagonalisation method for the IP-DG formulation. 
Compute sparsified interval stiffness and mass matrices @@ -1725,6 +1726,7 @@ def fdm_setup_ipdg(fdm_element, eta): :arg fdm_element: a :class:`FIAT.FDMElement` :arg eta: penalty coefficient as a `float` + :arg comm: a :class:`PETSc.Comm` :returns: 3-tuple of: Afdm: a list of :class:`PETSc.Mats` with the sparse interval matrices @@ -1735,10 +1737,7 @@ def fdm_setup_ipdg(fdm_element, eta): """ ref_el = fdm_element.get_reference_element() degree = fdm_element.degree() - if hasattr(fdm_element.dual, "rule"): - rule = fdm_element.dual.rule - else: - rule = FIAT.quadrature.make_quadrature(ref_el, degree+1) + rule = FIAT.quadrature.make_quadrature(ref_el, degree+1) edof = fdm_element.entity_dofs() bdof = edof[0][0] + edof[0][1] @@ -1753,7 +1752,7 @@ def fdm_setup_ipdg(fdm_element, eta): Dfacet = basis[(1,)] Dfacet[:, 0] = -Dfacet[:, 0] - Afdm = [numpy_to_petsc(Bhat, bdof, block=True)] + Afdm = [numpy_to_petsc(Bhat, bdof, block=True, comm=comm)] for bc in range(4): bcs = (bc % 2, bc//2) Abc = Ahat.copy() @@ -1763,12 +1762,11 @@ def fdm_setup_ipdg(fdm_element, eta): Abc[:, j] -= Dfacet[:, k] Abc[j, :] -= Dfacet[:, k] Abc[j, j] += eta - Afdm.append(numpy_to_petsc(Abc, bdof)) + Afdm.append(numpy_to_petsc(Abc, bdof, comm=comm)) return Afdm, Dfacet, bdof -@lru_cache(maxsize=10) -def get_interior_facet_maps(V): +def extrude_interior_facet_maps(V): """ Extrude V.interior_facet_node_map and V.mesh().interior_facets.local_facet_dat @@ -1841,7 +1839,6 @@ def get_interior_facet_maps(V): return facet_to_nodes_fun, local_facet_data_fun, nfacets -@lru_cache(maxsize=20) def extrude_node_map(node_map, bsize=1): """ Construct a (possibly vector-valued) cell to node map from an un-extruded scalar map. diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 1fb2e267da..c6b482b5ee 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -9,10 +9,9 @@ from firedrake.nullspace import VectorSpaceBasis, MixedVectorSpaceBasis from firedrake.tsfc_interface import extract_numbered_coefficients from firedrake.utils import ScalarType_c, IntType_c, cached_property -from pyop2 import op2, PermutedMap from tsfc import compile_expression_dual_evaluation from tsfc.finatinterface import create_element -from FIAT.reference_element import LINE +from pyop2 import op2 import firedrake import finat @@ -559,8 +558,9 @@ def expand_element(ele): def evaluate_dual(source, target, alpha=None): - # Evaluate the action of a set of dual functionals of the target element - # on the (derivatives of the) basis functions of the source element. 
+ """Evaluate the action of a set of dual functionals of the target element + on the (derivative of order alpha of the) basis functions of the source + element.""" primal = source.get_nodal_basis() dual = target.get_dual_set() A = dual.to_riesz(primal) @@ -642,7 +642,7 @@ def get_permutation_to_line_elements(finat_element): for term in terms: factors = term.factors if hasattr(term, "factors") else (term,) fiat_factors = [e.fiat_equivalent for e in reversed(factors)] - if any(e.get_reference_element().shape != LINE for e in fiat_factors): + if any(e.get_reference_element().get_spatial_dimension() != 1 for e in fiat_factors): raise ValueError("Failed to decompose %s into line elements" % fiat_factors) # use the same FIAT element if it appears multiple times in the expansion @@ -1142,7 +1142,7 @@ def get_permuted_map(V): indices, _, _ = get_permutation_to_line_elements(V.finat_element) if numpy.all(indices[:-1] < indices[1:]): return V.cell_node_map() - return PermutedMap(V.cell_node_map(), indices) + return op2.PermutedMap(V.cell_node_map(), indices) class StandaloneInterpolationMatrix(object): @@ -1237,7 +1237,6 @@ def view(self, mat, viewer=None): type(self).__name__) def getInfo(self, mat, info=None): - from mpi4py import MPI memory = self.uf.dat.nbytes + self.uc.dat.nbytes if self._weight is not None: memory += self._weight.dat.nbytes @@ -1246,10 +1245,10 @@ def getInfo(self, mat, info=None): if info == PETSc.Mat.InfoType.LOCAL: return {"memory": memory} elif info == PETSc.Mat.InfoType.GLOBAL_SUM: - gmem = mat.comm.tompi4py().allreduce(memory, op=MPI.SUM) + gmem = mat.comm.tompi4py().allreduce(memory, op=op2.MPI.SUM) return {"memory": gmem} elif info == PETSc.Mat.InfoType.GLOBAL_MAX: - gmem = mat.comm.tompi4py().allreduce(memory, op=MPI.MAX) + gmem = mat.comm.tompi4py().allreduce(memory, op=op2.MPI.MAX) return {"memory": gmem} else: raise ValueError("Unknown info type %s" % info) From cb9de37f10b63a3b0944eb03034dbf3eff4831c3 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 29 Mar 2023 09:46:22 +0100 Subject: [PATCH 50/75] remove repeated import --- firedrake/matrix_free/operators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/firedrake/matrix_free/operators.py b/firedrake/matrix_free/operators.py index 5e73f69785..b39c4d2e76 100644 --- a/firedrake/matrix_free/operators.py +++ b/firedrake/matrix_free/operators.py @@ -309,7 +309,6 @@ def view(self, mat, viewer=None): type(self).__name__) def getInfo(self, mat, info=None): - from mpi4py import MPI memory = self._x.dat.nbytes + self._y.dat.nbytes if hasattr(self, "_xbc"): memory += self._xbc.dat.nbytes From 24c8b80393828b8bcd8f815c6857ef6fa7e09ad6 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 29 Mar 2023 10:39:35 +0100 Subject: [PATCH 51/75] cleanup --- firedrake/preconditioners/fdm.py | 78 +++++++++++++------------------- 1 file changed, 32 insertions(+), 46 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 77c55cf1c1..e0b98f5da6 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -265,7 +265,7 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): self.lgmaps = {} bc_rows = {} for Vsub in V: - lgmap = Vsub.local_to_global_map([bc.reconstruct(V=Vsub, g=0) for bc in bcs]) + lgmap = Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) bsize = Vsub.dof_dset.layout_vec.getBlockSize() cell_to_local, nel = extrude_node_map(Vsub.cell_node_map(), bsize=bsize) self.cell_to_global[Vsub] = 
partial(cell_to_global, lgmap, cell_to_local) @@ -335,7 +335,7 @@ def get_coeffs(e, result=None): if len(V) == 1: Pmat = Pmats[V, V] else: - Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=V.comm) + Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) @PETSc.Log.EventDecorator("FDMAssemble") def assemble_P(): @@ -1540,10 +1540,10 @@ def assemble_coefficients(self, J, fcp): degree = max(degree) except TypeError: pass - quad_deg = 2*degree+1 - quad_deg = fcp.get("degree", quad_deg) + quad_deg = fcp.get("degree", 2*degree+1) dx = ufl.dx(degree=quad_deg, domain=mesh) family = "Discontinuous Lagrange" if tdim == 1 else "DQ" + DG = ufl.FiniteElement(family, mesh.ufl_cell(), degree=0) # extract coefficients directly from the bilinear form integrals_J = J.integrals_by_type("cell") @@ -1556,24 +1556,8 @@ def assemble_coefficients(self, J, fcp): replace_grad = {ufl.grad(t): ufl.dot(Piola, ufl.dot(dt, Finv)) for t, dt in zip(args_J, ref_grad)} else: replace_grad = {ufl.grad(t): ufl.dot(dt, Finv) for t, dt in zip(args_J, ref_grad)} - alpha = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), ref_grad[0]), ref_grad[1]) for i in integrals_J])) - - # get zero-th order coefficent - ref_val = [ufl.variable(t) for t in args_J] - if Piola: - dummy_element = ufl.TensorElement("DQ", cell=mesh.ufl_cell(), degree=1, shape=Piola.ufl_shape) - dummy_Piola = ufl.Coefficient(ufl.FunctionSpace(mesh, dummy_element)) - replace_val = {t: ufl.dot(dummy_Piola, s) for t, s in zip(args_J, ref_val)} - else: - replace_val = {t: s for t, s in zip(args_J, ref_val)} - - beta = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_val), - ref_val[0]), ref_val[1]) for i in integrals_J])) - if Piola: - beta = ufl.replace(beta, {dummy_Piola: Piola}) - # discard mixed derivatives and mixed components if len(alpha.ufl_shape) == 2: alpha = ufl.diag_vector(alpha) @@ -1581,35 +1565,46 @@ def assemble_coefficients(self, J, fcp): ashape = alpha.ufl_shape ashape = ashape[:len(ashape)//2] alpha = ufl.as_tensor(numpy.reshape([alpha[i+i] for i in numpy.ndindex(ashape)], (ashape[0], -1))) - Qe = ufl.TensorElement(family, mesh.ufl_cell(), degree=0, shape=alpha.ufl_shape) # assemble second order coefficient if not isinstance(alpha, ufl.constantvalue.Zero): - Q = FunctionSpace(mesh, Qe) + Q = FunctionSpace(mesh, ufl.TensorElement(DG, shape=alpha.ufl_shape)) tensor = coefficients.setdefault("alpha", Function(Q)) assembly_callables.append(OneFormAssembler(ufl.inner(TestFunction(Q), alpha)*dx, tensor=tensor, form_compiler_parameters=fcp).assemble) + # get zero-th order coefficent + ref_val = [ufl.variable(t) for t in args_J] + if Piola: + dummy_element = ufl.TensorElement(family, cell=mesh.ufl_cell(), degree=1, shape=Piola.ufl_shape) + dummy_Piola = ufl.Coefficient(ufl.FunctionSpace(mesh, dummy_element)) + replace_val = {t: ufl.dot(dummy_Piola, s) for t, s in zip(args_J, ref_val)} + else: + replace_val = {t: s for t, s in zip(args_J, ref_val)} + beta = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_val), + ref_val[0]), ref_val[1]) for i in integrals_J])) + if Piola: + beta = ufl.replace(beta, {dummy_Piola: Piola}) # assemble zero-th order coefficient if not isinstance(beta, ufl.constantvalue.Zero): if Piola: # keep diagonal beta = ufl.diag_vector(beta) - shape = beta.ufl_shape - Qe = ufl.FiniteElement(family, mesh.ufl_cell(), degree=0) - if shape: - Qe = ufl.TensorElement(Qe, 
shape=shape) - Q = FunctionSpace(mesh, Qe) + Q = FunctionSpace(mesh, ufl.TensorElement(DG, shape=beta.ufl_shape) if beta.ufl_shape else DG) tensor = coefficients.setdefault("beta", Function(Q)) assembly_callables.append(OneFormAssembler(ufl.inner(TestFunction(Q), beta)*dx, tensor=tensor, form_compiler_parameters=fcp).assemble) + family = "CG" if tdim == 1 else "DGT" + degree = 1 if tdim == 1 else 0 + DGT = ufl.BrokenElement(ufl.FiniteElement(family, cell=mesh.ufl_cell(), degree=degree)) if Piola: # make DGT functions with the second order coefficient # and the Piola tensor for each side of each facet extruded = mesh.cell_set._extruded dS_int = ufl.dS_h(degree=quad_deg) + ufl.dS_v(degree=quad_deg) if extruded else ufl.dS(degree=quad_deg) - ifacet_inner = lambda v, u: ((ufl.inner(v('+'), u('+')) + ufl.inner(v('-'), u('-')))/ufl.FacetArea(mesh))*dS_int + area = ufl.FacetArea(mesh) + ifacet_inner = lambda v, u: ((ufl.inner(v('+'), u('+')) + ufl.inner(v('-'), u('-')))/area)*dS_int replace_grad = {ufl.grad(t): ufl.dot(dt, Finv) for t, dt in zip(args_J, ref_grad)} alpha = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), @@ -1618,29 +1613,20 @@ def assemble_coefficients(self, J, fcp): G = ufl.as_tensor([[[G[i, k, j, k] for i in range(G.ufl_shape[0])] for j in range(G.ufl_shape[2])] for k in range(G.ufl_shape[3])]) G = G * abs(ufl.JacobianDeterminant(mesh)) - ele = ufl.BrokenElement(ufl.FiniteElement("DGT", cell=mesh.ufl_cell(), degree=0)) - Q = FunctionSpace(mesh, ufl.TensorElement(ele, shape=G.ufl_shape)) + Q = FunctionSpace(mesh, ufl.TensorElement(DGT, shape=G.ufl_shape)) tensor = coefficients.setdefault("Gq_facet", Function(Q)) assembly_callables.append(OneFormAssembler(ifacet_inner(TestFunction(Q), G), tensor=tensor, form_compiler_parameters=fcp).assemble) - PT = Piola.T - Q = FunctionSpace(mesh, ufl.TensorElement(ele, shape=PT.ufl_shape)) + Q = FunctionSpace(mesh, ufl.TensorElement(DGT, shape=PT.ufl_shape)) tensor = coefficients.setdefault("PT_facet", Function(Q)) assembly_callables.append(OneFormAssembler(ifacet_inner(TestFunction(Q), PT), tensor=tensor, form_compiler_parameters=fcp).assemble) # make DGT functions with BC flags - rvs = V.ufl_element().reference_value_shape() - cell = mesh.ufl_cell() - family = "CG" if cell.topological_dimension() == 1 else "DGT" - degree = 1 if cell.topological_dimension() == 1 else 0 - Qe = ufl.FiniteElement(family, cell=cell, degree=degree) - if rvs: - Qe = ufl.TensorElement(Qe, shape=rvs) - Q = FunctionSpace(mesh, Qe) - q = TestFunction(Q) - bcflags = Function(Q) + shape = V.ufl_element().reference_value_shape() + Q = FunctionSpace(mesh, ufl.TensorElement(DGT, shape=shape) if shape else DGT) + test = TestFunction(Q) ref_args = [ufl.variable(t) for t in args_J] replace_args = {t: s for t, s in zip(args_J, ref_args)} @@ -1652,16 +1638,16 @@ def assemble_coefficients(self, J, fcp): if itype.startswith("exterior_facet"): beta = ufl.diff(ufl.diff(ufl.replace(it.integrand(), replace_args), ref_args[0]), ref_args[1]) beta = expand_derivatives(beta) - if rvs: + if beta.ufl_shape: beta = ufl.diag_vector(beta) ds_ext = ufl.Measure(itype, domain=mesh, subdomain_id=it.subdomain_id(), metadata=md) - forms.append(ufl.inner(q, beta)*ds_ext) + forms.append(ufl.inner(test, beta)*ds_ext) if len(forms): form = sum(forms) if len(form.arguments()) == 1: - coefficients["bcflags"] = bcflags - assembly_callables.append(OneFormAssembler(form, tensor=bcflags, + tensor = coefficients.setdefault("bcflags", Function(Q)) + 
assembly_callables.append(OneFormAssembler(form, tensor=tensor, form_compiler_parameters=fcp).assemble) # set arbitrary non-zero coefficients for preallocation From 1ace1b8d47faf03620c76b4b10559e22c6624da9 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 29 Mar 2023 10:58:24 +0100 Subject: [PATCH 52/75] use numpy.take in extrude_node_map --- firedrake/preconditioners/fdm.py | 41 ++++++++++---------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index e0b98f5da6..0601fe0a16 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1649,7 +1649,6 @@ def assemble_coefficients(self, J, fcp): tensor = coefficients.setdefault("bcflags", Function(Q)) assembly_callables.append(OneFormAssembler(form, tensor=tensor, form_compiler_parameters=fcp).assemble) - # set arbitrary non-zero coefficients for preallocation for coef in coefficients.values(): with coef.dat.vec as cvec: @@ -1834,52 +1833,37 @@ def extrude_node_map(node_map, bsize=1): :returns: a 2-tuple with the cell to node map and the number of cells owned by this process """ - nelv = node_map.values.shape[0] + nel = node_map.values.shape[0] if node_map.offset is None: - nel = nelv - def scalar_map(e, result=None): - if result is None: - result = numpy.copy(node_map.values_with_halo[e]) - else: - numpy.copyto(result, node_map.values_with_halo[e]) - return result - + return numpy.take(node_map.values_with_halo, e, axis=0, out=result) else: layers = node_map.iterset.layers_array if layers.shape[0] == 1: - nelz = layers[0, 1]-layers[0, 0]-1 - nel = nelz*nelv - def _scalar_map(node_map, nelz, e, result=None): - if result is None: - result = numpy.copy(node_map.values_with_halo[e // nelz]) - else: - numpy.copyto(result, node_map.values_with_halo[e // nelz]) + result = numpy.take(node_map.values_with_halo, e // nelz, axis=0, out=result) result += (e % nelz)*node_map.offset return result + + nelz = layers[0, 1]-layers[0, 0]-1 + nel *= nelz scalar_map = partial(_scalar_map, node_map, nelz) else: - nelz = layers[:, 1]-layers[:, 0]-1 - nel = sum(nelz[:nelv]) - to_base = numpy.repeat(numpy.arange(node_map.values_with_halo.shape[0], dtype=node_map.offset.dtype), nelz) - to_layer = numpy.concatenate([numpy.arange(nz, dtype=node_map.offset.dtype) for nz in nelz]) - def _scalar_map(node_map, to_base, to_layer, e, result=None): - if result is None: - result = numpy.copy(node_map.values_with_halo[to_base[e]]) - else: - numpy.copyto(result, node_map.values_with_halo[to_base[e]]) + result = numpy.take(node_map.values_with_halo, to_base[e], axis=0, out=result) result += to_layer[e]*node_map.offset return result + + nelz = layers[:, 1]-layers[:, 0]-1 + nel = sum(nelz[:nel]) + to_base = numpy.repeat(numpy.arange(node_map.values_with_halo.shape[0], dtype=node_map.offset.dtype), nelz) + to_layer = numpy.concatenate([numpy.arange(nz, dtype=node_map.offset.dtype) for nz in nelz]) scalar_map = partial(_scalar_map, node_map, to_base, to_layer) if bsize == 1: return scalar_map, nel - ibase = numpy.arange(bsize, dtype=node_map.values.dtype) - def vector_map(bsize, ibase, e, result=None): index = None if result is not None: @@ -1888,4 +1872,5 @@ def vector_map(bsize, ibase, e, result=None): index *= bsize return numpy.add.outer(index, ibase, out=result) + ibase = numpy.arange(bsize, dtype=node_map.values.dtype) return partial(vector_map, bsize, ibase), nel From 9f95e2b1bc3084a2110afbe8a676c00a47aeb059 Mon Sep 17 00:00:00 2001 From: Pablo 
Brubeck Date: Wed, 29 Mar 2023 12:01:08 +0100 Subject: [PATCH 53/75] optimise FDMGetIndices --- firedrake/preconditioners/fdm.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 0601fe0a16..2e6f8fb552 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1835,31 +1835,41 @@ def extrude_node_map(node_map, bsize=1): """ nel = node_map.values.shape[0] if node_map.offset is None: - def scalar_map(e, result=None): - return numpy.take(node_map.values_with_halo, e, axis=0, out=result) + def scalar_map(map_values, e, result=None): + if result is None: + result = numpy.empty_like(map_values[e]) + numpy.copyto(result, map_values[e]) + return result + + scalar_map = partial(_scalar_map, node_map.values_with_halo) else: layers = node_map.iterset.layers_array if layers.shape[0] == 1: - def _scalar_map(node_map, nelz, e, result=None): - result = numpy.take(node_map.values_with_halo, e // nelz, axis=0, out=result) - result += (e % nelz)*node_map.offset + def _scalar_map(map_values, offset, nelz, e, result=None): + if result is None: + result = numpy.empty_like(offset) + numpy.copyto(result, offset) + result *= (e % nelz) + result += map_values[e // nelz] return result nelz = layers[0, 1]-layers[0, 0]-1 nel *= nelz - scalar_map = partial(_scalar_map, node_map, nelz) - + scalar_map = partial(_scalar_map, node_map.values_with_halo, node_map.offset, nelz) else: - def _scalar_map(node_map, to_base, to_layer, e, result=None): - result = numpy.take(node_map.values_with_halo, to_base[e], axis=0, out=result) - result += to_layer[e]*node_map.offset + def _scalar_map(map_values, offset, to_base, to_layer, e, result=None): + if result is None: + result = numpy.empty_like(offset) + numpy.copyto(result, offset) + result *= to_layer[e] + result += map_values[to_base[e]] return result nelz = layers[:, 1]-layers[:, 0]-1 nel = sum(nelz[:nel]) to_base = numpy.repeat(numpy.arange(node_map.values_with_halo.shape[0], dtype=node_map.offset.dtype), nelz) to_layer = numpy.concatenate([numpy.arange(nz, dtype=node_map.offset.dtype) for nz in nelz]) - scalar_map = partial(_scalar_map, node_map, to_base, to_layer) + scalar_map = partial(_scalar_map, node_map.values_with_halo, node_map.offset, to_base, to_layer) if bsize == 1: return scalar_map, nel From a42d76a6a8bed6b43a60bb77b4f1632cdefa3716 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 29 Mar 2023 12:12:31 +0100 Subject: [PATCH 54/75] fix typo --- firedrake/preconditioners/fdm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 2e6f8fb552..7a9a9d112c 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1835,7 +1835,7 @@ def extrude_node_map(node_map, bsize=1): """ nel = node_map.values.shape[0] if node_map.offset is None: - def scalar_map(map_values, e, result=None): + def _scalar_map(map_values, e, result=None): if result is None: result = numpy.empty_like(map_values[e]) numpy.copyto(result, map_values[e]) From da6899dd66f65367b021d05b539a5db92452016a Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 30 Mar 2023 17:17:10 +0100 Subject: [PATCH 55/75] fuse prealloaction and assembly loops, more elegant handling of element mass matrices --- firedrake/preconditioners/fdm.py | 277 ++++++++++++++----------------- firedrake/preconditioners/pmg.py | 41 +++-- 2 files changed, 149 
insertions(+), 169 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 7a9a9d112c..ef6a328a35 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -11,6 +11,7 @@ from firedrake.function import Function from firedrake.functionspace import FunctionSpace from firedrake.ufl_expr import TestFunction, TestFunctions, TrialFunctions +from firedrake.utils import cached_property from firedrake_citations import Citations from ufl.algorithms.ad import expand_derivatives from ufl.algorithms.expand_indices import expand_indices @@ -70,9 +71,6 @@ class FDMPC(PCBase): The PETSc options inspected by this class are: - 'fdm_mat_type': can be either 'aij' or 'sbaij' - 'fdm_static_condensation': are we assembling the Schur complement on facets? - - Static condensation is currently only implemented for the symmetric case, - use it at your own risk. """ _prefix = "fdm_" @@ -175,11 +173,11 @@ def initialize(self, pc): self.bc_nodes = numpy.empty(0, dtype=PETSc.IntType) # Assemble the FDM preconditioner with sparse local matrices - Pmat, self._assemble_P = self.assemble_fdm_op(V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation) - self._assemble_P() + Pmat, self._assemble_P = self.allocate_matrix(V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation) Pmat.setNullSpace(Amat.getNullSpace()) Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) Pmat.setNearNullSpace(Amat.getNearNullSpace()) + self._assemble_P() # Internally, we just set up a PC object that the user can configure # however from the PETSc command line. Since PC allows the user to specify @@ -202,9 +200,9 @@ def initialize(self, pc): fdmpc.setFromOptions() @PETSc.Log.EventDecorator("FDMPrealloc") - def assemble_fdm_op(self, V, J, bcs, fcp, pmat_type, use_static_condensation): + def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): """ - Assemble the sparse preconditioner from diagonal mass matrices. + Allocate the FDM sparse preconditioner. 
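For illustration only (not part of the patch series): the options listed in the class docstring above are read from the PETSc options database, so a solve could be driven roughly as follows. The variational problem (a, L, uh, bcs) and the inner "fdm_pc_type" choice are assumptions made for this sketch.

    from firedrake import solve

    solver_parameters = {
        "mat_type": "matfree",
        "ksp_type": "cg",
        "pc_type": "python",
        "pc_python_type": "firedrake.FDMPC",
        "fdm_mat_type": "aij",            # or "sbaij" for a symmetric (upper-triangular) Pmat
        "fdm_static_condensation": True,  # assemble the Schur complement on facets
        "fdm_pc_type": "cholesky",        # any PETSc PC may act on the assembled sparse operator
    }
    solve(a == L, uh, bcs=bcs, solver_parameters=solver_parameters)
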
:arg V: the :class:`.FunctionSpace` of the form arguments :arg J: the Jacobian bilinear form @@ -215,7 +213,7 @@ def assemble_fdm_op(self, V, J, bcs, fcp, pmat_type, use_static_condensation): :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and its assembly callable """ - ifacet, = numpy.nonzero([is_restricted(Vsub.finat_element)[1] for Vsub in V]) + ifacet = [i for i, Vsub in enumerate(V) if is_restricted(Vsub.finat_element)[1]] if len(ifacet) == 0: Vfacet = None Vbig = V @@ -237,22 +235,21 @@ def assemble_fdm_op(self, V, J, bcs, fcp, pmat_type, use_static_condensation): dofs = numpy.arange(value_size * Vbig.finat_element.space_dimension(), dtype=fdofs.dtype) idofs = numpy.setdiff1d(dofs, fdofs, assume_unique=True) self.ises = tuple(PETSc.IS().createGeneral(indices, comm=PETSc.COMM_SELF) for indices in (idofs, fdofs)) - self.submats = [None for _ in range(8)] + self.submats = [None for _ in range(7)] - self.reference_tensor_on_diag = {} + # Dictionary with the parent space and a method to form the Schur complement self.get_static_condensation = {} if Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block diagonal_interior = Vfacet.finat_element.formdegree == 0 and value_size == 1 get_schur = schur_complement_diagonal if diagonal_interior else schur_complement_block_qr - self.reference_tensor_on_diag[Vfacet] = self.assemble_reference_tensor(Vbig) - self.get_static_condensation[Vfacet] = lambda A: condense_element_mat(A, self.ises[0], self.ises[1], - self.submats, get_schur) + self.get_static_condensation[Vfacet] = Vbig, lambda A: condense_element_mat(A, self.ises[0], self.ises[1], + self.submats, get_schur) elif len(fdofs) and V.finat_element.formdegree == 0: # If we are in H(grad), we just pad with zeros on the statically-condensed pattern i1 = PETSc.IS().createGeneral(dofs, comm=PETSc.COMM_SELF) - self.get_static_condensation[V] = lambda Ae: condense_element_pattern(Ae, self.ises[0], i1, self.submats) + self.get_static_condensation[V] = Vbig, lambda Ae: condense_element_pattern(Ae, self.ises[0], i1, self.submats) @PETSc.Log.EventDecorator("FDMGetIndices") def cell_to_global(lgmap, cell_to_local, cell_index, result=None): @@ -391,8 +388,8 @@ def destroy(self, pc): if hasattr(self, "A"): objs.append(self.A) if hasattr(self, "pc"): - objs.append(self.pc.getOperators()[-1]) objs.append(self.pc) + objs.append(self.pc.getOperators()[-1]) if hasattr(self, "submats"): objs.extend(self.submats) if hasattr(self, "work_mats"): @@ -403,6 +400,24 @@ def destroy(self, pc): if hasattr(obj, "destroy"): obj.destroy() + @cached_property + def _element_mass_matrix(self): + data = self.get_coeffs(0) + data.fill(1.0E0) + shape = data.shape + (1,)*(3-len(data.shape)) + nrows = shape[0] * shape[1] + ai = numpy.arange(nrows+1, dtype=PETSc.IntType) + aj = numpy.tile(ai[:-1].reshape((-1, shape[1])), (1, shape[2])) + if shape[2] > 1: + ai *= shape[2] + data = numpy.tile(numpy.eye(shape[2], dtype=data.dtype), shape[:1] + (1,)*(len(shape)-1)) + Me = PETSc.Mat().createAIJ((nrows, nrows), bsize=shape[2], csr=(ai, aj, data), comm=PETSc.COMM_SELF) + return self.work_mats.setdefault("mass_matrix", Me) + + @cached_property + def _element_mass_diagonal(self): + return self.work_mats.setdefault("mass_diagonal", self._element_mass_matrix.getDiagonal()) + @PETSc.Log.EventDecorator("FDMSetValues") def set_values(self, A, Vrow, Vcol, addv, triu=False): """ @@ -415,96 +430,69 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): :arg addv: a 
`PETSc.Mat.InsertMode` :arg triu: are we assembling only the upper triangular part? """ + if self.nel == 0: + # This MPI rank does not own any elements, nothing to be done + return + + Vbig = None + condense_element_mat = lambda x: x set_submat = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) get_rindices = self.cell_to_global[Vrow] if Vrow == Vcol: - condense_element_mat = self.get_static_condensation.get(Vrow) get_cindices = lambda e, result=None: result update_A = lambda Ae, rindices, cindices: set_submat(A, Ae, rindices, rindices, addv) - # interpolators of basis and exterior derivative onto broken spaces - ctensor = self.reference_tensor_on_diag.get(Vrow) or self.assemble_reference_tensor(Vrow) - rtensor = PETSc.Mat().createTranspose(ctensor).convert(ctensor.getType()) + Vbig, condense_element_mat = self.get_static_condensation.get(Vrow, (Vbig, condense_element_mat)) else: - condense_element_mat = None get_cindices = self.cell_to_global[Vcol] update_A = lambda Ae, rindices, cindices: set_submat(A, Ae, rindices, cindices, addv) - ctensor = self.assemble_reference_tensor(Vcol) - rtensor = self.assemble_reference_tensor(Vrow, transpose=True) + Me = self._element_mass_matrix + # interpolation of basis and exterior derivative onto broken spaces + ctensor = self.assemble_reference_tensor(Vbig or Vcol) + rtensor = self.assemble_reference_tensor(Vbig or Vrow, transpose=True) # element matrix obtained via Equation (3.9) of Brubeck2022b - assemble_element_mat = lambda De, result=None: rtensor.matMatMult(De, ctensor, result=result) - - do_sort = True - if condense_element_mat is None: - condense_element_mat = lambda x: x - do_sort = False - - common_key = "coefs" - rindices = None - cindices = None - if A.getType() != PETSc.Mat.Type.PREALLOCATOR: + assemble_element_mat = partial(rtensor.matMatMult, Me, ctensor) + try: Ae = self.work_mats[Vrow, Vcol] - De = self.work_mats[common_key] - insert = PETSc.InsertMode.INSERT - work_vec = De.getDiagonal() - data = self.work_csr[2] - if len(data.shape) == 3: - @PETSc.Log.EventDecorator("FDMUpdateDiag") - def update_De(): - De.setValuesCSR(*self.work_csr, addv=insert) - De.assemble() - return De - else: - @PETSc.Log.EventDecorator("FDMUpdateDiag") - def update_De(): - De.setDiagonal(work_vec, addv=insert) - return De - data = work_vec.array_w + except KeyError: + Ae = self.work_mats.setdefault((Vrow, Vcol), assemble_element_mat()) - # Core assembly loop - for e in range(self.nel): - cindices = get_cindices(e, result=cindices) - rindices = get_rindices(e, result=rindices) - data = self.get_coeffs(e, result=data) - Ae = assemble_element_mat(update_De(), result=Ae) - update_A(condense_element_mat(Ae), rindices, cindices) - - work_vec.destroy() - - elif self.nel: - # Preallocation of the sparsity pattern - if common_key not in self.work_mats: - data = self.get_coeffs(0) - data.fill(1.0E0) - shape = data.shape + (1,)*(3-len(data.shape)) - nrows = shape[0] * shape[1] - ai = numpy.arange(nrows+1, dtype=PETSc.IntType) - aj = numpy.tile(ai[:-1].reshape((-1, shape[1])), (1, shape[2])) - if shape[2] > 1: - ai *= shape[2] - data = numpy.tile(numpy.eye(shape[2]), shape[:1] + (1,)*(len(shape)-1)) - - self.work_csr = (ai, aj, data) - De = PETSc.Mat().createAIJ((nrows, nrows), csr=self.work_csr, comm=PETSc.COMM_SELF) - self.work_mats[common_key] = De - - De = self.work_mats[common_key] - Ae = assemble_element_mat(De, result=None) - self.work_mats[Vrow, Vcol] = Ae - if do_sort: + insert = PETSc.InsertMode.INSERT + if A.getType() == PETSc.Mat.Type.PREALLOCATOR: + # Empty 
kernel for preallocation + if Vbig is not None: sort_interior_dofs(self.ises[0], Ae) Se = condense_element_mat(Ae) - - for e in range(self.nel): - cindices = get_cindices(e, result=cindices) - rindices = get_rindices(e, result=rindices) - update_A(Se, rindices, cindices) + element_kernel = lambda e, result=None: result + condense_element_mat = lambda Ae: Se + elif Me.getBlockSize() == 1: + # Kernel with diagonal mass matrix + diagonal = self._element_mass_diagonal + data = diagonal.array_w.reshape((-1,) + Vrow.shape) + + def element_kernel(e, result=None): + self.get_coeffs(e, result=data) + Me.setDiagonal(diagonal, addv=insert) + return assemble_element_mat(result=result) else: - self.work_csr = (None, None, None) - self.work_mats[common_key] = None - self.work_mats[Vrow, Vcol] = None - if Vcol == Vrow: - rtensor.destroy() + # Kernel with block diagonal mass matrix + ai, aj, data = Me.getValuesCSR() + data = data.reshape((-1,) + Vrow.shape * 2) + + def element_kernel(e, result=None): + self.get_coeffs(e, result=data) + Me.setValuesCSR(ai, aj, data, addv=insert) + Me.assemble() + return assemble_element_mat(result=result) + + cindices = None + rindices = None + # Core assembly loop + for e in range(self.nel): + cindices = get_cindices(e, result=cindices) + rindices = get_rindices(e, result=rindices) + Ae = element_kernel(e, result=Ae) + update_A(condense_element_mat(Ae), rindices, cindices) @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coefficients(self, J, fcp, block_diagonal=True): @@ -650,7 +638,7 @@ def assemble_reference_tensor(self, V, transpose=False): full_key = (degree, tdim, formdegree, value_size, False, False, False) if is_facet and full_key in cache: result = cache[full_key] - noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.comm) + noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) result = result.createSubMatrix(noperm, self.ises[1]) noperm.destroy() return cache.setdefault(key, result) @@ -663,15 +651,19 @@ def assemble_reference_tensor(self, V, transpose=False): if is_interior: e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") - A00 = fiat_reference_prolongator(e0, eq) - A10 = fiat_reference_prolongator(e0, e1, derivative=True) - A11 = numpy.eye(e1.space_dimension(), dtype=A00.dtype) - + comm = PETSc.COMM_SELF + A00 = petsc_sparse(fiat_reference_prolongator(e0, eq), comm=comm) + A10 = petsc_sparse(fiat_reference_prolongator(e0, e1, derivative=True), comm=comm) + A11 = petsc_sparse(numpy.eye(e1.space_dimension(), dtype=PETSc.RealType), comm=comm) B_blocks = mass_blocks(tdim, formdegree, A00, A11) A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) - result = block_mat(B_blocks + A_blocks, destroy=True) + result = block_mat(B_blocks + A_blocks, destroy_blocks=True) + A00.destroy() + A10.destroy() + A11.destroy() + if value_size != 1: - eye = petsc_sparse(numpy.eye(value_size)) + eye = petsc_sparse(numpy.eye(value_size), comm=comm) temp = result result = temp.kron(eye) temp.destroy() @@ -679,7 +671,7 @@ def assemble_reference_tensor(self, V, transpose=False): if is_facet: cache[full_key] = result - noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.comm) + noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) result = result.createSubMatrix(noperm, self.ises[1]) noperm.destroy() @@ -1025,7 +1017,7 @@ def kron3(A, B, C, 
scale=None): return result -def block_mat(A_blocks, destroy=False): +def block_mat(A_blocks, destroy_blocks=False): """Return a concrete Mat corresponding to a block matrix given as a list of lists. Optionally, destroys the input Mats if a new Mat is created.""" if len(A_blocks) == 1: @@ -1035,24 +1027,19 @@ def block_mat(A_blocks, destroy=False): result = PETSc.Mat().createNest(A_blocks, comm=A_blocks[0][0].getComm()) # A nest Mat would not allow us to take matrix-matrix products result = result.convert(mat_type=A_blocks[0][0].getType()) - if destroy: + if destroy_blocks: for row in A_blocks: for mat in row: mat.destroy() return result -def mass_blocks(tdim, formdegree, B00, B11, comm=None): +def mass_blocks(tdim, formdegree, B00, B11): """Construct mass block matrix on reference cell from 1D mass matrices B00 and B11. The 1D matrices may come with different test and trial spaces.""" - if comm is None: - comm = PETSc.COMM_SELF if tdim == 1: - return [[petsc_sparse(B11 if formdegree else B00, comm=comm)]] - - B00 = petsc_sparse(B00, comm=comm) - B11 = petsc_sparse(B11, comm=comm) - if tdim == 2: + B_diag = [B11 if formdegree else B00] + elif tdim == 2: if formdegree == 0: B_diag = [B00.kron(B00)] elif formdegree == 1: @@ -1069,36 +1056,27 @@ def mass_blocks(tdim, formdegree, B00, B11, comm=None): else: B_diag = [kron3(B11, B11, B11)] - B00.destroy() - B11.destroy() n = len(B_diag) if n == 1: return [B_diag] else: - B_zero = PETSc.Mat().createAIJ(B_diag[0].getSize(), nnz=(0, 0), comm=comm) - B_zero.assemble() - return [[B_diag[i] if i == j else B_zero for j in range(n)] for i in range(n)] + zero = PETSc.Mat().createAIJ(B_diag[0].getSize(), nnz=(0, 0), comm=B_diag[0].getComm()) + zero.assemble() + return [[B_diag[i] if i == j else zero for j in range(n)] for i in range(n)] -def diff_blocks(tdim, formdegree, A00, A11, A10, comm=None): +def diff_blocks(tdim, formdegree, A00, A11, A10): """Construct exterior derivative block matrix on reference cell from 1D mass matrices A00 and A11, and exterior derivative moments A10. 
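As a side note (not part of the patch): for tdim=2 and formdegree=0 these helpers reduce to Kronecker products of the 1D factors, which a dense NumPy sketch makes explicit. B00, A00 and A10 below are arbitrary stand-ins for the sparse 1D matrices, and PETSc's A.kron(B) corresponds to numpy.kron(A, B).

    import numpy as np

    p = 3                                     # illustrative 1D polynomial degree
    B00 = np.eye(p + 1)                       # stand-in for the 1D mass matrix
    A00 = B00                                 # plays the same role in diff_blocks
    A10 = np.diff(np.eye(p + 1), axis=0)      # stand-in for the 1D derivative moments, shape (p, p+1)
    mass_2d = np.kron(B00, B00)               # mass_blocks(2, 0, ...): single diagonal block
    grad_2d = np.vstack([np.kron(A00, A10),   # diff_blocks(2, 0, ...): first gradient component
                         np.kron(A10, A00)])  # second gradient component
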
The 1D matrices may come with different test and trial spaces.""" - if comm is None: - comm = PETSc.COMM_SELF if formdegree == tdim: ncols = A10.shape[0]**tdim - A_zero = PETSc.Mat().createAIJ((1, ncols), nnz=(0, 0), comm=comm) - A_zero.assemble() - return [[A_zero]] - - A10 = petsc_sparse(A10, comm=comm) - if tdim == 1: - return [[A10]] - - A00 = petsc_sparse(A00, comm=comm) - A11 = petsc_sparse(A11, comm=comm) - if tdim == 2: + zero = PETSc.Mat().createAIJ((1, ncols), nnz=(0, 0), comm=A10.getComm()) + zero.assemble() + A_blocks = [[zero]] + elif tdim == 1: + A_blocks = [[A10]] + elif tdim == 2: if formdegree == 0: A_blocks = [[A00.kron(A10)], [A10.kron(A00)]] elif formdegree == 1: @@ -1109,25 +1087,23 @@ def diff_blocks(tdim, formdegree, A00, A11, A10, comm=None): A_blocks = [[kron3(A00, A00, A10)], [kron3(A00, A10, A00)], [kron3(A10, A00, A00)]] elif formdegree == 1: size = tuple(A11.getSize()[k] * A10.getSize()[k] * A00.getSize()[k] for k in range(2)) - A_zero = PETSc.Mat().createAIJ(size, nnz=(0, 0), comm=comm) - A_zero.assemble() - A_blocks = [[kron3(A00, A10, A11, scale=-1), kron3(A00, A11, A10), A_zero], - [kron3(A10, A00, A11, scale=-1), A_zero, kron3(A11, A00, A10)], - [A_zero, kron3(A10, A11, A00), kron3(A11, A10, A00, scale=-1)]] + zero = PETSc.Mat().createAIJ(size, nnz=(0, 0), comm=A10.getComm()) + zero.assemble() + A_blocks = [[kron3(A00, A10, A11, scale=-1), kron3(A00, A11, A10), zero], + [kron3(A10, A00, A11, scale=-1), zero, kron3(A11, A00, A10)], + [zero, kron3(A10, A11, A00), kron3(A11, A10, A00, scale=-1)]] elif formdegree == 2: A_blocks = [[kron3(A10, A11, A11, scale=-1), kron3(A11, A10, A11), kron3(A11, A11, A10)]] - - A00.destroy() - A11.destroy() - A10.destroy() return A_blocks -def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[]): +def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None): """ Tabulate exterior derivative: Vc -> Vf as an explicit sparse matrix. Works for any tensor-product basis. These are the same matrices one needs for HypreAMS and friends. 
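For context only (an assumption of this note, not something the patch itself does): the resulting discrete gradient is the operator Hypre's AMS expects, and through petsc4py it could be supplied along the following lines, with Vc an H1 space, Vf the matching H(curl) space, and the PC's operators still to be set elsewhere.

    from firedrake.petsc import PETSc
    from firedrake.preconditioners.fdm import tabulate_exterior_derivative

    G = tabulate_exterior_derivative(Vc, Vf)   # sparse discrete gradient: Vc -> Vf
    pc = PETSc.PC().create(comm=Vf.comm)
    pc.setType("hypre")
    pc.setHYPREType("ams")
    pc.setHYPREDiscreteGradient(G)
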
""" + if comm is None: + comm = Vf.comm ec = Vc.finat_element ef = Vf.finat_element if ef.formdegree - ec.formdegree != 1: @@ -1139,10 +1115,13 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[]): degree = e0.degree() tdim = Vc.mesh().topological_dimension() - A11 = numpy.eye(degree, dtype=PETSc.RealType) - A00 = numpy.eye(degree+1, dtype=PETSc.RealType) - A10 = fiat_reference_prolongator(e0, e1, derivative=True) - Dhat = block_mat(diff_blocks(tdim, ec.formdegree, A00, A11, A10), destroy=True) + A00 = petsc_sparse(numpy.eye(degree+1, dtype=PETSc.RealType), comm=PETSc.COMM_SELF) + A10 = petsc_sparse(fiat_reference_prolongator(e0, e1, derivative=True), comm=PETSc.COMM_SELF) + A11 = petsc_sparse(numpy.eye(degree, dtype=PETSc.RealType), comm=PETSc.COMM_SELF) + Dhat = block_mat(diff_blocks(tdim, ec.formdegree, A00, A11, A10), destroy_blocks=True) + A00.destroy() + A10.destroy() + A11.destroy() if any(is_restricted(ec)) or any(is_restricted(ef)): scalar_element = lambda e: e._sub_element if isinstance(e, (ufl.TensorElement, ufl.VectorElement)) else e @@ -1158,7 +1137,7 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[]): if Vf.value_size > 1: temp = Dhat - eye = petsc_sparse(numpy.eye(Vf.value_size, dtype=PETSc.RealType)) + eye = petsc_sparse(numpy.eye(Vf.value_size, dtype=PETSc.RealType), comm=PETSc.COMM_SELF) Dhat = temp.kron(eye) temp.destroy() eye.destroy() @@ -1177,7 +1156,7 @@ def cell_to_global(lgmap, cell_to_local, e, result=None): sizes = tuple(V.dof_dset.layout_vec.getSizes() for V in (Vf, Vc)) block_size = Vf.dof_dset.layout_vec.getBlockSize() - preallocator = PETSc.Mat().create(comm=Vf.comm) + preallocator = PETSc.Mat().create(comm=comm) preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) preallocator.setSizes(sizes) preallocator.setUp() @@ -1192,7 +1171,7 @@ def cell_to_global(lgmap, cell_to_local, e, result=None): preallocator.assemble() nnz = get_preallocation(preallocator, sizes[0][0]) preallocator.destroy() - Dmat = PETSc.Mat().createAIJ(sizes, block_size, nnz=nnz, comm=Vf.comm) + Dmat = PETSc.Mat().createAIJ(sizes, block_size, nnz=nnz, comm=comm) Dmat.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) for e in range(nel): @@ -1285,9 +1264,9 @@ def assemble_reference_tensor(self, V): Afdm = [] # sparse interval mass and stiffness matrices for each direction Dfdm = [] # tabulation of normal derivatives at the boundary for each direction bdof = [] # indices of point evaluation dofs for each direction - cache = {} + cache = self._cache.setdefault("ipdg_reference_tensor", {}) for e in line_elements: - key = e.degree() + key = (e.degree(), eta) try: rtensor = cache[key] except KeyError: @@ -1314,11 +1293,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): condense_element_mat = lambda x: x get_rindices = self.cell_to_global[Vrow] - try: - rtensor = self.reference_tensor_on_diag[Vrow] - except KeyError: - rtensor = self.reference_tensor_on_diag.setdefault(Vrow, self.assemble_reference_tensor(Vrow)) - Afdm, Dfdm, bdof, axes_shifts = rtensor + Afdm, Dfdm, bdof, axes_shifts = self.assemble_reference_tensor(Vrow) Gq = self.coefficients.get("alpha") Bq = self.coefficients.get("beta") diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index c6b482b5ee..0d3544f26c 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -203,7 +203,7 @@ def _coarsen_form(a): return a cJ = _coarsen_form(fctx.J) - cJp = _coarsen_form(fctx.Jp) + cJp = cJ if fctx.Jp is fctx.J else _coarsen_form(fctx.Jp) # This fixes 
a subtle bug where you are applying PMGPC on a mixed # problem with geometric multigrid only on one block and an non-Lagrange element # on the other block (gmg breaks for non-Lagrange elements) @@ -268,20 +268,20 @@ def _coarsen_form(a): if cu in cJ.coefficients(): # Only inject state if the coarse state is a dependency of the coarse Jacobian. - inject_petscmat = cdm.createInjection(fdm) + inject = cdm.createInjection(fdm) def inject_state(): with cu.dat.vec_wo as xc, fu.dat.vec_ro as xf: - inject_petscmat.mult(xf, xc) + inject.mult(xf, xc) add_hook(parent, setup=inject_state, call_setup=True) # Coarsen the nullspace basis - def coarsen_nullspace(coarse_V, mat, fine_nullspace): + def coarsen_nullspace(coarse_V, interpolate, fine_nullspace): if isinstance(fine_nullspace, MixedVectorSpaceBasis): - if mat.type == 'python': - mat = mat.getPythonContext() - submats = [mat.getNestSubMatrix(i, i) for i in range(len(coarse_V))] + if interpolate.getType() == "python": + interpolate = interpolate.getPythonContext() + submats = [interpolate.getNestSubMatrix(i, i) for i in range(len(coarse_V))] coarse_bases = [] for fs, submat, basis in zip(coarse_V, submats, fine_nullspace._bases): if isinstance(basis, VectorSpaceBasis): @@ -294,10 +294,7 @@ def coarsen_nullspace(coarse_V, mat, fine_nullspace): for xf in fine_nullspace._petsc_vecs: wc = firedrake.Function(coarse_V) with wc.dat.vec_wo as xc: - if mat.getSize()[1] == xf.getSize(): - mat.mult(xf, xc) - else: - mat.multTranspose(xf, xc) + interpolate.multTranspose(xf, xc) coarse_vecs.append(wc) vsb = VectorSpaceBasis(coarse_vecs, constant=fine_nullspace._constant) vsb.orthonormalize() @@ -305,16 +302,24 @@ def coarsen_nullspace(coarse_V, mat, fine_nullspace): else: return fine_nullspace - if fctx._nullspace or fctx._near_nullspace or fctx._nullspace_T: - interp_petscmat, _ = cdm.createInterpolation(fdm) + interpolate = None + if fctx._nullspace or fctx._nullspace_T or fctx._near_nullspace: + interpolate, _ = cdm.createInterpolation(fdm) + cctx._nullspace = coarsen_nullspace(cV, interpolate, fctx._nullspace) + if fctx._nullspace_T is fctx._nullspace: + cctx._nullspace_T = cctx._nullspace else: - interp_petscmat = None - cctx._nullspace = coarsen_nullspace(cV, interp_petscmat, fctx._nullspace) + cctx._nullspace_T = coarsen_nullspace(cV, interpolate, fctx._nullspace_T) + if fctx._near_nullspace is fctx._nullspace: + cctx._near_nullspace = cctx._nullspace + elif fctx._near_nullspace is fctx._nullspace_T: + cctx._near_nullspace = cctx._nullspace_T + else: + cctx._near_nullspace = coarsen_nullspace(cV, interpolate, fctx._near_nullspace) + cctx.set_nullspace(cctx._nullspace, cV._ises, transpose=False, near=False) - cctx._near_nullspace = coarsen_nullspace(cV, interp_petscmat, fctx._near_nullspace) - cctx.set_nullspace(cctx._near_nullspace, cV._ises, transpose=False, near=True) - cctx._nullspace_T = coarsen_nullspace(cV, interp_petscmat, fctx._nullspace_T) cctx.set_nullspace(cctx._nullspace_T, cV._ises, transpose=True, near=False) + cctx.set_nullspace(cctx._near_nullspace, cV._ises, transpose=False, near=True) return cdm def coarsen_quadrature(self, metadata, fdeg, cdeg): From 031e95017468a34280d9f2167dc74bdd34ee52f5 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 3 Apr 2023 07:54:00 +0100 Subject: [PATCH 56/75] homogenize IPDG 3D tests --- firedrake/preconditioners/fdm.py | 238 ++++++++++++++++--------------- tests/regression/test_fdm.py | 71 +++++---- 2 files changed, 163 insertions(+), 146 deletions(-) diff --git a/firedrake/preconditioners/fdm.py 
b/firedrake/preconditioners/fdm.py index ef6a328a35..058d4ddeac 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -234,22 +234,25 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): fdofs = numpy.add.outer(value_size * fdofs, numpy.arange(value_size, dtype=fdofs.dtype)) dofs = numpy.arange(value_size * Vbig.finat_element.space_dimension(), dtype=fdofs.dtype) idofs = numpy.setdiff1d(dofs, fdofs, assume_unique=True) - self.ises = tuple(PETSc.IS().createGeneral(indices, comm=PETSc.COMM_SELF) for indices in (idofs, fdofs)) - self.submats = [None for _ in range(7)] + self.ises = [PETSc.IS().createGeneral(indices, comm=PETSc.COMM_SELF) for indices in (idofs, fdofs)] + self.submats = [None for _ in range(6)] # Dictionary with the parent space and a method to form the Schur complement self.get_static_condensation = {} if Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block - diagonal_interior = Vfacet.finat_element.formdegree == 0 and value_size == 1 - get_schur = schur_complement_diagonal if diagonal_interior else schur_complement_block_qr - self.get_static_condensation[Vfacet] = Vbig, lambda A: condense_element_mat(A, self.ises[0], self.ises[1], - self.submats, get_schur) - + if Vfacet.finat_element.formdegree == 0 and value_size == 1: + default_schur = schur_complement_diagonal + elif pmat_type.endswith("sbaij"): + default_schur = schur_complement_block_cholesky + else: + default_schur = schur_complement_block_qr + self.get_static_condensation[Vfacet] = Vbig, partial(condense_element_mat, default_schur, + self.ises[0], self.ises[1], self.submats) elif len(fdofs) and V.finat_element.formdegree == 0: # If we are in H(grad), we just pad with zeros on the statically-condensed pattern - i1 = PETSc.IS().createGeneral(dofs, comm=PETSc.COMM_SELF) - self.get_static_condensation[V] = Vbig, lambda Ae: condense_element_pattern(Ae, self.ises[0], i1, self.submats) + self.ises.append(PETSc.IS().createGeneral(dofs, comm=PETSc.COMM_SELF)) + self.get_static_condensation[V] = Vbig, partial(condense_element_pattern, self.ises[0], self.ises[2], self.submats) @PETSc.Log.EventDecorator("FDMGetIndices") def cell_to_global(lgmap, cell_to_local, cell_index, result=None): @@ -269,7 +272,7 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): self.lgmaps[Vsub] = lgmap own = Vsub.dof_dset.layout_vec.getLocalSize() - bdofs = numpy.nonzero(lgmap.indices[:own] < 0)[0].astype(PETSc.IntType) + bdofs = numpy.flatnonzero(lgmap.indices[:own] < 0).astype(PETSc.IntType) bc_rows[Vsub] = Vsub.dof_dset.lgmap.apply(bdofs, result=bdofs) self.nel = nel @@ -393,11 +396,11 @@ def destroy(self, pc): if hasattr(self, "submats"): objs.extend(self.submats) if hasattr(self, "work_mats"): - objs.extend(list(self.work_mats.values())) + objs.extend(self.work_mats.values()) if hasattr(self, "ises"): objs.extend(self.ises) for obj in objs: - if hasattr(obj, "destroy"): + if isinstance(obj, PETSc.Object): obj.destroy() @cached_property @@ -434,8 +437,11 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): # This MPI rank does not own any elements, nothing to be done return + def get_key(*args): + return tuple(map(lambda V: V.ufl_element() if V else None, args)) + Vbig = None - condense_element_mat = lambda x: x + condense_element_mat = lambda Ae, result=None: Ae set_submat = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) get_rindices = self.cell_to_global[Vrow] if Vrow == Vcol: @@ -447,24 +453,31 @@ 
def set_values(self, A, Vrow, Vcol, addv, triu=False): update_A = lambda Ae, rindices, cindices: set_submat(A, Ae, rindices, cindices, addv) Me = self._element_mass_matrix - # interpolation of basis and exterior derivative onto broken spaces + # Interpolation of basis and exterior derivative onto broken spaces ctensor = self.assemble_reference_tensor(Vbig or Vcol) rtensor = self.assemble_reference_tensor(Vbig or Vrow, transpose=True) - # element matrix obtained via Equation (3.9) of Brubeck2022b + # Element matrix obtained via Equation (3.9) of Brubeck2022b assemble_element_mat = partial(rtensor.matMatMult, Me, ctensor) + # Preallocate the element matrix + key = get_key(Vbig or Vrow, Vbig or Vcol, None) try: - Ae = self.work_mats[Vrow, Vcol] + Ae = self.work_mats[key] except KeyError: - Ae = self.work_mats.setdefault((Vrow, Vcol), assemble_element_mat()) + Ae = self.work_mats.setdefault(key, assemble_element_mat()) + # Preallocate the element Schur complement + key = get_key(Vrow, Vcol, Vbig) + try: + Se = self.work_mats[key] + except KeyError: + sort_interior_dofs(self.ises[0], Ae) + Se = self.work_mats.setdefault(key, condense_element_mat(Ae)) insert = PETSc.InsertMode.INSERT if A.getType() == PETSc.Mat.Type.PREALLOCATOR: # Empty kernel for preallocation - if Vbig is not None: - sort_interior_dofs(self.ises[0], Ae) - Se = condense_element_mat(Ae) element_kernel = lambda e, result=None: result - condense_element_mat = lambda Ae: Se + condense_element_mat = lambda Ae, result=None: result + elif Me.getBlockSize() == 1: # Kernel with diagonal mass matrix diagonal = self._element_mass_diagonal @@ -492,7 +505,8 @@ def element_kernel(e, result=None): cindices = get_cindices(e, result=cindices) rindices = get_rindices(e, result=rindices) Ae = element_kernel(e, result=Ae) - update_A(condense_element_mat(Ae), rindices, cindices) + Se = condense_element_mat(Ae, result=Se) + update_A(Se, rindices, cindices) @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coefficients(self, J, fcp, block_diagonal=True): @@ -578,30 +592,25 @@ def assemble_coefficients(self, J, fcp, block_diagonal=True): Jcell = expand_indices(expand_derivatives(ufl.Form(J.integrals_by_type("cell")))) mixed_form = ufl.replace(ufl.replace(Jcell, repgrad), repargs) - # Return coefficients and assembly callables, and cache them class - key = (mixed_form.signature(), mesh) - cache = self._cache.setdefault("coefficients", {}) - try: - return cache[key] - except KeyError: - if block_diagonal and V.shape: - from firedrake.assemble import assemble - M = assemble(mixed_form, mat_type="matfree", - form_compiler_parameters=fcp) - coefficients = {} - assembly_callables = [] - for iset, name in zip(Z.dof_dset.field_ises, ("beta", "alpha")): - sub = M.petscmat.createSubMatrix(iset, iset) - ctx = sub.getPythonContext() - coefficients[name] = ctx._block_diagonal - assembly_callables.append(ctx._assemble_block_diagonal) - else: - from firedrake.assemble import OneFormAssembler - tensor = Function(Z) - coefficients = {"beta": tensor.sub(0), "alpha": tensor.sub(1)} - assembly_callables = [OneFormAssembler(mixed_form, tensor=tensor, diagonal=True, - form_compiler_parameters=fcp).assemble] - return cache.setdefault(key, (coefficients, assembly_callables)) + # Return coefficients and assembly callables + coefficients = {} + assembly_callables = [] + if block_diagonal and V.shape: + from firedrake.assemble import assemble + M = assemble(mixed_form, mat_type="matfree", form_compiler_parameters=fcp) + for iset, name in zip(Z.dof_dset.field_ises, 
("beta", "alpha")): + sub = M.petscmat.createSubMatrix(iset, iset) + ctx = sub.getPythonContext() + coefficients[name] = ctx._block_diagonal + assembly_callables.append(ctx._assemble_block_diagonal) + else: + from firedrake.assemble import OneFormAssembler + tensor = Function(Z) + coefficients["beta"] = tensor.subfunctions[0] + coefficients["alpha"] = tensor.subfunctions[1] + assembly_callables.append(OneFormAssembler(mixed_form, tensor=tensor, diagonal=True, + form_compiler_parameters=fcp).assemble) + return coefficients, assembly_callables @PETSc.Log.EventDecorator("FDMRefTensor") def assemble_reference_tensor(self, V, transpose=False): @@ -651,10 +660,9 @@ def assemble_reference_tensor(self, V, transpose=False): if is_interior: e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") - comm = PETSc.COMM_SELF - A00 = petsc_sparse(fiat_reference_prolongator(e0, eq), comm=comm) - A10 = petsc_sparse(fiat_reference_prolongator(e0, e1, derivative=True), comm=comm) - A11 = petsc_sparse(numpy.eye(e1.space_dimension(), dtype=PETSc.RealType), comm=comm) + A00 = petsc_sparse(fiat_reference_prolongator(e0, eq), comm=PETSc.COMM_SELF) + A10 = petsc_sparse(fiat_reference_prolongator(e0, e1, derivative=True), comm=PETSc.COMM_SELF) + A11 = petsc_sparse(numpy.eye(e1.space_dimension(), dtype=PETSc.RealType), comm=PETSc.COMM_SELF) B_blocks = mass_blocks(tdim, formdegree, A00, A11) A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) result = block_mat(B_blocks + A_blocks, destroy_blocks=True) @@ -663,7 +671,7 @@ def assemble_reference_tensor(self, V, transpose=False): A11.destroy() if value_size != 1: - eye = petsc_sparse(numpy.eye(value_size), comm=comm) + eye = petsc_sparse(numpy.eye(value_size), comm=result.getComm()) temp = result result = temp.kron(eye) temp.destroy() @@ -679,26 +687,26 @@ def assemble_reference_tensor(self, V, transpose=False): @PETSc.Log.EventDecorator("FDMGetSchur") -def schur_complement_diagonal(submats): +def schur_complement_diagonal(submats, result=None): """ Used in static condensation. Take in blocks A00, A01, A10, A11, return the Schur complement A11 - A10 * inv(A00) * A01. Assumes A00 is diagonal. """ - structure = PETSc.Mat.Structure.SUBSET if submats[-1] else None + structure = PETSc.Mat.Structure.SUBSET if result else None A00, A01, A10, A11 = submats[:4] submats[4] = A00.getDiagonal(result=submats[4]) submats[4].reciprocal() submats[4].scale(-1) A01.diagonalScale(L=submats[4]) - submats[-1] = A10.matMult(A01, result=submats[-1]) - submats[-1].axpy(1.0, A11, structure=structure) - return submats[-1] + result = A10.matMult(A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result @PETSc.Log.EventDecorator("FDMGetSchur") -def schur_complement_block_inv(submats): +def schur_complement_block_inv(submats, result=None): """ Used in static condensation. Take in blocks A00, A01, A10, A11, return A11 - A10 * inv(A00) * A01. @@ -706,17 +714,16 @@ def schur_complement_block_inv(submats): Assumes that interior DOFs have been reordered to make A00 block diagonal with blocks of increasing dimension. 
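A dense NumPy analogue of the quantity every schur_complement_* variant returns may help fix ideas; the sizes below are arbitrary and the SPD matrix is a stand-in, whereas the patch computes the same complement block-wise and in place on PETSc CSR data.

    import numpy as np

    rng = np.random.default_rng(0)
    ni, nf = 6, 4                                # interior and facet dofs of one cell
    A = rng.standard_normal((ni + nf, ni + nf))
    A = A @ A.T + (ni + nf) * np.eye(ni + nf)    # symmetric positive definite stand-in
    A00, A01 = A[:ni, :ni], A[:ni, ni:]
    A10, A11 = A[ni:, :ni], A[ni:, ni:]
    S = A11 - A10 @ np.linalg.solve(A00, A01)    # Schur complement onto the facet dofs
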
""" - structure = PETSc.Mat.Structure.SUBSET if submats[-1] else None + structure = PETSc.Mat.Structure.SUBSET if result else None A00, A01, A10, A11 = submats[:4] indptr, indices, R = A00.getValuesCSR() - degree = numpy.diff(indptr) - - nblocks = numpy.count_nonzero(degree == 1) + degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) + istart = degree[0] == 1 + nblocks = counts[0] if istart else 0 zlice = slice(0, nblocks) numpy.reciprocal(R[zlice], out=R[zlice]) flops = nblocks - for k in range(2, degree[-1]+1): - nblocks = numpy.count_nonzero(degree == k) + for k, nblocks in zip(degree[istart:], counts[istart:]): zlice = slice(zlice.stop, zlice.stop + k*nblocks) A = R[zlice].reshape((-1, k, k)) R[zlice] = numpy.linalg.inv(A).reshape((-1,)) @@ -726,13 +733,13 @@ def schur_complement_block_inv(submats): A00.setValuesCSR(indptr, indices, R) A00.assemble() A00.scale(-1.0) - submats[-1] = A10.matMatMult(A00, A01, result=submats[-1]) - submats[-1].axpy(1.0, A11, structure=structure) - return submats[-1] + result = A10.matMatMult(A00, A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result @PETSc.Log.EventDecorator("FDMGetSchur") -def schur_complement_block_cholesky(submats): +def schur_complement_block_cholesky(submats, result=None): """ Used in static condensation. Take in blocks A00, A01, A10, A11, return A11 - A10 * inv(A00) * A01. @@ -740,18 +747,17 @@ def schur_complement_block_cholesky(submats): Assumes that interior DOFs have been reordered to make A00 block diagonal with blocks of increasing dimension. """ - structure = PETSc.Mat.Structure.SUBSET if submats[-1] else None + structure = PETSc.Mat.Structure.SUBSET if result else None A00, A01, A10, A11 = submats[:4] indptr, indices, R = A00.getValuesCSR() - degree = numpy.diff(indptr) - - nblocks = numpy.count_nonzero(degree == 1) + degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) + istart = degree[0] == 1 + nblocks = counts[0] if istart else 0 zlice = slice(0, nblocks) numpy.sqrt(R[zlice], out=R[zlice]) numpy.reciprocal(R[zlice], out=R[zlice]) flops = 2*nblocks - for k in range(2, degree[-1]+1): - nblocks = numpy.count_nonzero(degree == k) + for k, nblocks in zip(degree[istart:], counts[istart:]): zlice = slice(zlice.stop, zlice.stop + k*nblocks) A = R[zlice].reshape((-1, k, k)) R[zlice] = numpy.linalg.inv(numpy.linalg.cholesky(A)).reshape((-1)) @@ -762,13 +768,13 @@ def schur_complement_block_cholesky(submats): A00.assemble() submats[4] = A10.matTransposeMult(A00, result=submats[4]) A00.scale(-1.0) - submats[-1] = submats[4].matMatMult(A00, A01, result=submats[-1]) - submats[-1].axpy(1.0, A11, structure=structure) - return submats[-1] + result = submats[4].matMatMult(A00, A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result @PETSc.Log.EventDecorator("FDMGetSchur") -def schur_complement_block_qr(submats): +def schur_complement_block_qr(submats, result=None): """ Used in static condensation. Take in blocks A00, A01, A10, A11, return A11 - A10 * inv(A00) * A01. @@ -776,18 +782,18 @@ def schur_complement_block_qr(submats): Assumes that interior DOFs have been reordered to make A00 block diagonal with blocks of increasing dimension. 
""" - structure = PETSc.Mat.Structure.SUBSET if submats[-1] else None + structure = PETSc.Mat.Structure.SUBSET if result else None A00, A01, A10, A11 = submats[:4] indptr, indices, R = A00.getValuesCSR() - degree = numpy.diff(indptr) Q = numpy.ones(R.shape, dtype=R.dtype) - nblocks = numpy.count_nonzero(degree == 1) + degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) + istart = degree[0] == 1 + nblocks = counts[0] if istart else 0 zlice = slice(0, nblocks) numpy.reciprocal(R[zlice], out=R[zlice]) flops = nblocks - for k in range(2, degree[-1]+1): - nblocks = numpy.count_nonzero(degree == k) + for k, nblocks in zip(degree[istart:], counts[istart:]): zlice = slice(zlice.stop, zlice.stop + k*nblocks) A = R[zlice].reshape((-1, k, k)) q, r = numpy.linalg.qr(A, mode="complete") @@ -802,13 +808,13 @@ def schur_complement_block_qr(submats): A00.setValuesCSR(indptr, indices, R) A00.assemble() A00.scale(-1.0) - submats[-1] = A10.matMatMult(A00, submats[4], result=submats[-1]) - submats[-1].axpy(1.0, A11, structure=structure) - return submats[-1] + result = A10.matMatMult(A00, submats[4], result=result) + result.axpy(1.0, A11, structure=structure) + return result @PETSc.Log.EventDecorator("FDMGetSchur") -def schur_complement_block_svd(submats): +def schur_complement_block_svd(submats, result=None): """ Used in static condensation. Take in blocks A00, A01, A10, A11, return A11 - A10 * inv(A00) * A01. @@ -816,22 +822,21 @@ def schur_complement_block_svd(submats): Assumes that interior DOFs have been reordered to make A00 block diagonal with blocks of increasing dimension. """ - structure = PETSc.Mat.Structure.SUBSET if submats[-1] else None + structure = PETSc.Mat.Structure.SUBSET if result else None A00, A01, A10, A11 = submats[:4] indptr, indices, U = A00.getValuesCSR() - degree = numpy.diff(indptr) V = numpy.ones(U.shape, dtype=U.dtype) submats[4] = A00.getDiagonal(result=submats[4]) D = submats[4] - nblocks = numpy.count_nonzero(degree == 1) + degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) + istart = degree[0] == 1 + nblocks = counts[0] if istart else 0 bslice = slice(0, nblocks) dslice = slice(0, nblocks) numpy.sign(D.array_r[dslice], out=U[bslice]) - flops = nblocks - for k in range(2, degree[-1]+1): - nblocks = numpy.count_nonzero(degree == k) + for k, nblocks in zip(degree[istart:], counts[istart:]): bslice = slice(bslice.stop, bslice.stop + k*nblocks) dslice = slice(dslice.stop, dslice.stop + nblocks) A = U[bslice].reshape((-1, k, k)) @@ -853,32 +858,32 @@ def schur_complement_block_svd(submats): A00.setValuesCSR(indptr, indices, U) A00.assemble() A00.diagonalScale(L=D) - submats[-1] = submats[5].matMatMult(A00, A01, result=submats[-1]) - submats[-1].axpy(1.0, A11, structure=structure) - return submats[-1] + result = submats[5].matMatMult(A00, A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result @PETSc.Log.EventDecorator("FDMCondense") -def condense_element_mat(A, i0, i1, submats, get_schur_complement): +def condense_element_mat(get_schur_complement, i0, i1, submats, A, result=None): """Return the Schur complement associated to indices in i1, condensing i0 out""" isrows = [i0, i0, i1, i1] iscols = [i0, i1, i0, i1] submats[:4] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:4] if submats[0] else None) - return get_schur_complement(submats) + return get_schur_complement(submats, result=result) @PETSc.Log.EventDecorator("FDMCondense") -def condense_element_pattern(A, i0, i1, submats): +def 
condense_element_pattern(i0, i1, submats, A, result=None): """Add zeroes on the statically condensed pattern so that you can run ICC(0)""" + structure = PETSc.Mat.Structure.SUBSET if result else None isrows = [i0, i0, i1] iscols = [i0, i1, i0] - structure = PETSc.Mat.Structure.SUBSET if submats[3] else None submats[:3] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:3] if submats[0] else None) A00, A01, A10 = submats[:3] A00.scale(0.0) - submats[3] = A10.matMatMult(A00, A01, result=submats[3]) - submats[3].axpy(1.0, A, structure=structure) - return submats[3] + result = A10.matMatMult(A00, A01, result=result) + result.axpy(1.0, A, structure=structure) + return result @PETSc.Log.EventDecorator("LoadCode") @@ -900,7 +905,7 @@ def get_pointer(obj): @PETSc.Log.EventDecorator(name) def wrapper(*args): - return funptr(*list(map(get_pointer, args))) + return funptr(*map(get_pointer, args)) return wrapper @@ -978,17 +983,18 @@ def sort_interior_dofs(idofs, A): increasing dimension along its diagonal.""" Aii = A.createSubMatrix(idofs, idofs) indptr, indices, _ = Aii.getValuesCSR() - n = idofs.getSize() - visit = numpy.zeros((n, ), dtype=bool) + degree = numpy.diff(indptr) perm = [] - degree = 0 - while not visit.all(): - degree += 1 - for i in range(n): - if not visit[i]: - neigh = indices[slice(*indptr[i:i+2])] - if len(neigh) == degree: - visit[neigh] = True + for k in sorted(numpy.unique(degree)): + if k == 1: + neigh = numpy.flatnonzero(degree == k) + degree[neigh] = 0 + perm.extend(neigh) + else: + for i in range(len(degree)): + if degree[i] == k: + neigh = indices[slice(*indptr[i:i+2])] + degree[neigh] = 0 perm.extend(neigh) idofs.setIndices(idofs.getIndices()[perm]) Aii.destroy() @@ -1127,9 +1133,9 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None): scalar_element = lambda e: e._sub_element if isinstance(e, (ufl.TensorElement, ufl.VectorElement)) else e fdofs = restricted_dofs(ef, create_element(unrestrict_element(scalar_element(Vf.ufl_element())))) cdofs = restricted_dofs(ec, create_element(unrestrict_element(scalar_element(Vc.ufl_element())))) - fises = PETSc.IS().createGeneral(fdofs, comm=PETSc.COMM_SELF) - cises = PETSc.IS().createGeneral(cdofs, comm=PETSc.COMM_SELF) temp = Dhat + fises = PETSc.IS().createGeneral(fdofs, comm=temp.getComm()) + cises = PETSc.IS().createGeneral(cdofs, comm=temp.getComm()) Dhat = temp.createSubMatrix(fises, cises) temp.destroy() fises.destroy() @@ -1137,7 +1143,7 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None): if Vf.value_size > 1: temp = Dhat - eye = petsc_sparse(numpy.eye(Vf.value_size, dtype=PETSc.RealType), comm=PETSc.COMM_SELF) + eye = petsc_sparse(numpy.eye(Vf.value_size, dtype=PETSc.RealType), comm=temp.getComm()) Dhat = temp.kron(eye) temp.destroy() eye.destroy() diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index 1104c8bb91..42b4319eab 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -226,20 +226,26 @@ def test_ipdg_direct_solver(fs): x = SpatialCoordinate(mesh) gdim = mesh.geometric_dimension() ncomp = fs.ufl_element().value_size() - u_exact = dot(x, x) - if ncomp: - u_exact = as_vector([u_exact + Constant(k) for k in range(ncomp)]) + + homogenize = gdim > 2 + if homogenize: + rg = RandomGenerator(PCG64(seed=123456789)) + uh = rg.uniform(fs, -1, 1) + u_exact = zero(uh.ufl_shape) + u_bc = 0 + else: + uh = Function(fs) + u_exact = dot(x, x) + if ncomp: + u_exact = as_vector([u_exact + Constant(k) for k in range(ncomp)]) + u_bc = 
u_exact degree = fs.ufl_element().degree() try: - degree, = set(degree) + degree = max(degree) except TypeError: pass - quad_degree = 2*(degree+1)-1 - uh = Function(fs) - u = TrialFunction(fs) - v = TestFunction(fs) # problem coefficients A1 = diag(Constant(range(1, gdim+1))) @@ -247,19 +253,13 @@ def test_ipdg_direct_solver(fs): alpha = lambda grad_u: dot(dot(A2, grad_u), A1) beta = diag(Constant(range(2, ncomp+2))) - n = FacetNormal(mesh) - f_exact = alpha(grad(u_exact)) - B = dot(beta, u_exact) - div(f_exact) - T = dot(f_exact, n) - extruded = mesh.cell_set._extruded subs = (1,) if gdim > 1: subs += (3,) if extruded: subs += ("top",) - - bcs = [DirichletBC(fs, u_exact, sub) for sub in subs] + bcs = [DirichletBC(fs, u_bc, sub) for sub in subs] dirichlet_ids = subs if "on_boundary" in dirichlet_ids: @@ -287,24 +287,31 @@ def test_ipdg_direct_solver(fs): ds_Dir = sum(ds_Dir, ds(tuple())) ds_Neu = sum(ds_Neu, ds(tuple())) + n = FacetNormal(mesh) + h = CellVolume(mesh) / FacetArea(mesh) eta = Constant((degree+1)**2) - h = CellVolume(mesh)/FacetArea(mesh) - penalty = eta/h + penalty = eta / h - outer_jump = lambda w, n: outer(w("+"), n("+")) + outer(w("-"), n("-")) - num_flux = lambda w: alpha(avg(penalty/2) * outer_jump(w, n)) - num_flux_b = lambda w: alpha((penalty/2) * outer(w, n)) + num_flux = lambda u: avg(penalty) * avg(outer(u, n)) + num_flux_b = lambda u: (penalty/2) * outer(u, n) + alpha_inner = lambda v, u: inner(v, alpha(u)) - a = (inner(v, dot(beta, u)) * dxq - + inner(grad(v), alpha(grad(u))) * dxq - + inner(outer_jump(v, n), num_flux(u)-avg(alpha(grad(u)))) * dS_int - + inner(outer_jump(u, n), num_flux(v)-avg(alpha(grad(v)))) * dS_int - + inner(outer(v, n), num_flux_b(u)-alpha(grad(u))) * ds_Dir - + inner(outer(u, n), num_flux_b(v)-alpha(grad(v))) * ds_Dir) + a_int = lambda v, u: alpha_inner(2 * avg(outer(v, n)), num_flux(u) - avg(grad(u))) * dS_int + a_Dir = lambda v, u: alpha_inner(outer(v, n), num_flux_b(u) - grad(u)) * ds_Dir - L = (inner(v, B)*dxq - + inner(v, T)*ds_Neu - + inner(outer(u_exact, n), 2*num_flux_b(v)-alpha(grad(v))) * ds_Dir) + u = TrialFunction(fs) + v = TestFunction(fs) + a = ((inner(v, dot(beta, u)) + alpha_inner(grad(v), grad(u))) * dxq + + a_int(v, u) + a_int(u, v) + a_Dir(v, u) + a_Dir(u, v)) + + if homogenize: + L = 0 + else: + f_exact = alpha(grad(u_exact)) + B = dot(beta, u_exact) - div(f_exact) + T = dot(f_exact, n) + L = (inner(v, B)*dxq + inner(v, T)*ds_Neu + + alpha_inner(outer(u_exact, n), 2*num_flux_b(v) - grad(v)) * ds_Dir) problem = LinearVariationalProblem(a, L, uh, bcs=bcs) solver = LinearVariationalSolver(problem, solver_parameters={ @@ -324,4 +331,8 @@ def test_ipdg_direct_solver(fs): solver.solve() assert solver.snes.ksp.getIterationNumber() == 1 - assert norm(u_exact-uh, "H1") < 1.0E-8 + if homogenize: + with uh.dat.vec_ro as uvec: + assert uvec.norm() < 1E-8 + else: + assert norm(u_exact-uh, "H1") < 1.0E-8 From 24dbf065fb7eb4a2c2a0ca1615c351239254cbe2 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 3 Apr 2023 08:56:02 +0100 Subject: [PATCH 57/75] cast bool to int --- firedrake/preconditioners/fdm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 058d4ddeac..922d3eb23e 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -718,7 +718,7 @@ def schur_complement_block_inv(submats, result=None): A00, A01, A10, A11 = submats[:4] indptr, indices, R = A00.getValuesCSR() degree, counts = 
numpy.unique(numpy.diff(indptr), return_counts=True) - istart = degree[0] == 1 + istart = int(degree[0] == 1) nblocks = counts[0] if istart else 0 zlice = slice(0, nblocks) numpy.reciprocal(R[zlice], out=R[zlice]) @@ -751,7 +751,7 @@ def schur_complement_block_cholesky(submats, result=None): A00, A01, A10, A11 = submats[:4] indptr, indices, R = A00.getValuesCSR() degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) - istart = degree[0] == 1 + istart = int(degree[0] == 1) nblocks = counts[0] if istart else 0 zlice = slice(0, nblocks) numpy.sqrt(R[zlice], out=R[zlice]) @@ -788,7 +788,7 @@ def schur_complement_block_qr(submats, result=None): Q = numpy.ones(R.shape, dtype=R.dtype) degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) - istart = degree[0] == 1 + istart = int(degree[0] == 1) nblocks = counts[0] if istart else 0 zlice = slice(0, nblocks) numpy.reciprocal(R[zlice], out=R[zlice]) @@ -830,7 +830,7 @@ def schur_complement_block_svd(submats, result=None): D = submats[4] degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) - istart = degree[0] == 1 + istart = int(degree[0] == 1) nblocks = counts[0] if istart else 0 bslice = slice(0, nblocks) dslice = slice(0, nblocks) From 2747d63798d74199c40c79b2781fc7e40e78d971 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 3 Apr 2023 14:26:02 +0100 Subject: [PATCH 58/75] SchurComplementBuilder class --- firedrake/preconditioners/fdm.py | 471 +++++++++++++++---------------- tests/regression/test_fdm.py | 15 +- 2 files changed, 240 insertions(+), 246 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 922d3eb23e..5b4d4e8c61 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -232,27 +232,24 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): value_size = Vbig.value_size if value_size != 1: fdofs = numpy.add.outer(value_size * fdofs, numpy.arange(value_size, dtype=fdofs.dtype)) + self.fises = PETSc.IS().createGeneral(fdofs, comm=PETSc.COMM_SELF) dofs = numpy.arange(value_size * Vbig.finat_element.space_dimension(), dtype=fdofs.dtype) idofs = numpy.setdiff1d(dofs, fdofs, assume_unique=True) - self.ises = [PETSc.IS().createGeneral(indices, comm=PETSc.COMM_SELF) for indices in (idofs, fdofs)] - self.submats = [None for _ in range(6)] # Dictionary with the parent space and a method to form the Schur complement self.get_static_condensation = {} if Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block if Vfacet.finat_element.formdegree == 0 and value_size == 1: - default_schur = schur_complement_diagonal + sc_builder = SchurComplementDiagonal elif pmat_type.endswith("sbaij"): - default_schur = schur_complement_block_cholesky + sc_builder = SchurComplementBlockCholesky else: - default_schur = schur_complement_block_qr - self.get_static_condensation[Vfacet] = Vbig, partial(condense_element_mat, default_schur, - self.ises[0], self.ises[1], self.submats) + sc_builder = SchurComplementBlockQR + self.get_static_condensation[Vfacet] = Vbig, sc_builder(idofs, fdofs, PETSc.COMM_SELF).condense elif len(fdofs) and V.finat_element.formdegree == 0: # If we are in H(grad), we just pad with zeros on the statically-condensed pattern - self.ises.append(PETSc.IS().createGeneral(dofs, comm=PETSc.COMM_SELF)) - self.get_static_condensation[V] = Vbig, partial(condense_element_pattern, self.ises[0], self.ises[2], self.submats) + 
self.get_static_condensation[V] = Vbig, SchurComplementPattern(idofs, dofs, PETSc.COMM_SELF).condense @PETSc.Log.EventDecorator("FDMGetIndices") def cell_to_global(lgmap, cell_to_local, cell_index, result=None): @@ -387,21 +384,14 @@ def view(self, pc, viewer=None): self.pc.view(viewer) def destroy(self, pc): - objs = [] if hasattr(self, "A"): - objs.append(self.A) + self.A.petscmat.destroy() if hasattr(self, "pc"): - objs.append(self.pc) - objs.append(self.pc.getOperators()[-1]) - if hasattr(self, "submats"): - objs.extend(self.submats) + self.pc.getOperators()[-1].destroy() + self.pc.destroy() if hasattr(self, "work_mats"): - objs.extend(self.work_mats.values()) - if hasattr(self, "ises"): - objs.extend(self.ises) - for obj in objs: - if isinstance(obj, PETSc.Object): - obj.destroy() + for mat in self.work_mats.values(): + mat.destroy() @cached_property def _element_mass_matrix(self): @@ -469,7 +459,6 @@ def get_key(*args): try: Se = self.work_mats[key] except KeyError: - sort_interior_dofs(self.ises[0], Ae) Se = self.work_mats.setdefault(key, condense_element_mat(Ae)) insert = PETSc.InsertMode.INSERT @@ -648,7 +637,7 @@ def assemble_reference_tensor(self, V, transpose=False): if is_facet and full_key in cache: result = cache[full_key] noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) - result = result.createSubMatrix(noperm, self.ises[1]) + result = result.createSubMatrix(noperm, self.fises) noperm.destroy() return cache.setdefault(key, result) @@ -686,204 +675,232 @@ def assemble_reference_tensor(self, V, transpose=False): return cache.setdefault(key, result) -@PETSc.Log.EventDecorator("FDMGetSchur") -def schur_complement_diagonal(submats, result=None): - """ - Used in static condensation. Take in blocks A00, A01, A10, A11, - return the Schur complement A11 - A10 * inv(A00) * A01. - - Assumes A00 is diagonal. - """ - structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = submats[:4] - submats[4] = A00.getDiagonal(result=submats[4]) - submats[4].reciprocal() - submats[4].scale(-1) - A01.diagonalScale(L=submats[4]) - result = A10.matMult(A01, result=result) - result.axpy(1.0, A11, structure=structure) - return result - - -@PETSc.Log.EventDecorator("FDMGetSchur") -def schur_complement_block_inv(submats, result=None): - """ - Used in static condensation. Take in blocks A00, A01, A10, A11, - return A11 - A10 * inv(A00) * A01. - - Assumes that interior DOFs have been reordered to make A00 - block diagonal with blocks of increasing dimension. - """ - structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = submats[:4] - indptr, indices, R = A00.getValuesCSR() - degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) - istart = int(degree[0] == 1) - nblocks = counts[0] if istart else 0 - zlice = slice(0, nblocks) - numpy.reciprocal(R[zlice], out=R[zlice]) - flops = nblocks - for k, nblocks in zip(degree[istart:], counts[istart:]): - zlice = slice(zlice.stop, zlice.stop + k*nblocks) - A = R[zlice].reshape((-1, k, k)) - R[zlice] = numpy.linalg.inv(A).reshape((-1,)) - flops += nblocks * (k**3) - - PETSc.Log.logFlops(flops) - A00.setValuesCSR(indptr, indices, R) - A00.assemble() - A00.scale(-1.0) - result = A10.matMatMult(A00, A01, result=result) - result.axpy(1.0, A11, structure=structure) - return result - - -@PETSc.Log.EventDecorator("FDMGetSchur") -def schur_complement_block_cholesky(submats, result=None): - """ - Used in static condensation. 
Take in blocks A00, A01, A10, A11, - return A11 - A10 * inv(A00) * A01. - - Assumes that interior DOFs have been reordered to make A00 - block diagonal with blocks of increasing dimension. - """ - structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = submats[:4] - indptr, indices, R = A00.getValuesCSR() - degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) - istart = int(degree[0] == 1) - nblocks = counts[0] if istart else 0 - zlice = slice(0, nblocks) - numpy.sqrt(R[zlice], out=R[zlice]) - numpy.reciprocal(R[zlice], out=R[zlice]) - flops = 2*nblocks - for k, nblocks in zip(degree[istart:], counts[istart:]): - zlice = slice(zlice.stop, zlice.stop + k*nblocks) - A = R[zlice].reshape((-1, k, k)) - R[zlice] = numpy.linalg.inv(numpy.linalg.cholesky(A)).reshape((-1)) - flops += nblocks * ((k**3)//3 + k**3) - - PETSc.Log.logFlops(flops) - A00.setValuesCSR(indptr, indices, R) - A00.assemble() - submats[4] = A10.matTransposeMult(A00, result=submats[4]) - A00.scale(-1.0) - result = submats[4].matMatMult(A00, A01, result=result) - result.axpy(1.0, A11, structure=structure) - return result - - -@PETSc.Log.EventDecorator("FDMGetSchur") -def schur_complement_block_qr(submats, result=None): +class SchurComplementBuilder(object): """ - Used in static condensation. Take in blocks A00, A01, A10, A11, - return A11 - A10 * inv(A00) * A01. - - Assumes that interior DOFs have been reordered to make A00 - block diagonal with blocks of increasing dimension. - """ - structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = submats[:4] - indptr, indices, R = A00.getValuesCSR() - Q = numpy.ones(R.shape, dtype=R.dtype) - - degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) - istart = int(degree[0] == 1) - nblocks = counts[0] if istart else 0 - zlice = slice(0, nblocks) - numpy.reciprocal(R[zlice], out=R[zlice]) - flops = nblocks - for k, nblocks in zip(degree[istart:], counts[istart:]): - zlice = slice(zlice.stop, zlice.stop + k*nblocks) - A = R[zlice].reshape((-1, k, k)) - q, r = numpy.linalg.qr(A, mode="complete") - Q[zlice] = q.reshape((-1,)) - R[zlice] = numpy.linalg.inv(r).reshape((-1,)) - flops += nblocks * ((4*k**3)//3 + k**3) - - PETSc.Log.logFlops(flops) - A00.setValuesCSR(indptr, indices, Q) - A00.assemble() - submats[4] = A00.transposeMatMult(A01, result=submats[4]) - A00.setValuesCSR(indptr, indices, R) - A00.assemble() - A00.scale(-1.0) - result = A10.matMatMult(A00, submats[4], result=result) - result.axpy(1.0, A11, structure=structure) - return result - - -@PETSc.Log.EventDecorator("FDMGetSchur") -def schur_complement_block_svd(submats, result=None): + Class to build element Schur complement. """ - Used in static condensation. Take in blocks A00, A01, A10, A11, - return A11 - A10 * inv(A00) * A01. - Assumes that interior DOFs have been reordered to make A00 - block diagonal with blocks of increasing dimension. 
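# [Editorial aside, not part of the patch] Each of these removed routines, and
# the SchurComplementBuilder kernels that replace them in this commit, computes
# the same element Schur complement A11 - A10 * inv(A00) * A01 by a different
# factorization of A00. A dense numpy reference, assuming the interior dofs
# are ordered first (hypothetical helper, for illustration only):
import numpy as np

def dense_condense(A, ni):
    A00, A01 = A[:ni, :ni], A[:ni, ni:]
    A10, A11 = A[ni:, :ni], A[ni:, ni:]
    return A11 - A10 @ np.linalg.solve(A00, A01)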
- """ - structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = submats[:4] - indptr, indices, U = A00.getValuesCSR() - V = numpy.ones(U.shape, dtype=U.dtype) - submats[4] = A00.getDiagonal(result=submats[4]) - D = submats[4] - - degree, counts = numpy.unique(numpy.diff(indptr), return_counts=True) - istart = int(degree[0] == 1) - nblocks = counts[0] if istart else 0 - bslice = slice(0, nblocks) - dslice = slice(0, nblocks) - numpy.sign(D.array_r[dslice], out=U[bslice]) - flops = nblocks - for k, nblocks in zip(degree[istart:], counts[istart:]): - bslice = slice(bslice.stop, bslice.stop + k*nblocks) - dslice = slice(dslice.stop, dslice.stop + nblocks) - A = U[bslice].reshape((-1, k, k)) - - u, s, v = numpy.linalg.svd(A, full_matrices=False) - D.array_w[dslice] = s.reshape((-1,)) - U[bslice] = numpy.transpose(u, axes=(0, 2, 1)).reshape((-1,)) - V[bslice] = numpy.transpose(v, axes=(0, 2, 1)).reshape((-1,)) - flops += nblocks * ((4*k**3)//3 + 4*k**3) - - PETSc.Log.logFlops(flops) - D.sqrtabs() - D.reciprocal() - A00.setValuesCSR(indptr, indices, V) - A00.assemble() - A00.diagonalScale(R=D) - submats[5] = A10.matMult(A00, result=submats[5]) - D.scale(-1.0) - A00.setValuesCSR(indptr, indices, U) - A00.assemble() - A00.diagonalScale(L=D) - result = submats[5].matMatMult(A00, A01, result=result) - result.axpy(1.0, A11, structure=structure) - return result + def __init__(self, idofs, fdofs, comm): + i0 = PETSc.IS().createGeneral(idofs, comm=comm) + i1 = PETSc.IS().createGeneral(fdofs, comm=comm) + self.ises = (i0, i1) + self.isrows = [i0, i0, i1, i1] + self.iscols = [i0, i1, i0, i1] + self.work = [None for _ in range(2)] + self.submats = [] + self.slices = {} + + def __del__(self): + self.ises[0].destroy() + self.ises[1].destroy() + for mat in self.submats: + if isinstance(mat, PETSc.Object): + mat.destroy() + for obj in self.work: + if isinstance(obj, PETSc.Object): + obj.destroy() + def sort_interior_dofs(self, idofs, A): + """Permute `idofs` to have A[idofs, idofs] with square blocks of + increasing dimension along its diagonal.""" + Aii = A.createSubMatrix(idofs, idofs) + indptr, indices, _ = Aii.getValuesCSR() + degree = numpy.diff(indptr) + + perm = list(numpy.flatnonzero(degree == 1)) + degree[perm] = 0 + + iend = len(perm) + if iend: + self.slices[1] = slice(0, iend) + + for k in sorted(numpy.unique(degree)): + if k > 1: + nblocks = 0 + for i in numpy.flatnonzero(degree == k): + if degree[i] == k: + block = indices[slice(*indptr[i:i+2])] + degree[block] = 0 + perm.extend(block) + nblocks += 1 + + istart = iend + iend += k * k * nblocks + self.slices[k] = slice(istart, iend) + + idofs.setIndices(idofs.getIndices()[perm]) + Aii.destroy() + + def get_blocks(self, A): + if len(self.submats) == 0: + self.sort_interior_dofs(self.ises[0], A) + self.submats = A.createSubMatrices(self.isrows, iscols=self.iscols, submats=self.submats or None) + return self.submats + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, A, result=None): + return result + + +class SchurComplementDiagonal(SchurComplementBuilder): + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, A, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + A00, A01, A10, A11 = self.get_blocks(A) + self.work[0] = A00.getDiagonal(result=self.work[0]) + self.work[0].reciprocal() + self.work[0].scale(-1) + A01.diagonalScale(L=self.work[0]) + result = A10.matMult(A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result + + +class 
SchurComplementPattern(SchurComplementBuilder): + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, A, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + if result is None: + A00, A01, A10, _ = self.get_blocks(A) + result = A10.matMatMult(A00, A01, result=result) + result.aypx(0.0, A, structure=structure) + return result + + +class SchurComplementBlockCholesky(SchurComplementBuilder): + + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, A, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + A00, A01, A10, A11 = self.get_blocks(A) + indptr, indices, R = A00.getValuesCSR() + + flops = 0 + for k in sorted(self.slices): + zlice = self.slices[k] + if k == 1: + numpy.sqrt(R[zlice], out=R[zlice]) + numpy.reciprocal(R[zlice], out=R[zlice]) + flops += 2 * (zlice.stop - zlice.start) + else: + A = R[zlice].reshape((-1, k, k)) + R[zlice] = numpy.linalg.inv(numpy.linalg.cholesky(A)).reshape((-1)) + flops += A.shape[0] * ((k**3)//3 + k**3) + + PETSc.Log.logFlops(flops) + A00.setValuesCSR(indptr, indices, R) + A00.assemble() + self.work[0] = A10.matTransposeMult(A00, result=self.work[0]) + A00.scale(-1.0) + result = self.work[0].matMatMult(A00, A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result + + +class SchurComplementBlockQR(SchurComplementBuilder): + + @PETSc.Log.EventDecorator("FDMGetSchur") + def condense(self, A, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + A00, A01, A10, A11 = self.get_blocks(A) + indptr, indices, R = A00.getValuesCSR() + Q = numpy.ones(R.shape, dtype=R.dtype) + + flops = 0 + for k in sorted(self.slices): + zlice = self.slices[k] + if k == 1: + numpy.reciprocal(R[zlice], out=R[zlice]) + flops += zlice.stop - zlice.start + else: + A = R[zlice].reshape((-1, k, k)) + q, r = numpy.linalg.qr(A, mode="complete") + Q[zlice] = q.reshape((-1,)) + R[zlice] = numpy.linalg.inv(r).reshape((-1,)) + flops += A.shape[0] * ((4*k**3)//3 + k**3) + + PETSc.Log.logFlops(flops) + A00.setValuesCSR(indptr, indices, Q) + A00.assemble() + self.work[0] = A00.transposeMatMult(A01, result=self.work[0]) + A00.setValuesCSR(indptr, indices, R) + A00.assemble() + A00.scale(-1.0) + result = A10.matMatMult(A00, self.work[0], result=result) + result.axpy(1.0, A11, structure=structure) + return result + + +class SchurComplementBlockSVD(SchurComplementBuilder): + + @PETSc.Log.EventDecorator("FDMGetSchur") + def condense(self, A, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + A00, A01, A10, A11 = self.get_blocks(A) + indptr, indices, U = A00.getValuesCSR() + V = numpy.ones(U.shape, dtype=U.dtype) + self.work[0] = A00.getDiagonal(result=self.work[0]) + D = self.work[0] + dslice = self.slices.get(1, slice(0, 0)) + flops = 0 + for k in sorted(self.slices): + bslice = self.slices[k] + if k == 1: + numpy.sign(D.array_r[bslice], out=U[bslice]) + flops += bslice.stop - bslice.start + else: + A = U[bslice].reshape((-1, k, k)) + u, s, v = numpy.linalg.svd(A, full_matrices=False) + dslice = slice(dslice.stop, dslice.stop + k * A.shape[0]) + D.array_w[dslice] = s.reshape((-1,)) + U[bslice] = numpy.transpose(u, axes=(0, 2, 1)).reshape((-1,)) + V[bslice] = numpy.transpose(v, axes=(0, 2, 1)).reshape((-1,)) + flops += A.shape[0] * ((4*k**3)//3 + 4*k**3) + + PETSc.Log.logFlops(flops) + D.sqrtabs() + D.reciprocal() + A00.setValuesCSR(indptr, indices, V) + A00.assemble() + A00.diagonalScale(R=D) + self.work[1] = A10.matMult(A00, result=self.work[1]) + D.scale(-1.0) + 
A00.setValuesCSR(indptr, indices, U) + A00.assemble() + A00.diagonalScale(L=D) + result = self.work[1].matMatMult(A00, A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result + + +class SchurComplementBlockInverse(SchurComplementBuilder): + + @PETSc.Log.EventDecorator("FDMGetSchur") + def condense(self, A, result=None): + structure = PETSc.Mat.Structure.SUBSET if result else None + A00, A01, A10, A11 = self.get_blocks(A) + indptr, indices, R = A00.getValuesCSR() + + flops = 0 + for k in sorted(self.slices): + zlice = self.slices[k] + if k == 1: + numpy.reciprocal(R[zlice], out=R[zlice]) + flops += zlice.stop - zlice.start + else: + A = R[zlice].reshape((-1, k, k)) + R[zlice] = numpy.linalg.inv(A).reshape((-1,)) + flops += A.shape[0] * (k**3) -@PETSc.Log.EventDecorator("FDMCondense") -def condense_element_mat(get_schur_complement, i0, i1, submats, A, result=None): - """Return the Schur complement associated to indices in i1, condensing i0 out""" - isrows = [i0, i0, i1, i1] - iscols = [i0, i1, i0, i1] - submats[:4] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:4] if submats[0] else None) - return get_schur_complement(submats, result=result) - - -@PETSc.Log.EventDecorator("FDMCondense") -def condense_element_pattern(i0, i1, submats, A, result=None): - """Add zeroes on the statically condensed pattern so that you can run ICC(0)""" - structure = PETSc.Mat.Structure.SUBSET if result else None - isrows = [i0, i0, i1] - iscols = [i0, i1, i0] - submats[:3] = A.createSubMatrices(isrows, iscols=iscols, submats=submats[:3] if submats[0] else None) - A00, A01, A10 = submats[:3] - A00.scale(0.0) - result = A10.matMatMult(A00, A01, result=result) - result.axpy(1.0, A, structure=structure) - return result + PETSc.Log.logFlops(flops) + A00.setValuesCSR(indptr, indices, R) + A00.assemble() + A00.scale(-1.0) + result = A10.matMatMult(A00, A01, result=result) + result.axpy(1.0, A11, structure=structure) + return result @PETSc.Log.EventDecorator("LoadCode") @@ -978,28 +995,6 @@ def is_restricted(finat_element): return is_interior, is_facet -def sort_interior_dofs(idofs, A): - """Permute `idofs` to have A[idofs, idofs] with square blocks of - increasing dimension along its diagonal.""" - Aii = A.createSubMatrix(idofs, idofs) - indptr, indices, _ = Aii.getValuesCSR() - degree = numpy.diff(indptr) - perm = [] - for k in sorted(numpy.unique(degree)): - if k == 1: - neigh = numpy.flatnonzero(degree == k) - degree[neigh] = 0 - perm.extend(neigh) - else: - for i in range(len(degree)): - if degree[i] == k: - neigh = indices[slice(*indptr[i:i+2])] - degree[neigh] = 0 - perm.extend(neigh) - idofs.setIndices(idofs.getIndices()[perm]) - Aii.destroy() - - def petsc_sparse(A_numpy, rtol=1E-10, comm=None): """Convert dense numpy matrix into a sparse PETSc matrix""" atol = rtol * max(A_numpy.min(), A_numpy.max(), key=abs) diff --git a/tests/regression/test_fdm.py b/tests/regression/test_fdm.py index 42b4319eab..3ad7db838e 100644 --- a/tests/regression/test_fdm.py +++ b/tests/regression/test_fdm.py @@ -294,15 +294,14 @@ def test_ipdg_direct_solver(fs): num_flux = lambda u: avg(penalty) * avg(outer(u, n)) num_flux_b = lambda u: (penalty/2) * outer(u, n) - alpha_inner = lambda v, u: inner(v, alpha(u)) + a_int = lambda v, u: inner(2 * avg(outer(v, n)), alpha(num_flux(u) - avg(grad(u)))) + a_Dir = lambda v, u: inner(outer(v, n), alpha(num_flux_b(u) - grad(u))) - a_int = lambda v, u: alpha_inner(2 * avg(outer(v, n)), num_flux(u) - avg(grad(u))) * dS_int - a_Dir = lambda v, u: 
alpha_inner(outer(v, n), num_flux_b(u) - grad(u)) * ds_Dir - - u = TrialFunction(fs) v = TestFunction(fs) - a = ((inner(v, dot(beta, u)) + alpha_inner(grad(v), grad(u))) * dxq - + a_int(v, u) + a_int(u, v) + a_Dir(v, u) + a_Dir(u, v)) + u = TrialFunction(fs) + a = ((inner(v, dot(beta, u)) + inner(grad(v), alpha(grad(u)))) * dxq + + (a_int(v, u) + a_int(u, v)) * dS_int + + (a_Dir(v, u) + a_Dir(u, v)) * ds_Dir) if homogenize: L = 0 @@ -311,7 +310,7 @@ def test_ipdg_direct_solver(fs): B = dot(beta, u_exact) - div(f_exact) T = dot(f_exact, n) L = (inner(v, B)*dxq + inner(v, T)*ds_Neu - + alpha_inner(outer(u_exact, n), 2*num_flux_b(v) - grad(v)) * ds_Dir) + + inner(outer(u_exact, n), alpha(2*num_flux_b(v) - grad(v))) * ds_Dir) problem = LinearVariationalProblem(a, L, uh, bcs=bcs) solver = LinearVariationalSolver(problem, solver_parameters={ From 2ad64a5c75bf91a519f41f7b8eb2d0edade3394d Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 3 Apr 2023 14:37:46 +0100 Subject: [PATCH 59/75] small change --- firedrake/preconditioners/fdm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 5b4d4e8c61..549250eb5d 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -669,7 +669,7 @@ def assemble_reference_tensor(self, V, transpose=False): if is_facet: cache[full_key] = result noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) - result = result.createSubMatrix(noperm, self.ises[1]) + result = result.createSubMatrix(noperm, self.fises) noperm.destroy() return cache.setdefault(key, result) From 6fce996257a5942a2621521fdb17c7828d4517e2 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 4 Apr 2023 18:14:27 +0100 Subject: [PATCH 60/75] style --- firedrake/preconditioners/fdm.py | 210 ++++++++++++++++--------------- 1 file changed, 106 insertions(+), 104 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 549250eb5d..646feb7dbb 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -246,10 +246,10 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): sc_builder = SchurComplementBlockCholesky else: sc_builder = SchurComplementBlockQR - self.get_static_condensation[Vfacet] = Vbig, sc_builder(idofs, fdofs, PETSc.COMM_SELF).condense + self.get_static_condensation[Vfacet] = Vbig, sc_builder(idofs, fdofs).condense elif len(fdofs) and V.finat_element.formdegree == 0: # If we are in H(grad), we just pad with zeros on the statically-condensed pattern - self.get_static_condensation[V] = Vbig, SchurComplementPattern(idofs, dofs, PETSc.COMM_SELF).condense + self.get_static_condensation[V] = Vbig, SchurComplementPattern(idofs, dofs).condense @PETSc.Log.EventDecorator("FDMGetIndices") def cell_to_global(lgmap, cell_to_local, cell_index, result=None): @@ -432,15 +432,8 @@ def get_key(*args): Vbig = None condense_element_mat = lambda Ae, result=None: Ae - set_submat = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) - get_rindices = self.cell_to_global[Vrow] if Vrow == Vcol: - get_cindices = lambda e, result=None: result - update_A = lambda Ae, rindices, cindices: set_submat(A, Ae, rindices, rindices, addv) Vbig, condense_element_mat = self.get_static_condensation.get(Vrow, (Vbig, condense_element_mat)) - else: - get_cindices = self.cell_to_global[Vcol] - update_A = lambda Ae, rindices, cindices: set_submat(A, Ae, rindices, 
cindices, addv) Me = self._element_mass_matrix # Interpolation of basis and exterior derivative onto broken spaces @@ -461,6 +454,16 @@ def get_key(*args): except KeyError: Se = self.work_mats.setdefault(key, condense_element_mat(Ae)) + get_rindices = self.cell_to_global[Vrow] + rindices = numpy.empty(Se.getSize()[:1], dtype=PETSc.IntType) + if Vrow == Vcol: + get_cindices = lambda e, result=None: result + cindices = rindices + else: + get_cindices = self.cell_to_global[Vcol] + cindices = numpy.empty(Se.getSize()[1:], dtype=PETSc.IntType) + + setSubMatCSR = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) insert = PETSc.InsertMode.INSERT if A.getType() == PETSc.Mat.Type.PREALLOCATOR: # Empty kernel for preallocation @@ -487,15 +490,13 @@ def element_kernel(e, result=None): Me.assemble() return assemble_element_mat(result=result) - cindices = None - rindices = None # Core assembly loop for e in range(self.nel): - cindices = get_cindices(e, result=cindices) - rindices = get_rindices(e, result=rindices) + get_rindices(e, result=rindices) + get_cindices(e, result=cindices) Ae = element_kernel(e, result=Ae) Se = condense_element_mat(Ae, result=Se) - update_A(Se, rindices, cindices) + setSubMatCSR(A, Se, rindices, cindices, addv) @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coefficients(self, J, fcp, block_diagonal=True): @@ -677,63 +678,71 @@ def assemble_reference_tensor(self, V, transpose=False): class SchurComplementBuilder(object): """ - Class to build element Schur complement. + Class to build Schur complement matrices that reuses work matrices and the + symbolic factorization of the interior block. """ - def __init__(self, idofs, fdofs, comm): - i0 = PETSc.IS().createGeneral(idofs, comm=comm) - i1 = PETSc.IS().createGeneral(fdofs, comm=comm) - self.ises = (i0, i1) - self.isrows = [i0, i0, i1, i1] - self.iscols = [i0, i1, i0, i1] - self.work = [None for _ in range(2)] - self.submats = [] + def __init__(self, idofs, fdofs): + self.idofs = idofs + self.fdofs = fdofs self.slices = {} + self.ises = tuple() + self.isrows = [] + self.iscols = [] + self.submats = [] + self.work = [None for _ in range(2)] def __del__(self): - self.ises[0].destroy() - self.ises[1].destroy() - for mat in self.submats: - if isinstance(mat, PETSc.Object): - mat.destroy() + self.reset() + + def reset(self): + for obj in self.ises: + if isinstance(obj, PETSc.Object): + obj.destroy() + for obj in self.submats: + if isinstance(obj, PETSc.Object): + obj.destroy() for obj in self.work: if isinstance(obj, PETSc.Object): obj.destroy() + self.submats = [] + self.work = [None for _ in range(2)] - def sort_interior_dofs(self, idofs, A): - """Permute `idofs` to have A[idofs, idofs] with square blocks of - increasing dimension along its diagonal.""" - Aii = A.createSubMatrix(idofs, idofs) - indptr, indices, _ = Aii.getValuesCSR() + def sort_interior_dofs(self, i0, A): + """Permute `i0` to have A[i0, i0] with square blocks of + increasing dimension along its diagonal. 
Add slices with the extents + of each set of blocks in the CSR representation of A.""" + A00 = A.createSubMatrix(i0, i0) + indptr, indices, _ = A00.getValuesCSR() degree = numpy.diff(indptr) - - perm = list(numpy.flatnonzero(degree == 1)) - degree[perm] = 0 - - iend = len(perm) - if iend: - self.slices[1] = slice(0, iend) - - for k in sorted(numpy.unique(degree)): + perm = numpy.argsort(degree) + icur = 0 + istart = 0 + self.slices[1] = slice(0, 0) + unique_degree, counts = numpy.unique(degree, return_counts=True) + for k, kdofs in sorted(zip(unique_degree, counts)): if k > 1: - nblocks = 0 - for i in numpy.flatnonzero(degree == k): - if degree[i] == k: - block = indices[slice(*indptr[i:i+2])] - degree[block] = 0 - perm.extend(block) - nblocks += 1 - - istart = iend - iend += k * k * nblocks - self.slices[k] = slice(istart, iend) - - idofs.setIndices(idofs.getIndices()[perm]) - Aii.destroy() + neigh = numpy.empty((kdofs, k), dtype=indices.dtype) + for row in range(kdofs): + i = perm[icur+row] + neigh[row] = indices[slice(*indptr[i:i+2])] + perm[icur:icur+kdofs] = list(dict.fromkeys(neigh.flat)) + + self.slices[k] = slice(istart, istart + k * kdofs) + istart += k * kdofs + icur += kdofs + i0.setIndices(i0.getIndices()[perm]) + A00.destroy() def get_blocks(self, A): if len(self.submats) == 0: - self.sort_interior_dofs(self.ises[0], A) + comm = A.getComm() + i0 = PETSc.IS().createGeneral(self.idofs, comm=comm) + i1 = PETSc.IS().createGeneral(self.fdofs, comm=comm) + self.sort_interior_dofs(i0, A) + self.isrows = [i0, i0, i1, i1] + self.iscols = [i0, i1, i0, i1] + self.ises = (i0, i1) self.submats = A.createSubMatrices(self.isrows, iscols=self.iscols, submats=self.submats or None) return self.submats @@ -777,17 +786,15 @@ def condense(self, A, result=None): A00, A01, A10, A11 = self.get_blocks(A) indptr, indices, R = A00.getValuesCSR() - flops = 0 - for k in sorted(self.slices): + zlice = self.slices[1] + numpy.sqrt(R[zlice], out=R[zlice]) + numpy.reciprocal(R[zlice], out=R[zlice]) + flops = 2 * (zlice.stop - zlice.start) + for k in sorted(degree for degree in self.slices if degree > 1): zlice = self.slices[k] - if k == 1: - numpy.sqrt(R[zlice], out=R[zlice]) - numpy.reciprocal(R[zlice], out=R[zlice]) - flops += 2 * (zlice.stop - zlice.start) - else: - A = R[zlice].reshape((-1, k, k)) - R[zlice] = numpy.linalg.inv(numpy.linalg.cholesky(A)).reshape((-1)) - flops += A.shape[0] * ((k**3)//3 + k**3) + A = R[zlice].reshape((-1, k, k)) + R[zlice] = numpy.linalg.inv(numpy.linalg.cholesky(A)).reshape((-1)) + flops += A.shape[0] * ((k**3)//3 + k**3) PETSc.Log.logFlops(flops) A00.setValuesCSR(indptr, indices, R) @@ -808,18 +815,16 @@ def condense(self, A, result=None): indptr, indices, R = A00.getValuesCSR() Q = numpy.ones(R.shape, dtype=R.dtype) - flops = 0 - for k in sorted(self.slices): + zlice = self.slices[1] + numpy.reciprocal(R[zlice], out=R[zlice]) + flops = zlice.stop - zlice.start + for k in sorted(degree for degree in self.slices if degree > 1): zlice = self.slices[k] - if k == 1: - numpy.reciprocal(R[zlice], out=R[zlice]) - flops += zlice.stop - zlice.start - else: - A = R[zlice].reshape((-1, k, k)) - q, r = numpy.linalg.qr(A, mode="complete") - Q[zlice] = q.reshape((-1,)) - R[zlice] = numpy.linalg.inv(r).reshape((-1,)) - flops += A.shape[0] * ((4*k**3)//3 + k**3) + A = R[zlice].reshape((-1, k, k)) + q, r = numpy.linalg.qr(A, mode="complete") + Q[zlice] = q.reshape((-1,)) + R[zlice] = numpy.linalg.inv(r).reshape((-1,)) + flops += A.shape[0] * ((4*k**3)//3 + k**3) PETSc.Log.logFlops(flops) 
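# [Editorial aside, not part of the patch] The self.slices dict filled by
# sort_interior_dofs maps each block size k to the extent of its values inside
# the CSR array of A00, so the loops above can factorize all k-by-k blocks in
# one vectorized call. Toy illustration with one 1x1 and two 2x2 blocks:
import numpy as np
degree = np.array([1, 2, 2, 2, 2])                  # nonzeros per row of A00
sizes, counts = np.unique(degree, return_counts=True)
slices, start = {}, 0
for k, c in zip(sizes, counts):
    slices[k] = slice(start, start + k * c)         # k*c CSR values per size k
    start += k * c
assert slices == {1: slice(0, 1), 2: slice(1, 9)}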
A00.setValuesCSR(indptr, indices, Q) @@ -843,21 +848,18 @@ def condense(self, A, result=None): V = numpy.ones(U.shape, dtype=U.dtype) self.work[0] = A00.getDiagonal(result=self.work[0]) D = self.work[0] - dslice = self.slices.get(1, slice(0, 0)) - flops = 0 - for k in sorted(self.slices): + dslice = self.slices[1] + numpy.sign(D.array_r[dslice], out=U[dslice]) + flops = dslice.stop - dslice.start + for k in sorted(degree for degree in self.slices if degree > 1): bslice = self.slices[k] - if k == 1: - numpy.sign(D.array_r[bslice], out=U[bslice]) - flops += bslice.stop - bslice.start - else: - A = U[bslice].reshape((-1, k, k)) - u, s, v = numpy.linalg.svd(A, full_matrices=False) - dslice = slice(dslice.stop, dslice.stop + k * A.shape[0]) - D.array_w[dslice] = s.reshape((-1,)) - U[bslice] = numpy.transpose(u, axes=(0, 2, 1)).reshape((-1,)) - V[bslice] = numpy.transpose(v, axes=(0, 2, 1)).reshape((-1,)) - flops += A.shape[0] * ((4*k**3)//3 + 4*k**3) + A = U[bslice].reshape((-1, k, k)) + u, s, v = numpy.linalg.svd(A, full_matrices=False) + dslice = slice(dslice.stop, dslice.stop + k * A.shape[0]) + D.array_w[dslice] = s.reshape((-1,)) + U[bslice] = numpy.transpose(u, axes=(0, 2, 1)).reshape((-1,)) + V[bslice] = numpy.transpose(v, axes=(0, 2, 1)).reshape((-1,)) + flops += A.shape[0] * ((4*k**3)//3 + 4*k**3) PETSc.Log.logFlops(flops) D.sqrtabs() @@ -883,16 +885,14 @@ def condense(self, A, result=None): A00, A01, A10, A11 = self.get_blocks(A) indptr, indices, R = A00.getValuesCSR() - flops = 0 - for k in sorted(self.slices): + zlice = self.slices[1] + numpy.reciprocal(R[zlice], out=R[zlice]) + flops = zlice.stop - zlice.start + for k in sorted(degree for degree in self.slices if degree > 1): zlice = self.slices[k] - if k == 1: - numpy.reciprocal(R[zlice], out=R[zlice]) - flops += zlice.stop - zlice.start - else: - A = R[zlice].reshape((-1, k, k)) - R[zlice] = numpy.linalg.inv(A).reshape((-1,)) - flops += A.shape[0] * (k**3) + A = R[zlice].reshape((-1, k, k)) + R[zlice] = numpy.linalg.inv(A).reshape((-1,)) + flops += A.shape[0] * (k**3) PETSc.Log.logFlops(flops) A00.setValuesCSR(indptr, indices, R) @@ -1001,9 +1001,11 @@ def petsc_sparse(A_numpy, rtol=1E-10, comm=None): sparsity = abs(A_numpy) > atol nnz = numpy.count_nonzero(sparsity, axis=1).astype(PETSc.IntType) A = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) - for row, (Arow, Srow) in enumerate(zip(A_numpy, sparsity)): - cols = numpy.argwhere(Srow).astype(PETSc.IntType).flat - A.setValues(row, cols, Arow[cols], PETSc.InsertMode.INSERT) + rows, cols = numpy.nonzero(sparsity) + rows = rows.astype(PETSc.IntType) + cols = cols.astype(PETSc.IntType) + vals = A_numpy[sparsity] + A.setValuesRCV(rows[:, None], cols[:, None], vals[:, None], PETSc.InsertMode.INSERT) A.assemble() return A From e5dfbc80de87191088f811d4281417e0c172d0e7 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 5 Apr 2023 16:06:35 +0100 Subject: [PATCH 61/75] cleanup --- firedrake/preconditioners/fdm.py | 57 +++++---- firedrake/preconditioners/pmg.py | 199 +++++++++++++------------------ 2 files changed, 111 insertions(+), 145 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 646feb7dbb..f861132fd1 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -258,9 +258,10 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): return lgmap.apply(result, result=result) # Create data structures needed for assembly + bc_rows = {} + bc_vals = {} self.cell_to_global = {} 
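# [Editorial aside, not part of the patch] Standalone, slightly simplified
# version of the vectorized setValuesRCV insertion adopted in petsc_sparse in
# the previous hunk: all nonzeros of a dense array are inserted in one call
# instead of row by row. The helper name and the plain tolerance are
# illustrative assumptions.
import numpy as np
from petsc4py import PETSc

def dense_to_aij(B, tol=1e-12):
    mask = abs(B) > tol
    nnz = np.count_nonzero(mask, axis=1).astype(PETSc.IntType)
    A = PETSc.Mat().createAIJ(B.shape, nnz=(nnz, 0), comm=PETSc.COMM_SELF)
    rows, cols = np.nonzero(mask)
    A.setValuesRCV(rows.astype(PETSc.IntType)[:, None],
                   cols.astype(PETSc.IntType)[:, None],
                   B[mask][:, None], PETSc.InsertMode.INSERT)
    A.assemble()
    return A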
self.lgmaps = {} - bc_rows = {} for Vsub in V: lgmap = Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) bsize = Vsub.dof_dset.layout_vec.getBlockSize() @@ -269,8 +270,9 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): self.lgmaps[Vsub] = lgmap own = Vsub.dof_dset.layout_vec.getLocalSize() - bdofs = numpy.flatnonzero(lgmap.indices[:own] < 0).astype(PETSc.IntType) + bdofs = numpy.flatnonzero(lgmap.indices[:own] < 0).astype(PETSc.IntType)[:, None] bc_rows[Vsub] = Vsub.dof_dset.lgmap.apply(bdofs, result=bdofs) + bc_vals[Vsub] = numpy.ones(bdofs.shape, dtype=PETSc.RealType) self.nel = nel coefficients, assembly_callables = self.assemble_coefficients(J, fcp) @@ -342,11 +344,11 @@ def assemble_P(): P = Pmats[Vrow, Vcol] if P.getType().endswith("aij"): P.zeroEntries() - if Vrow == Vcol and len(bc_rows[Vrow]) > 0: - rows = bc_rows[Vrow][:, None] - vals = numpy.ones(rows.shape, dtype=PETSc.RealType) - P.setValuesRCV(rows, rows, vals, addv) self.set_values(P, Vrow, Vcol, addv) + for Vrow in Vsort: + rows = bc_rows[Vrow] + if len(rows) > 0: + Pmats[Vrow, Vrow].setValuesRCV(rows, rows, bc_vals[Vrow], addv) Pmat.assemble() return Pmat, assemble_P @@ -1328,8 +1330,8 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): # assemble zero-th order term separately, including off-diagonals (mixed components) # I cannot do this for hdiv elements as off-diagonals are not sparse, this is because # the FDM eigenbases for CG(k) and CG(k-1) are not orthogonal to each other - rindices = None use_diag_Bq = Bq is None or len(Bq.ufl_shape) != 2 or static_condensation + rindices = None if not use_diag_Bq: bshape = Bq.ufl_shape # Be = Bhat kron ... kron Bhat @@ -1351,60 +1353,57 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): Bq = None # assemble the second order term and the zero-th order term if any, - # discarding mixed derivatives and mixed componentsget_weak_bc_flags(J) - mue = numpy.zeros((ncomp, tdim), dtype=PETSc.RealType) - bqe = numpy.zeros((ncomp,), dtype=PETSc.RealType) - + # discarding mixed derivatives and mixed components + ae = numpy.zeros((ncomp, tdim), dtype=PETSc.RealType) + be = numpy.zeros((ncomp,), dtype=PETSc.RealType) + je = None for e in range(self.nel): - je = index_coef(e) + je = index_coef(e, result=je) bce = bcflags.dat.data_ro_with_halos[index_bc(e)] > 1E-8 + # get coefficients on this cell + if Gq is not None: + numpy.sum(Gq.dat.data_ro[je], axis=0, out=ae) + if Bq is not None: + numpy.sum(Bq.dat.data_ro[je], axis=0, out=be) rindices = get_rindices(e, result=rindices) rows = numpy.reshape(rindices, (-1, bsize)) rows = numpy.transpose(rows) rows = numpy.reshape(rows, (ncomp, -1)) - - # get second order coefficient on this cell - if Gq is not None: - numpy.sum(Gq.dat.data_ro[je], axis=0, out=mue) - # get zero-th order coefficient on this cell - if Bq is not None: - numpy.sum(Bq.dat.data_ro[je], axis=0, out=bqe) - + # for each component: compute the stiffness matrix Ae for k in range(ncomp): # permutation of axes with respect to the first vector component axes = numpy.roll(numpy.arange(tdim), -shift[k]) - # for each component: compute the stiffness matrix Ae bck = bce[:, k] if len(bce.shape) == 2 else bce fbc = numpy.dot(bck, flag2id) if Gq is not None: - # Ae = mue[k][0] Ahat + bqe[k] Bhat + # Ae = ae[k][0] Ahat + be[k] Bhat Be = Afdm[axes[0]][0].copy() Ae = Afdm[axes[0]][1+fbc[0]].copy() - Ae.scale(mue[k][0]) + Ae.scale(ae[k][0]) if Bq is not None: - Ae.axpy(bqe[k], Be) + Ae.axpy(be[k], Be) if tdim > 1: - # Ae = Ae kron Bhat + 
mue[k][1] Bhat kron Ahat + # Ae = Ae kron Bhat + ae[k][1] Bhat kron Ahat Ae = Ae.kron(Afdm[axes[1]][0]) if Gq is not None: - Ae.axpy(mue[k][1], Be.kron(Afdm[axes[1]][1+fbc[1]])) + Ae.axpy(ae[k][1], Be.kron(Afdm[axes[1]][1+fbc[1]])) if tdim > 2: - # Ae = Ae kron Bhat + mue[k][2] Bhat kron Bhat kron Ahat + # Ae = Ae kron Bhat + ae[k][2] Bhat kron Bhat kron Ahat Be = Be.kron(Afdm[axes[1]][0]) Ae = Ae.kron(Afdm[axes[2]][0]) if Gq is not None: - Ae.axpy(mue[k][2], Be.kron(Afdm[axes[2]][1+fbc[2]])) + Ae.axpy(ae[k][2], Be.kron(Afdm[axes[2]][1+fbc[2]])) Be.destroy() elif Bq is not None: Ae = Afdm[axes[0]][0] for m in range(1, tdim): Ae = Ae.kron(Afdm[axes[m]][0]) - Ae.scale(bqe[k]) + Ae.scale(be[k]) Ae = condense_element_mat(Ae) update_A(A, Ae, rows[k].astype(PETSc.IntType)) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 0d3544f26c..8719c76ece 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -1,16 +1,16 @@ from functools import partial, lru_cache from itertools import chain -from firedrake.petsc import PETSc -from firedrake.preconditioners.base import PCBase, SNESBase, PCSNESBase from firedrake.dmhooks import (attach_hooks, get_appctx, push_appctx, pop_appctx, add_hook, get_parent, push_parent, pop_parent, get_function_space, set_function_space) -from firedrake.solving_utils import _SNESContext +from firedrake.petsc import PETSc +from firedrake.preconditioners.base import PCBase, SNESBase, PCSNESBase from firedrake.nullspace import VectorSpaceBasis, MixedVectorSpaceBasis +from firedrake.solving_utils import _SNESContext from firedrake.tsfc_interface import extract_numbered_coefficients from firedrake.utils import ScalarType_c, IntType_c, cached_property -from tsfc import compile_expression_dual_evaluation from tsfc.finatinterface import create_element +from tsfc import compile_expression_dual_evaluation from pyop2 import op2 import firedrake @@ -36,7 +36,8 @@ class PMGBase(PCSNESBase): Other PETSc options inspected by this class are: - 'pmg_mg_coarse_degree': polynomial degree of the coarse level - - 'pmg_mg_coarse_mat_type': can be either 'aij' or 'matfree' + - 'pmg_mg_coarse_mat_type': can be either a `PETSc.Mat.Type`, or 'matfree' + - 'pmg_mg_coarse_pmat_type': can be either a `PETSc.Mat.Type`, or 'matfree' - 'pmg_mg_coarse_form_compiler_mode': can be 'spectral' (default), 'vanilla', 'coffee', or 'tensor' - 'pmg_mg_levels_transfer_mat_type': can be either 'aij' or 'matfree' @@ -90,6 +91,8 @@ def initialize(self, obj): raise ValueError("No context found.") if not isinstance(ctx, _SNESContext): raise ValueError("Don't know how to get form from %r" % ctx) + fcp = ctx._problem.form_compiler_parameters + mode = fcp.get("mode", "spectral") if fcp is not None else "spectral" test, trial = ctx.J.arguments() if test.function_space() != trial.function_space(): @@ -103,11 +106,8 @@ def initialize(self, obj): ppc = self.configure_pmg(obj, pdm) self.is_snes = isinstance(obj, PETSc.SNES) - copts = PETSc.Options(ppc.getOptionsPrefix() + ppc.getType() + "_coarse_") - # Get the coarse degree from PETSc options - fcp = ctx._problem.form_compiler_parameters - mode = fcp.get("mode", "spectral") if fcp is not None else "spectral" + copts = PETSc.Options(ppc.getOptionsPrefix() + ppc.getType() + "_coarse_") self.coarse_degree = copts.getInt("degree", default=1) self.coarse_mat_type = copts.getString("mat_type", default=ctx.mat_type) self.coarse_pmat_type = copts.getString("pmat_type", default=self.coarse_mat_type) @@ -234,11 +234,7 @@ def 
_coarsen_form(a): except ValueError: mat_type = self.coarse_mat_type pmat_type = self.coarse_pmat_type - if fcp is None: - fcp = dict() - elif fcp is fproblem.form_compiler_parameters: - fcp = dict(fcp) - fcp["mode"] = self.coarse_form_compiler_mode + fcp = dict(fcp or {}, mode=self.coarse_form_compiler_mode) # Coarsen the problem and the _SNESContext cproblem = firedrake.NonlinearVariationalProblem(cF, cu, bcs=cbcs, J=cJ, Jp=cJp, @@ -276,63 +272,17 @@ def inject_state(): add_hook(parent, setup=inject_state, call_setup=True) - # Coarsen the nullspace basis - def coarsen_nullspace(coarse_V, interpolate, fine_nullspace): - if isinstance(fine_nullspace, MixedVectorSpaceBasis): - if interpolate.getType() == "python": - interpolate = interpolate.getPythonContext() - submats = [interpolate.getNestSubMatrix(i, i) for i in range(len(coarse_V))] - coarse_bases = [] - for fs, submat, basis in zip(coarse_V, submats, fine_nullspace._bases): - if isinstance(basis, VectorSpaceBasis): - coarse_bases.append(coarsen_nullspace(fs, submat, basis)) - else: - coarse_bases.append(coarse_V.sub(basis.index)) - return MixedVectorSpaceBasis(coarse_V, coarse_bases) - elif isinstance(fine_nullspace, VectorSpaceBasis): - coarse_vecs = [] - for xf in fine_nullspace._petsc_vecs: - wc = firedrake.Function(coarse_V) - with wc.dat.vec_wo as xc: - interpolate.multTranspose(xf, xc) - coarse_vecs.append(wc) - vsb = VectorSpaceBasis(coarse_vecs, constant=fine_nullspace._constant) - vsb.orthonormalize() - return vsb - else: - return fine_nullspace - interpolate = None if fctx._nullspace or fctx._nullspace_T or fctx._near_nullspace: interpolate, _ = cdm.createInterpolation(fdm) - cctx._nullspace = coarsen_nullspace(cV, interpolate, fctx._nullspace) - if fctx._nullspace_T is fctx._nullspace: - cctx._nullspace_T = cctx._nullspace - else: - cctx._nullspace_T = coarsen_nullspace(cV, interpolate, fctx._nullspace_T) - if fctx._near_nullspace is fctx._nullspace: - cctx._near_nullspace = cctx._nullspace - elif fctx._near_nullspace is fctx._nullspace_T: - cctx._near_nullspace = cctx._nullspace_T - else: - cctx._near_nullspace = coarsen_nullspace(cV, interpolate, fctx._near_nullspace) - + cctx._nullspace = self.coarsen_nullspace(cV, interpolate, fctx._nullspace) + cctx._nullspace_T = self.coarsen_nullspace(cV, interpolate, fctx._nullspace_T) + cctx._near_nullspace = self.coarsen_nullspace(cV, interpolate, fctx._near_nullspace) cctx.set_nullspace(cctx._nullspace, cV._ises, transpose=False, near=False) cctx.set_nullspace(cctx._nullspace_T, cV._ises, transpose=True, near=False) cctx.set_nullspace(cctx._near_nullspace, cV._ises, transpose=False, near=True) return cdm - def coarsen_quadrature(self, metadata, fdeg, cdeg): - if isinstance(metadata, dict): - # Coarsen the quadrature degree in a dictionary - # preserving the ratio of quadrature nodes to interpolation nodes (qdeg+1)//(fdeg+1) - qdeg = metadata.get("quadrature_degree", None) - if qdeg is not None: - cmd = dict(metadata) - cmd["quadrature_degree"] = max(2*cdeg+1, ((qdeg+1)*(cdeg+1)+fdeg)//(fdeg+1)-1) - return cmd - return metadata - def coarsen_bcs(self, fbcs, cV): cbcs = [] for bc in fbcs: @@ -346,13 +296,55 @@ def coarsen_bcs(self, fbcs, cV): raise NotImplementedError("Unsupported BC type, please get in touch if you need this") return cbcs + def coarsen_quadrature(self, metadata, fdeg, cdeg): + """Coarsen the quadrature degree in a dictionary preserving the ratio of + quadrature nodes to interpolation nodes (qdeg+1)//(fdeg+1).""" + try: + qdeg = metadata["quadrature_degree"] + 
coarse_qdeg = max(2*cdeg+1, ((qdeg+1)*(cdeg+1)+fdeg)//(fdeg+1)-1) + return dict(metadata, quadrature_degree=coarse_qdeg) + except (KeyError, TypeError): + return metadata + + def coarsen_nullspace(self, coarse_V, interpolate, fine_nullspace): + """Coarsen a nullspace or retrieve it from class cache""" + cache = self._cache.setdefault("nullspace", {}) + key = (coarse_V.ufl_element(), fine_nullspace) + try: + return cache[key] + except KeyError: + if isinstance(fine_nullspace, MixedVectorSpaceBasis): + if interpolate.getType() == "python": + interpolate = interpolate.getPythonContext() + submats = [interpolate.getNestSubMatrix(i, i) for i in range(len(coarse_V))] + coarse_bases = [] + for fs, submat, basis in zip(coarse_V, submats, fine_nullspace._bases): + if isinstance(basis, VectorSpaceBasis): + coarse_bases.append(self.coarsen_nullspace(fs, submat, basis)) + else: + coarse_bases.append(coarse_V.sub(basis.index)) + coarse_nullspace = MixedVectorSpaceBasis(coarse_V, coarse_bases) + elif isinstance(fine_nullspace, VectorSpaceBasis): + coarse_vecs = [] + for xf in fine_nullspace._petsc_vecs: + wc = firedrake.Function(coarse_V) + with wc.dat.vec_wo as xc: + # the nullspace basis is in the dual of V + interpolate.multTranspose(xf, xc) + coarse_vecs.append(wc) + coarse_nullspace = VectorSpaceBasis(coarse_vecs, constant=fine_nullspace._constant) + coarse_nullspace.orthonormalize() + else: + return fine_nullspace + return cache.setdefault(key, coarse_nullspace) + def create_transfer(self, mat_type, cctx, fctx, cbcs, fbcs): - # Create a transfer or retrieve it from the class cache + """Create a transfer or retrieve it from class cache""" cV = cctx.J.arguments()[0].function_space() fV = fctx.J.arguments()[0].function_space() cbcs = tuple(cctx._problem.bcs) if cbcs else tuple() fbcs = tuple(fctx._problem.bcs) if fbcs else tuple() - key = (mat_type, cV, fV, cbcs, fbcs) + key = (mat_type, fV.mesh(), cV.ufl_element(), fV.ufl_element(), cbcs, fbcs) cache = self._cache.setdefault("transfer", {}) try: return cache[key] @@ -379,9 +371,7 @@ def create_injection(self, dmc, dmf): @staticmethod def max_degree(ele): - """ - Return the maximum degree of a :class:`ufl.FiniteElement` - """ + """Return the maximum degree of a :class:`ufl.FiniteElement`""" if isinstance(ele, (ufl.VectorElement, ufl.TensorElement)): return PMGBase.max_degree(ele._sub_element) elif isinstance(ele, (ufl.MixedElement, ufl.TensorProductElement)): @@ -537,7 +527,8 @@ def prolongation_transfer_kernel_action(Vf, expr): def expand_element(ele): - # Expand a FiniteElement as an EnrichedElement of TensorProductElements, discarding modifiers. + """Expand a FiniteElement as an EnrichedElement of TensorProductElements, + discarding modifiers.""" if isinstance(ele, finat.FlattenedDimensions): return expand_element(ele.product) elif isinstance(ele, (finat.HDivElement, finat.HCurlElement)): @@ -554,7 +545,7 @@ def expand_element(ele): new_terms = [] for f in e.elements if isinstance(e, finat.EnrichedElement) else [e]: f_factors = tuple(f.factors) if isinstance(f, finat.TensorProductElement) else (f,) - new_terms.extend([t_factors + f_factors for t_factors in terms]) + new_terms.extend(t_factors + f_factors for t_factors in terms) terms = new_terms terms = list(map(finat.TensorProductElement, terms)) return finat.EnrichedElement(terms) @@ -579,6 +570,8 @@ def evaluate_dual(source, target, alpha=None): def compare_element(e1, e2): + """Numerically compare two :class:`FIAT.elements`. 
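# [Editorial aside, not part of the patch] Worked example of the quadrature
# coarsening rule in coarsen_quadrature above: for fdeg=7, cdeg=1, qdeg=15,
#   coarse_qdeg = max(2*1+1, ((15+1)*(1+1) + 7)//(7+1) - 1) = max(3, 3) = 3,
# so the ratio of quadrature nodes to interpolation nodes, (q+1)//(p+1) = 2,
# is preserved on the coarse level.
fdeg, cdeg, qdeg = 7, 1, 15
assert max(2*cdeg+1, ((qdeg+1)*(cdeg+1)+fdeg)//(fdeg+1)-1) == 3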
+ Equality is satisfied if e2.dual_basis(e1.primal_basis) == identity.""" if e1 is e2: return True if e1.space_dimension() != e2.space_dimension(): @@ -588,37 +581,10 @@ def compare_element(e1, e2): return numpy.allclose(B, 0.0, rtol=1E-14, atol=1E-14) -def compare_dual(b1, b2): - p1 = b1.get_point_dict() - p2 = b2.get_point_dict() - if len(p1) != len(p2): - return False - - k1 = numpy.array(list(p1.keys())) - k2 = numpy.array(list(p2.keys())) - if not numpy.allclose(k1, k2, rtol=1E-16, atol=1E-16): - return False - - k1 = numpy.array([p1[k][0][0] for k in p1]) - k2 = numpy.array([p2[k][0][0] for k in p2]) - return numpy.allclose(k1, k2, rtol=1E-16, atol=1E-16) - - -def compare_dual_basis(l1, l2): - if len(l1) != len(l2): - return False - return all(compare_dual(b1, b2) for b1, b2 in zip(l1, l2)) - - @lru_cache(maxsize=10) def fiat_reference_prolongator(celem, felem, derivative=False): - ckey = (felem.formdegree,) if derivative else None - fkey = (celem.formdegree,) if derivative else None - fdual = felem.dual_basis() - cdual = celem.dual_basis() - if fkey == ckey and (celem is felem or compare_dual_basis(cdual, fdual)): - return numpy.array([]) - return evaluate_dual(celem, felem, alpha=ckey) + alpha = (1,) if derivative else None + return evaluate_dual(celem, felem, alpha=alpha) @lru_cache(maxsize=10) @@ -704,6 +670,17 @@ def get_permutation_to_line_elements(finat_element): return dof_perm, unique_line_elements, shifts +def get_permuted_map(V): + """ + Return a PermutedMap with the same tensor product shape for + every component of H(div) or H(curl) tensor product elements + """ + indices, _, _ = get_permutation_to_line_elements(V.finat_element) + if numpy.all(indices[:-1] < indices[1:]): + return V.cell_node_map() + return op2.PermutedMap(V.cell_node_map(), indices) + + # Common kernel to compute y = kron(A3, kron(A2, A1)) * x # Vector and tensor field generalization from Deville, Fischer, and Mund section 8.3.1. kronmxv_code = """ @@ -929,6 +906,7 @@ def make_kron_code(Vc, Vf, t_in, t_out, mat_name, scratch): fshapes = [] cshapes = [] has_code = False + identity_filter = lambda A: numpy.array([]) if A.shape[0] == A.shape[1] and numpy.allclose(A, numpy.eye(A.shape[0])) else A for celem, felem, shift in zip(celems, felems, shifts): if len(felem) != len(celem): raise ValueError("Fine and coarse elements do not have the same number of factors") @@ -942,7 +920,7 @@ def make_kron_code(Vc, Vf, t_in, t_out, mat_name, scratch): fshapes.append((nscal,) + tuple(fshape)) cshapes.append((nscal,) + tuple(cshape)) - J = [fiat_reference_prolongator(ce, fe).T for ce, fe in zip(celem, felem)] + J = [identity_filter(fiat_reference_prolongator(ce, fe)).T for ce, fe in zip(celem, felem)] if any(Jk.size and numpy.isclose(Jk, 0.0E0).all() for Jk in J): prolong_code.append(f""" for({IntType_c} i=0; i<{nscal*numpy.prod(fshape)}; i++) {t_out}[i+{fskip}] = 0.0E0; @@ -1139,17 +1117,6 @@ def make_permutation_code(V, vshape, pshape, t_in, t_out, array_name): return decl, prolong, restrict -def get_permuted_map(V): - """ - Return a PermutedMap with the same tensor product shape for - every component of H(div) or H(curl) tensor product elements - """ - indices, _, _ = get_permutation_to_line_elements(V.finat_element) - if numpy.all(indices[:-1] < indices[1:]): - return V.cell_node_map() - return op2.PermutedMap(V.cell_node_map(), indices) - - class StandaloneInterpolationMatrix(object): """ Interpolation matrix for a single standalone space. 
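# [Editorial aside, not part of the patch] Sketch of the identity_filter added
# to make_kron_code above: a tabulated reference prolongator that is
# numerically the identity is replaced by an empty array, so the generated
# kernel can skip that contraction. Standalone form with an extra shape guard:
import numpy as np

def identity_filter(A):
    square = A.ndim == 2 and A.shape[0] == A.shape[1]
    return np.array([]) if square and np.allclose(A, np.eye(A.shape[0])) else A

assert identity_filter(np.eye(3)).size == 0              # identity -> skipped
assert identity_filter(np.ones((2, 3))).shape == (2, 3)  # kept as-is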
@@ -1168,11 +1135,11 @@ def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs): def work_function(self, V): if isinstance(V, firedrake.Function): return V - else: - try: - return self._cache_work[V] - except KeyError: - return self._cache_work.setdefault(V, firedrake.Function(V)) + key = (V.ufl_element(), V.mesh()) + try: + return self._cache_work[key] + except KeyError: + return self._cache_work.setdefault(key, firedrake.Function(V)) @cached_property def _weight(self): From 6b701743d928df6e74764745127b7f702f39d367 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 5 Apr 2023 21:21:47 +0100 Subject: [PATCH 62/75] list of assembly callables for matrix blocks --- firedrake/preconditioners/fdm.py | 78 ++++++++++++++------------------ 1 file changed, 35 insertions(+), 43 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index f861132fd1..f0655ec48f 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -173,7 +173,7 @@ def initialize(self, pc): self.bc_nodes = numpy.empty(0, dtype=PETSc.IntType) # Assemble the FDM preconditioner with sparse local matrices - Pmat, self._assemble_P = self.allocate_matrix(V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation) + Pmat, self.assembly_callables = self.allocate_matrix(V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation) Pmat.setNullSpace(Amat.getNullSpace()) Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) Pmat.setNearNullSpace(Amat.getNearNullSpace()) @@ -211,7 +211,7 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): :arg pmat_type: the preconditioner `PETSc.Mat.Type` :arg use_static_condensation: are we assembling the statically-condensed Schur complement on facets? - :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and its assembly callable + :returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and a list of assembly callables """ ifacet = [i for i, Vsub in enumerate(V) if is_restricted(Vsub.finat_element)[1]] if len(ifacet) == 0: @@ -258,8 +258,6 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): return lgmap.apply(result, result=result) # Create data structures needed for assembly - bc_rows = {} - bc_vals = {} self.cell_to_global = {} self.lgmaps = {} for Vsub in V: @@ -268,11 +266,6 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): cell_to_local, nel = extrude_node_map(Vsub.cell_node_map(), bsize=bsize) self.cell_to_global[Vsub] = partial(cell_to_global, lgmap, cell_to_local) self.lgmaps[Vsub] = lgmap - - own = Vsub.dof_dset.layout_vec.getLocalSize() - bdofs = numpy.flatnonzero(lgmap.indices[:own] < 0).astype(PETSc.IntType)[:, None] - bc_rows[Vsub] = Vsub.dof_dset.lgmap.apply(bdofs, result=bdofs) - bc_vals[Vsub] = numpy.ones(bdofs.shape, dtype=PETSc.RealType) self.nel = nel coefficients, assembly_callables = self.assemble_coefficients(J, fcp) @@ -329,6 +322,16 @@ def get_coeffs(e, result=None): if ptype.endswith("sbaij"): P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) P.setUp() + # append callables to zero entries, insert element matrices, and apply BCs + assembly_callables.append(P.zeroEntries) + assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv)) + if on_diag: + own = Vrow.dof_dset.layout_vec.getLocalSize() + bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] + Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) + if len(bdofs) > 0: + vals = numpy.ones(bdofs.shape, dtype=PETSc.RealType) 
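# [Editorial aside, not part of the patch] This commit records the per-block
# assembly work as a list of zero-argument callables that _assemble_P replays
# on every update. Self-contained toy of the same deferred-assembly pattern,
# with a list standing in for the PETSc matrix operations:
from functools import partial

log = []
assembly_callables = [partial(log.append, "zeroEntries"),
                      partial(log.append, "set_values"),
                      partial(log.append, "assemble")]

def assemble_all():
    for thunk in assembly_callables:
        thunk()

assemble_all()
assert log == ["zeroEntries", "set_values", "assemble"]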
+ assembly_callables.append(partial(P.setValuesRCV, bdofs, bdofs, vals, addv)) Pmats[Vrow, Vcol] = P if len(V) == 1: @@ -336,22 +339,13 @@ def get_coeffs(e, result=None): else: Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) - @PETSc.Log.EventDecorator("FDMAssemble") - def assemble_P(): - for _assemble in assembly_callables: - _assemble() - for Vrow, Vcol in product(Vsort, Vsort): - P = Pmats[Vrow, Vcol] - if P.getType().endswith("aij"): - P.zeroEntries() - self.set_values(P, Vrow, Vcol, addv) - for Vrow in Vsort: - rows = bc_rows[Vrow] - if len(rows) > 0: - Pmats[Vrow, Vrow].setValuesRCV(rows, rows, bc_vals[Vrow], addv) - Pmat.assemble() - - return Pmat, assemble_P + assembly_callables.append(Pmat.assemble) + return Pmat, assembly_callables + + @PETSc.Log.EventDecorator("FDMAssemble") + def _assemble_P(self): + for _assemble in self.assembly_callables: + _assemble() @PETSc.Log.EventDecorator("FDMUpdate") def update(self, pc): @@ -551,12 +545,11 @@ def assemble_coefficients(self, J, fcp, block_diagonal=True): # Construct Z = broken(V^k) * broken(V^{k+1}) V = args_J[0].function_space() - formdegree = V.finat_element.formdegree - degree = e.degree() - try: - degree = max(degree) - except TypeError: - pass + fe = V.finat_element + formdegree = fe.formdegree + degree = fe.degree + if type(degree) != int: + degree, = set(degree) qdeg = degree if formdegree == tdim: qfam = "DG" if tdim == 1 else "DQ" @@ -615,18 +608,17 @@ def assemble_reference_tensor(self, V, transpose=False): :returns: a :class:`PETSc.Mat` interpolating V^k * d(V^k) onto broken(V^k) * broken(V^{k+1}) on the reference element. """ - tdim = V.mesh().topological_dimension() value_size = V.value_size - formdegree = V.finat_element.formdegree - degree = V.finat_element.degree - try: - degree = max(degree) - except TypeError: - pass + fe = V.finat_element + tdim = fe.cell.get_spatial_dimension() + formdegree = fe.formdegree + degree = fe.degree + if type(degree) != int: + degree, = set(degree) if formdegree == tdim: degree = degree + 1 - is_interior, is_facet = is_restricted(V.finat_element) - key = (degree, tdim, formdegree, value_size, is_interior, is_facet, transpose) + is_interior, is_facet = is_restricted(fe) + key = (value_size, tdim, degree, formdegree, is_interior, is_facet, transpose) cache = self._cache.setdefault("reference_tensor", {}) try: return cache[key] @@ -636,7 +628,7 @@ def assemble_reference_tensor(self, V, transpose=False): result = PETSc.Mat().createTranspose(result).convert(result.getType()) return cache.setdefault(key, result) - full_key = (degree, tdim, formdegree, value_size, False, False, False) + full_key = key[:-3] + (False,) * 3 if is_facet and full_key in cache: result = cache[full_key] noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) @@ -644,7 +636,7 @@ def assemble_reference_tensor(self, V, transpose=False): noperm.destroy() return cache.setdefault(key, result) - elements = sorted(get_base_elements(V.finat_element), key=lambda e: e.formdegree) + elements = sorted(get_base_elements(fe), key=lambda e: e.formdegree) ref_el = elements[0].get_reference_element() eq = FIAT.FDMQuadrature(ref_el, degree) e0 = elements[0] if elements[0].formdegree == 0 else FIAT.FDMLagrange(ref_el, degree) @@ -1217,7 +1209,7 @@ def get_base_elements(e): return sum(list(map(get_base_elements, e.elements)), []) elif isinstance(e, finat.TensorProductElement): return sum(list(map(get_base_elements, e.factors)), []) - 
elif isinstance(e, finat.cube.FlattenedDimensions): + elif isinstance(e, finat.FlattenedDimensions): return get_base_elements(e.product) elif isinstance(e, (finat.HCurlElement, finat.HDivElement)): return get_base_elements(e.wrappee) From 63b82ec40223c0193eeea4b4a61ddf0cba271fb2 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Sat, 8 Apr 2023 11:52:09 +0100 Subject: [PATCH 63/75] use ElementKernel and SparseAssembler classes --- firedrake/preconditioners/fdm.py | 715 ++++++++++++++++--------------- 1 file changed, 379 insertions(+), 336 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index f0655ec48f..56eb9defad 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -21,11 +21,12 @@ from pyop2.utils import get_petsc_dir import firedrake.dmhooks as dmhooks -import ctypes -import numpy import ufl import FIAT import finat +import numpy +import ctypes +import operator Citations().add("Brubeck2022a", """ @article{Brubeck2022a, @@ -78,22 +79,6 @@ class FDMPC(PCBase): _citation = "Brubeck2022b" _cache = {} - @staticmethod - def setSubMatCSR(comm, triu=False): - """ - Compile C code to insert sparse submatrices and store in class cache - - :arg triu: are we inserting onto the upper triangular part of the matrix? - - :returns: a python wrapper for the matrix insertion function - """ - cache = FDMPC._cache.setdefault("setSubMatCSR", {}) - key = triu - try: - return cache[key] - except KeyError: - return cache.setdefault(key, load_setSubMatCSR(comm, triu)) - @PETSc.Log.EventDecorator("FDMInit") def initialize(self, pc): Citations().register(self._citation) @@ -236,8 +221,9 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): dofs = numpy.arange(value_size * Vbig.finat_element.space_dimension(), dtype=fdofs.dtype) idofs = numpy.setdiff1d(dofs, fdofs, assume_unique=True) - # Dictionary with the parent space and a method to form the Schur complement - self.get_static_condensation = {} + # Dictionaries with the parent space and kernel to compute the Schur complement + self.parent_space = {} + self.schur_kernel = {} if Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block if Vfacet.finat_element.formdegree == 0 and value_size == 1: @@ -246,45 +232,18 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): sc_builder = SchurComplementBlockCholesky else: sc_builder = SchurComplementBlockQR - self.get_static_condensation[Vfacet] = Vbig, sc_builder(idofs, fdofs).condense + self.schur_kernel[Vfacet] = partial(sc_builder, idofs, fdofs) + self.parent_space[Vfacet] = Vbig + elif len(fdofs) and V.finat_element.formdegree == 0: # If we are in H(grad), we just pad with zeros on the statically-condensed pattern - self.get_static_condensation[V] = Vbig, SchurComplementPattern(idofs, dofs).condense - - @PETSc.Log.EventDecorator("FDMGetIndices") - def cell_to_global(lgmap, cell_to_local, cell_index, result=None): - # Be careful not to create new arrays - result = cell_to_local(cell_index, result=result) - return lgmap.apply(result, result=result) + self.schur_kernel[V] = partial(SchurComplementKernel, idofs, dofs) + self.parent_space[V] = V # Create data structures needed for assembly - self.cell_to_global = {} - self.lgmaps = {} - for Vsub in V: - lgmap = Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) - bsize = Vsub.dof_dset.layout_vec.getBlockSize() - cell_to_local, nel = 
extrude_node_map(Vsub.cell_node_map(), bsize=bsize) - self.cell_to_global[Vsub] = partial(cell_to_global, lgmap, cell_to_local) - self.lgmaps[Vsub] = lgmap - self.nel = nel - - coefficients, assembly_callables = self.assemble_coefficients(J, fcp) - coeffs = [coefficients.get(name) for name in ("beta", "alpha")] - cdata = [c.dat.data_ro for c in coeffs] - cmaps = [extrude_node_map(c.cell_node_map())[0] for c in coeffs] - cindices = [cmap(0) if self.nel else None for cmap in cmaps] - - @PETSc.Log.EventDecorator("FDMGetCoeffs") - def get_coeffs(e, result=None): - # Get vector for betas and alphas on a cell - if result is None: - return numpy.concatenate([c[cmap(e, result=idx)] for c, cmap, idx in zip(cdata, cmaps, cindices)], out=result) - numpy.take(cdata[0], cmaps[0](e, result=cindices[0]), axis=0, out=result[:cindices[0].size]) - numpy.take(cdata[1], cmaps[1](e, result=cindices[1]), axis=0, out=result[cindices[0].size:]) - return result - - self.get_coeffs = get_coeffs - self.work_mats = {} + self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} + self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) + self.assemblers = {} Pmats = {} addv = PETSc.InsertMode.ADD_VALUES @@ -308,6 +267,7 @@ def get_coeffs(e, result=None): preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) preallocator.setUp() self.set_values(preallocator, Vrow, Vcol, addv, triu=triu) + preallocator.assemble() d_nnz, o_nnz = get_preallocation(preallocator, sizes[0][0]) preallocator.destroy() @@ -385,114 +345,6 @@ def destroy(self, pc): if hasattr(self, "pc"): self.pc.getOperators()[-1].destroy() self.pc.destroy() - if hasattr(self, "work_mats"): - for mat in self.work_mats.values(): - mat.destroy() - - @cached_property - def _element_mass_matrix(self): - data = self.get_coeffs(0) - data.fill(1.0E0) - shape = data.shape + (1,)*(3-len(data.shape)) - nrows = shape[0] * shape[1] - ai = numpy.arange(nrows+1, dtype=PETSc.IntType) - aj = numpy.tile(ai[:-1].reshape((-1, shape[1])), (1, shape[2])) - if shape[2] > 1: - ai *= shape[2] - data = numpy.tile(numpy.eye(shape[2], dtype=data.dtype), shape[:1] + (1,)*(len(shape)-1)) - Me = PETSc.Mat().createAIJ((nrows, nrows), bsize=shape[2], csr=(ai, aj, data), comm=PETSc.COMM_SELF) - return self.work_mats.setdefault("mass_matrix", Me) - - @cached_property - def _element_mass_diagonal(self): - return self.work_mats.setdefault("mass_diagonal", self._element_mass_matrix.getDiagonal()) - - @PETSc.Log.EventDecorator("FDMSetValues") - def set_values(self, A, Vrow, Vcol, addv, triu=False): - """ - Assemble the stiffness matrix in the FDM basis using sparse reference - tensors and diagonal mass matrices. - - :arg A: the :class:`PETSc.Mat` to assemble - :arg Vrow: the :class:`.FunctionSpace` test space - :arg Vcol: the :class:`.FunctionSpace` trial space - :arg addv: a `PETSc.Mat.InsertMode` - :arg triu: are we assembling only the upper triangular part? 
- """ - if self.nel == 0: - # This MPI rank does not own any elements, nothing to be done - return - - def get_key(*args): - return tuple(map(lambda V: V.ufl_element() if V else None, args)) - - Vbig = None - condense_element_mat = lambda Ae, result=None: Ae - if Vrow == Vcol: - Vbig, condense_element_mat = self.get_static_condensation.get(Vrow, (Vbig, condense_element_mat)) - - Me = self._element_mass_matrix - # Interpolation of basis and exterior derivative onto broken spaces - ctensor = self.assemble_reference_tensor(Vbig or Vcol) - rtensor = self.assemble_reference_tensor(Vbig or Vrow, transpose=True) - # Element matrix obtained via Equation (3.9) of Brubeck2022b - assemble_element_mat = partial(rtensor.matMatMult, Me, ctensor) - # Preallocate the element matrix - key = get_key(Vbig or Vrow, Vbig or Vcol, None) - try: - Ae = self.work_mats[key] - except KeyError: - Ae = self.work_mats.setdefault(key, assemble_element_mat()) - # Preallocate the element Schur complement - key = get_key(Vrow, Vcol, Vbig) - try: - Se = self.work_mats[key] - except KeyError: - Se = self.work_mats.setdefault(key, condense_element_mat(Ae)) - - get_rindices = self.cell_to_global[Vrow] - rindices = numpy.empty(Se.getSize()[:1], dtype=PETSc.IntType) - if Vrow == Vcol: - get_cindices = lambda e, result=None: result - cindices = rindices - else: - get_cindices = self.cell_to_global[Vcol] - cindices = numpy.empty(Se.getSize()[1:], dtype=PETSc.IntType) - - setSubMatCSR = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) - insert = PETSc.InsertMode.INSERT - if A.getType() == PETSc.Mat.Type.PREALLOCATOR: - # Empty kernel for preallocation - element_kernel = lambda e, result=None: result - condense_element_mat = lambda Ae, result=None: result - - elif Me.getBlockSize() == 1: - # Kernel with diagonal mass matrix - diagonal = self._element_mass_diagonal - data = diagonal.array_w.reshape((-1,) + Vrow.shape) - - def element_kernel(e, result=None): - self.get_coeffs(e, result=data) - Me.setDiagonal(diagonal, addv=insert) - return assemble_element_mat(result=result) - else: - # Kernel with block diagonal mass matrix - ai, aj, data = Me.getValuesCSR() - data = data.reshape((-1,) + Vrow.shape * 2) - - def element_kernel(e, result=None): - self.get_coeffs(e, result=data) - Me.setValuesCSR(ai, aj, data, addv=insert) - Me.assemble() - return assemble_element_mat(result=result) - - # Core assembly loop - for e in range(self.nel): - get_rindices(e, result=rindices) - get_cindices(e, result=cindices) - Ae = element_kernel(e, result=Ae) - Se = condense_element_mat(Ae, result=Se) - setSubMatCSR(A, Se, rindices, cindices, addv) @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coefficients(self, J, fcp, block_diagonal=True): @@ -623,88 +475,306 @@ def assemble_reference_tensor(self, V, transpose=False): try: return cache[key] except KeyError: - if transpose: - result = self.assemble_reference_tensor(V, transpose=False) - result = PETSc.Mat().createTranspose(result).convert(result.getType()) - return cache.setdefault(key, result) - - full_key = key[:-3] + (False,) * 3 - if is_facet and full_key in cache: - result = cache[full_key] - noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) - result = result.createSubMatrix(noperm, self.fises) - noperm.destroy() - return cache.setdefault(key, result) - - elements = sorted(get_base_elements(fe), key=lambda e: e.formdegree) - ref_el = elements[0].get_reference_element() - eq = FIAT.FDMQuadrature(ref_el, degree) - e0 = elements[0] 
if elements[0].formdegree == 0 else FIAT.FDMLagrange(ref_el, degree) - e1 = elements[-1] if elements[-1].formdegree == 1 else FIAT.FDMDiscontinuousLagrange(ref_el, degree-1) - if is_interior: - e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") - - A00 = petsc_sparse(fiat_reference_prolongator(e0, eq), comm=PETSc.COMM_SELF) - A10 = petsc_sparse(fiat_reference_prolongator(e0, e1, derivative=True), comm=PETSc.COMM_SELF) - A11 = petsc_sparse(numpy.eye(e1.space_dimension(), dtype=PETSc.RealType), comm=PETSc.COMM_SELF) - B_blocks = mass_blocks(tdim, formdegree, A00, A11) - A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) - result = block_mat(B_blocks + A_blocks, destroy_blocks=True) - A00.destroy() - A10.destroy() - A11.destroy() - - if value_size != 1: - eye = petsc_sparse(numpy.eye(value_size), comm=result.getComm()) - temp = result - result = temp.kron(eye) - temp.destroy() - eye.destroy() - - if is_facet: - cache[full_key] = result - noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) - result = result.createSubMatrix(noperm, self.fises) - noperm.destroy() + pass + if transpose: + result = self.assemble_reference_tensor(V, transpose=False) + result = PETSc.Mat().createTranspose(result).convert(result.getType()) + return cache.setdefault(key, result) + full_key = key[:-3] + (False,) * 3 + if is_facet and full_key in cache: + result = cache[full_key] + noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) + result = result.createSubMatrix(noperm, self.fises) + noperm.destroy() return cache.setdefault(key, result) + elements = sorted(get_base_elements(fe), key=lambda e: e.formdegree) + ref_el = elements[0].get_reference_element() + eq = FIAT.FDMQuadrature(ref_el, degree) + e0 = elements[0] if elements[0].formdegree == 0 else FIAT.FDMLagrange(ref_el, degree) + e1 = elements[-1] if elements[-1].formdegree == 1 else FIAT.FDMDiscontinuousLagrange(ref_el, degree-1) + if is_interior: + e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") + + A00 = petsc_sparse(fiat_reference_prolongator(e0, eq), comm=PETSc.COMM_SELF) + A10 = petsc_sparse(fiat_reference_prolongator(e0, e1, derivative=True), comm=PETSc.COMM_SELF) + A11 = petsc_sparse(numpy.eye(e1.space_dimension(), dtype=PETSc.RealType), comm=PETSc.COMM_SELF) + B_blocks = mass_blocks(tdim, formdegree, A00, A11) + A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) + result = block_mat(B_blocks + A_blocks, destroy_blocks=True) + A00.destroy() + A10.destroy() + A11.destroy() + + if value_size != 1: + eye = petsc_sparse(numpy.eye(value_size), comm=result.getComm()) + temp = result + result = temp.kron(eye) + temp.destroy() + eye.destroy() + + if is_facet: + cache[full_key] = result + noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) + result = result.createSubMatrix(noperm, self.fises) + noperm.destroy() + + return cache.setdefault(key, result) + + @cached_property + def _element_mass_matrix(self): + Z = [self.coefficients[name].function_space() for name in ("beta", "alpha")] + shape = (sum(V.finat_element.space_dimension() for V in Z),) + Z[0].shape + data = numpy.ones(shape, dtype=PETSc.RealType) + shape += (1,) * (3-len(shape)) + nrows = shape[0] * shape[1] + ai = numpy.arange(nrows+1, dtype=PETSc.IntType) + aj = numpy.tile(ai[:-1].reshape((-1, shape[1])), (1, shape[2])) + if shape[2] > 1: + ai *= shape[2] + data = 
numpy.tile(numpy.eye(shape[2], dtype=data.dtype), shape[:1] + (1,)*(len(shape)-1)) + return PETSc.Mat().createAIJ((nrows, nrows), csr=(ai, aj, data), comm=PETSc.COMM_SELF) + + @PETSc.Log.EventDecorator("FDMSetValues") + def set_values(self, A, Vrow, Vcol, addv, triu=False): + """ + Assemble the stiffness matrix in the FDM basis using sparse reference + tensors and diagonal mass matrices. + + :arg A: the :class:`PETSc.Mat` to assemble + :arg Vrow: the :class:`.FunctionSpace` test space + :arg Vcol: the :class:`.FunctionSpace` trial space + :arg addv: a `PETSc.Mat.InsertMode` + :arg triu: are we assembling only the upper triangular part? + """ + key = (Vrow.ufl_element(), Vcol.ufl_element()) + try: + assembler = self.assemblers[key] + except KeyError: + Vbig = None + if Vrow == Vcol: + Vbig = self.parent_space.get(Vrow) + + beta = self.coefficients["beta"] + alpha = self.coefficients["alpha"] + # Interpolation of basis and exterior derivative onto broken spaces + ctensor = self.assemble_reference_tensor(Vbig or Vcol) + rtensor = self.assemble_reference_tensor(Vbig or Vrow, transpose=True) + element_kernel = TripleProductKernel(rtensor, self._element_mass_matrix, ctensor, beta, alpha) + if Vbig is not None: + element_kernel = self.schur_kernel[Vrow](element_kernel) + + assembler = SparseAssembler(element_kernel, Vrow, Vcol, self.lgmaps[Vrow], self.lgmaps[Vcol]) + self.assemblers.setdefault(key, assembler) + assembler.assemble(A, addv=addv, triu=triu) + + +class SparseAssembler(object): + + _cache = {} -class SchurComplementBuilder(object): + @staticmethod + def setSubMatCSR(comm, triu=False): + """ + Compile C code to insert sparse submatrices and store in class cache + + :arg triu: are we inserting onto the upper triangular part of the matrix? + + :returns: a python wrapper for the matrix insertion function + """ + cache = SparseAssembler._cache.setdefault("setSubMatCSR", {}) + key = triu + try: + return cache[key] + except KeyError: + return cache.setdefault(key, load_setSubMatCSR(comm, triu)) + + def __init__(self, kernel, Vrow, Vcol, rmap, cmap): + self.kernel = kernel + m, n = kernel.result.getSize() + + spaces = [Vrow] + row_shape = tuple() if Vrow.value_size == 1 else (Vrow.value_size,) + map_rows = (self.map_block_indices, rmap) if row_shape else (rmap.apply,) + rows = numpy.empty((m, ), dtype=PETSc.IntType).reshape((-1,) + row_shape) + + self.bc_nodes = None + if Vcol == Vrow: + cols = rows + map_cols = (lambda *x, result=None: result, ) + # own = Vrow.dof_dset.layout_vec.getLocalSize() + # bc_nodes = numpy.flatnonzero(rmap.indices[:own] < 0).astype(PETSc.IntType) + # if len(bc_nodes) > 0: + # bc_nodes = Vrow.dof_dset.lgmap.apply(bc_nodes, result=bc_nodes) + # self.bc_nodes = bc_nodes[:, None] + else: + spaces.append(Vcol) + col_shape = tuple() if Vcol.value_size == 1 else (Vcol.value_size,) + map_cols = (self.map_block_indices, cmap) if col_shape else (cmap.apply, ) + cols = numpy.empty((n, ), dtype=PETSc.IntType).reshape((-1,) + col_shape) + + spaces.extend(c.function_space() for c in kernel.coefficients) + self.indices = tuple(numpy.empty((V.finat_element.space_dimension(),), dtype=PETSc.IntType) for V in spaces) + self.map_rows = partial(*map_rows, self.indices[spaces.index(Vrow)], result=rows) + self.map_cols = partial(*map_cols, self.indices[spaces.index(Vcol)], result=cols) + self.kernel_args = self.indices[1+spaces.index(Vcol):] + + integral_type = kernel.integral_type + if integral_type == "cell": + get_map = operator.methodcaller("cell_node_map") + elif integral_type == 
"interior_facet": + get_map = operator.methodcaller("interior_facet_node_map") + else: + raise NotImplementedError("Only for cell or interior facet integrals") + self.node_maps = tuple(map(get_map, spaces)) + node_map = self.node_maps[0] + self.nel = node_map.values.shape[0] + if node_map.offset is None: + layers = None + else: + layers = node_map.iterset.layers_array + layers = layers[:, 1]-layers[:, 0]-1 + if layers.shape[0] != self.nel: + layers = numpy.repeat(layers, self.nel) + self.layers = layers + + def map_block_indices(self, lgmap, indices, result=None): + bsize = result.shape[1] + numpy.copyto(result[:, 0], indices) + result[:, 0] *= bsize + numpy.add.outer(result[:, 0], numpy.arange(1, bsize, dtype=indices.dtype), out=result[:, 1:]) + return lgmap.apply(result, result=result) + + def set_indices(self, e): + for index, node_map in zip(self.indices, self.node_maps): + numpy.copyto(index, node_map.values_with_halo[e]) + + def add_offsets(self): + for index, node_map in zip(self.indices, self.node_maps): + index += node_map.offset + + def assemble(self, A, addv=None, triu=False): + if A.getType() == PETSc.Mat.Type.PREALLOCATOR: + kernel = lambda *args, result=None: result + else: + kernel = self.kernel + triu = False + if self.bc_nodes is not None: + vals = numpy.ones(self.bc_nodes.shape, dtype=PETSc.RealType) + A.setValuesRCV(self.bc_nodes, self.bc_nodes, vals, addv) + result = self.kernel.result + insert = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) + + # Core assembly loop + if self.layers is None: + for e in range(self.nel): + self.set_indices(e) + insert(A, kernel(*self.kernel_args, result=result), + self.map_rows(), self.map_cols(), addv) + else: + for e in range(self.nel): + self.set_indices(e) + for _ in range(self.layers[e]): + insert(A, kernel(*self.kernel_args, result=result), + self.map_rows(), self.map_cols(), addv) + self.add_offsets() + + +class ElementKernel(object): """ - Class to build Schur complement matrices that reuses work matrices and the - symbolic factorization of the interior block. + A constant element kernel """ + def __init__(self, A, *coefficients): + self.result = A + self.coefficients = coefficients + self.integral_type = "cell" - def __init__(self, idofs, fdofs): - self.idofs = idofs - self.fdofs = fdofs - self.slices = {} - self.ises = tuple() - self.isrows = [] - self.iscols = [] - self.submats = [] - self.work = [None for _ in range(2)] + def __call__(self, *args, result=None): + return result or self.result def __del__(self): - self.reset() + self.destroy() - def reset(self): - for obj in self.ises: - if isinstance(obj, PETSc.Object): - obj.destroy() - for obj in self.submats: - if isinstance(obj, PETSc.Object): - obj.destroy() - for obj in self.work: + def destroy(self): + pass + + +class TripleProductKernel(ElementKernel): + """ + An element kernel to compute a triple matrix product A * B * C Where A and + C are constant matrices and B is a block diagonal matrix with entries given + by coefficients. + See Equation (3.9) of Brubeck2022b. 
+ """ + def __init__(self, A, B, C, *coefficients): + self.work = None + V = coefficients[0].function_space() + dshape = (-1, ) + coefficients[0].dat.data_ro.shape[1:] + if V.value_size == 1: + self.work = B.getDiagonal() + self.update = partial(B.setDiagonal, self.work) + self.data = self.work.array_w.reshape(dshape) + else: + indptr, indices, data = B.getValuesCSR() + self.data = data.reshape(dshape) + self.update = lambda *args: (B.setValuesCSR(indptr, indices, self.data), B.assemble()) + + stops = numpy.cumsum([0] + [c.function_space().finat_element.space_dimension() for c in coefficients]) + self.slices = [slice(*stops[k:k+2]) for k in range(len(stops)-1)] + self.product = partial(A.matMatMult, B, C) + super().__init__(self.product(), *coefficients) + + def __call__(self, *indices, result=None): + for c, i, z in zip(self.coefficients, indices, self.slices): + numpy.take(c.dat.data_ro, i, axis=0, out=self.data[z]) + self.update() + return self.product(result=result) + + def destroy(self): + self.result.destroy() + if isinstance(self.work, PETSc.Object): + self.work.destroy() + + +class SchurComplementKernel(ElementKernel): + """ + An element kernel to compute Schur complements that reuses work matrices and the + symbolic factorization of the interior block. + """ + def __init__(self, idofs, fdofs, kernel): + self.kernel = kernel + self.A = kernel.result + comm = self.A.getComm() + i0, i1 = tuple(PETSc.IS().createGeneral(i, comm=comm) for i in (idofs, fdofs)) + self.slices = self.sort_interior_dofs(i0, self.A) + self.isrows = [i0, i0, i1, i1] + self.iscols = [i0, i1, i0, i1] + self.ises = (i0, i1) + self.work = [None for _ in range(2)] + self.submats = [] + super().__init__(self.condense(), *kernel.coefficients) + + def __call__(self, *args, result=None): + self.kernel(*args, result=self.A) + return self.condense(result=result) + + def destroy(self): + self.kernel.destroy() + self.result.destroy() + objs = [] + objs.extend(self.ises) + objs.extend(self.work) + objs.extend(self.submats) + for obj in objs: if isinstance(obj, PETSc.Object): obj.destroy() - self.submats = [] - self.work = [None for _ in range(2)] def sort_interior_dofs(self, i0, A): """Permute `i0` to have A[i0, i0] with square blocks of - increasing dimension along its diagonal. Add slices with the extents + increasing dimension along its diagonal. 
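        The grouping can be pictured on a small matrix: rows are keyed by
        their number of nonzeros, so 1x1 blocks come first, then 2x2 blocks,
        and so on.  A rough numpy sketch of the idea (the routine itself also
        keeps coupled rows adjacent within each group):

            import numpy
            A = numpy.array([[2., 0., 1., 0.],
                             [0., 3., 0., 0.],
                             [1., 0., 2., 0.],
                             [0., 0., 0., 4.]])
            degree = numpy.count_nonzero(A, axis=1)
            perm = numpy.argsort(degree, kind="stable")
            # A[perm][:, perm] is block diagonal: two 1x1 blocks, then a 2x2 block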
+ + Return a dict of slices with the extents of each set of blocks in the CSR representation of A.""" A00 = A.createSubMatrix(i0, i0) indptr, indices, _ = A00.getValuesCSR() @@ -712,7 +782,7 @@ def sort_interior_dofs(self, i0, A): perm = numpy.argsort(degree) icur = 0 istart = 0 - self.slices[1] = slice(0, 0) + slices = {1: slice(0, 0)} unique_degree, counts = numpy.unique(degree, return_counts=True) for k, kdofs in sorted(zip(unique_degree, counts)): if k > 1: @@ -722,35 +792,34 @@ def sort_interior_dofs(self, i0, A): neigh[row] = indices[slice(*indptr[i:i+2])] perm[icur:icur+kdofs] = list(dict.fromkeys(neigh.flat)) - self.slices[k] = slice(istart, istart + k * kdofs) + slices[k] = slice(istart, istart + k * kdofs) istart += k * kdofs icur += kdofs i0.setIndices(i0.getIndices()[perm]) A00.destroy() + return slices - def get_blocks(self, A): - if len(self.submats) == 0: - comm = A.getComm() - i0 = PETSc.IS().createGeneral(self.idofs, comm=comm) - i1 = PETSc.IS().createGeneral(self.fdofs, comm=comm) - self.sort_interior_dofs(i0, A) - self.isrows = [i0, i0, i1, i1] - self.iscols = [i0, i1, i0, i1] - self.ises = (i0, i1) - self.submats = A.createSubMatrices(self.isrows, iscols=self.iscols, submats=self.submats or None) + def get_blocks(self): + self.submats = self.A.createSubMatrices(self.isrows, self.iscols, submats=self.submats or None) return self.submats @PETSc.Log.EventDecorator("FDMCondense") - def condense(self, A, result=None): + def condense(self, result=None): + """By default pad with zeros the statically condensed pattern""" + structure = PETSc.Mat.Structure.SUBSET if result else None + if result is None: + A00, A01, A10, _ = self.get_blocks() + result = A10.matMatMult(A00, A01, result=result) + result.aypx(0.0, self.A, structure=structure) return result -class SchurComplementDiagonal(SchurComplementBuilder): +class SchurComplementDiagonal(SchurComplementKernel): @PETSc.Log.EventDecorator("FDMCondense") - def condense(self, A, result=None): + def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = self.get_blocks(A) + A00, A01, A10, A11 = self.get_blocks() self.work[0] = A00.getDiagonal(result=self.work[0]) self.work[0].reciprocal() self.work[0].scale(-1) @@ -760,24 +829,12 @@ def condense(self, A, result=None): return result -class SchurComplementPattern(SchurComplementBuilder): - - @PETSc.Log.EventDecorator("FDMCondense") - def condense(self, A, result=None): - structure = PETSc.Mat.Structure.SUBSET if result else None - if result is None: - A00, A01, A10, _ = self.get_blocks(A) - result = A10.matMatMult(A00, A01, result=result) - result.aypx(0.0, A, structure=structure) - return result - - -class SchurComplementBlockCholesky(SchurComplementBuilder): +class SchurComplementBlockCholesky(SchurComplementKernel): @PETSc.Log.EventDecorator("FDMCondense") - def condense(self, A, result=None): + def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = self.get_blocks(A) + A00, A01, A10, A11 = self.get_blocks() indptr, indices, R = A00.getValuesCSR() zlice = self.slices[1] @@ -785,9 +842,10 @@ def condense(self, A, result=None): numpy.reciprocal(R[zlice], out=R[zlice]) flops = 2 * (zlice.stop - zlice.start) for k in sorted(degree for degree in self.slices if degree > 1): - zlice = self.slices[k] - A = R[zlice].reshape((-1, k, k)) - R[zlice] = numpy.linalg.inv(numpy.linalg.cholesky(A)).reshape((-1)) + Rk = R[self.slices[k]] + A = Rk.reshape((-1, k, k)) + rinv = 
numpy.linalg.inv(numpy.linalg.cholesky(A)) + numpy.copyto(Rk, rinv.flat) flops += A.shape[0] * ((k**3)//3 + k**3) PETSc.Log.logFlops(flops) @@ -800,12 +858,12 @@ def condense(self, A, result=None): return result -class SchurComplementBlockQR(SchurComplementBuilder): +class SchurComplementBlockQR(SchurComplementKernel): - @PETSc.Log.EventDecorator("FDMGetSchur") - def condense(self, A, result=None): + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = self.get_blocks(A) + A00, A01, A10, A11 = self.get_blocks() indptr, indices, R = A00.getValuesCSR() Q = numpy.ones(R.shape, dtype=R.dtype) @@ -816,8 +874,9 @@ def condense(self, A, result=None): zlice = self.slices[k] A = R[zlice].reshape((-1, k, k)) q, r = numpy.linalg.qr(A, mode="complete") - Q[zlice] = q.reshape((-1,)) - R[zlice] = numpy.linalg.inv(r).reshape((-1,)) + numpy.copyto(Q[zlice], q.flat) + rinv = numpy.linalg.inv(r) + numpy.copyto(R[zlice], rinv.flat) flops += A.shape[0] * ((4*k**3)//3 + k**3) PETSc.Log.logFlops(flops) @@ -832,12 +891,12 @@ def condense(self, A, result=None): return result -class SchurComplementBlockSVD(SchurComplementBuilder): +class SchurComplementBlockSVD(SchurComplementKernel): - @PETSc.Log.EventDecorator("FDMGetSchur") - def condense(self, A, result=None): + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = self.get_blocks(A) + A00, A01, A10, A11 = self.get_blocks() indptr, indices, U = A00.getValuesCSR() V = numpy.ones(U.shape, dtype=U.dtype) self.work[0] = A00.getDiagonal(result=self.work[0]) @@ -850,9 +909,9 @@ def condense(self, A, result=None): A = U[bslice].reshape((-1, k, k)) u, s, v = numpy.linalg.svd(A, full_matrices=False) dslice = slice(dslice.stop, dslice.stop + k * A.shape[0]) - D.array_w[dslice] = s.reshape((-1,)) - U[bslice] = numpy.transpose(u, axes=(0, 2, 1)).reshape((-1,)) - V[bslice] = numpy.transpose(v, axes=(0, 2, 1)).reshape((-1,)) + numpy.copyto(D.array_w[dslice], s.flat) + numpy.copyto(U[bslice], numpy.transpose(u, axes=(0, 2, 1)).flat) + numpy.copyto(V[bslice], numpy.transpose(v, axes=(0, 2, 1)).flat) flops += A.shape[0] * ((4*k**3)//3 + 4*k**3) PETSc.Log.logFlops(flops) @@ -871,21 +930,22 @@ def condense(self, A, result=None): return result -class SchurComplementBlockInverse(SchurComplementBuilder): +class SchurComplementBlockInverse(SchurComplementKernel): - @PETSc.Log.EventDecorator("FDMGetSchur") - def condense(self, A, result=None): + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = self.get_blocks(A) + A00, A01, A10, A11 = self.get_blocks() indptr, indices, R = A00.getValuesCSR() zlice = self.slices[1] numpy.reciprocal(R[zlice], out=R[zlice]) flops = zlice.stop - zlice.start for k in sorted(degree for degree in self.slices if degree > 1): - zlice = self.slices[k] - A = R[zlice].reshape((-1, k, k)) - R[zlice] = numpy.linalg.inv(A).reshape((-1,)) + Rk = R[self.slices[k]] + A = Rk.reshape((-1, k, k)) + rinv = numpy.linalg.inv(A) + numpy.copyto(Rk, rinv.flat) flops += A.shape[0] * (k**3) PETSc.Log.logFlops(flops) @@ -899,25 +959,12 @@ def condense(self, A, result=None): @PETSc.Log.EventDecorator("LoadCode") def load_c_code(code, name, **kwargs): - cppargs = ["-I%s/include" % d for d in get_petsc_dir()] - ldargs = (["-L%s/lib" % d for d in get_petsc_dir()] - + 
["-Wl,-rpath,%s/lib" % d for d in get_petsc_dir()] + petsc_dir = get_petsc_dir() + cppargs = ["-I%s/include" % d for d in petsc_dir] + ldargs = (["-L%s/lib" % d for d in petsc_dir] + + ["-Wl,-rpath,%s/lib" % d for d in petsc_dir] + ["-lpetsc", "-lm"]) - funptr = load(code, "c", name, - cppargs=cppargs, ldargs=ldargs, - **kwargs) - - def get_pointer(obj): - if isinstance(obj, PETSc.Object): - return obj.handle - elif isinstance(obj, numpy.ndarray): - return obj.ctypes.data - return obj - - @PETSc.Log.EventDecorator(name) - def wrapper(*args): - return funptr(*map(get_pointer, args)) - return wrapper + return load(code, "c", name, cppargs=cppargs, ldargs=ldargs, **kwargs) def load_setSubMatCSR(comm, triu=False): @@ -925,10 +972,10 @@ def load_setSubMatCSR(comm, triu=False): Done in C for efficiency, since it loops over rows.""" if triu: name = "setSubMatCSR_SBAIJ" - select_cols = "icol < irow ? -1: icol" + select_cols = "icol -= (icol < irow) * (1 + icol);" else: name = "setSubMatCSR_AIJ" - select_cols = "icol" + select_cols = "" code = f""" #include @@ -959,7 +1006,8 @@ def load_setSubMatCSR(comm, triu=False): irow = rindices[i]; for (PetscInt j = 0; j < ncols; j++) {{ icol = cindices[cols[j]]; - indices[j] = {select_cols}; + {select_cols} + indices[j] = icol; }} ierr = MatSetValues(A, 1, &irow, ncols, indices, vals, addv);CHKERRQ(ierr); ierr = MatRestoreRow(B, i, &ncols, &cols, &vals);CHKERRQ(ierr); @@ -970,8 +1018,14 @@ def load_setSubMatCSR(comm, triu=False): """ argtypes = [ctypes.c_voidp, ctypes.c_voidp, ctypes.c_voidp, ctypes.c_voidp, ctypes.c_int] - return load_c_code(code, name, comm=comm, argtypes=argtypes, - restype=ctypes.c_int) + funptr = load_c_code(code, name, comm=comm, argtypes=argtypes, + restype=ctypes.c_int) + + @PETSc.Log.EventDecorator(name) + def wrapper(A, B, rows, cols, addv): + return funptr(A.handle, B.handle, rows.ctypes.data, cols.ctypes.data, addv) + + return wrapper def is_restricted(finat_element): @@ -1139,18 +1193,6 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None): temp.destroy() eye.destroy() - rmap = Vf.local_to_global_map(fbcs) - cmap = Vc.local_to_global_map(cbcs) - rlocal, nel = extrude_node_map(Vf.cell_node_map(), bsize=Vf.value_size) - clocal, nel = extrude_node_map(Vc.cell_node_map(), bsize=Vc.value_size) - - def cell_to_global(lgmap, cell_to_local, e, result=None): - result = cell_to_local(e, result=result) - return lgmap.apply(result, result=result) - - imode = PETSc.InsertMode.INSERT - update_Dmat = FDMPC.setSubMatCSR(PETSc.COMM_SELF, triu=False) - sizes = tuple(V.dof_dset.layout_vec.getSizes() for V in (Vf, Vc)) block_size = Vf.dof_dset.layout_vec.getBlockSize() preallocator = PETSc.Mat().create(comm=comm) @@ -1158,24 +1200,19 @@ def cell_to_global(lgmap, cell_to_local, e, result=None): preallocator.setSizes(sizes) preallocator.setUp() - rindices = None - cindices = None - for e in range(nel): - rindices = cell_to_global(rmap, rlocal, e, result=rindices) - cindices = cell_to_global(cmap, clocal, e, result=cindices) - update_Dmat(preallocator, Dhat, rindices, cindices, imode) - + insert = PETSc.InsertMode.INSERT + rmap = Vf.local_to_global_map(fbcs) + cmap = Vc.local_to_global_map(cbcs) + assembler = SparseAssembler(ElementKernel(Dhat), Vf, Vc, rmap, cmap) + assembler.assemble(preallocator, addv=insert) preallocator.assemble() + nnz = get_preallocation(preallocator, sizes[0][0]) preallocator.destroy() Dmat = PETSc.Mat().createAIJ(sizes, block_size, nnz=nnz, comm=comm) 
Dmat.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) - for e in range(nel): - rindices = cell_to_global(rmap, rlocal, e, result=rindices) - cindices = cell_to_global(cmap, clocal, e, result=cindices) - update_Dmat(Dmat, Dhat, rindices, cindices, imode) - + assembler.assemble(Dmat, addv=insert) Dmat.assemble() Dhat.destroy() return Dmat @@ -1285,11 +1322,18 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): :arg addv: a `PETSc.Mat.InsertMode` :arg triu: are we assembling only the upper triangular part? """ - set_submat = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) + set_submat = SparseAssembler.setSubMatCSR(PETSc.COMM_SELF, triu=triu) update_A = lambda A, Ae, rindices: set_submat(A, Ae, rindices, rindices, addv) condense_element_mat = lambda x: x - get_rindices = self.cell_to_global[Vrow] + def cell_to_global(lgmap, cell_to_local, cell_index, result=None): + # Be careful not to create new arrays + result = cell_to_local(cell_index, result=result) + return lgmap.apply(result, result=result) + + bsize = Vrow.dof_dset.layout_vec.getBlockSize() + cell_to_local, nel = extrude_node_map(Vrow.cell_node_map(), bsize=bsize) + get_rindices = partial(cell_to_global, self.lgmaps[Vrow], cell_to_local) Afdm, Dfdm, bdof, axes_shifts = self.assemble_reference_tensor(Vrow) Gq = self.coefficients.get("alpha") @@ -1333,7 +1377,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): aptr = numpy.arange(0, (bshape[0]+1)*bshape[1], bshape[1], dtype=PETSc.IntType) aidx = numpy.tile(numpy.arange(bshape[1], dtype=PETSc.IntType), bshape[0]) - for e in range(self.nel): + for e in range(nel): # Ae = Be kron Bq[e] adata = numpy.sum(Bq.dat.data_ro[index_coef(e)], axis=0) Ae = PETSc.Mat().createAIJWithArrays(bshape, (aptr, aidx, adata), comm=PETSc.COMM_SELF) @@ -1349,7 +1393,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): ae = numpy.zeros((ncomp, tdim), dtype=PETSc.RealType) be = numpy.zeros((ncomp,), dtype=PETSc.RealType) je = None - for e in range(self.nel): + for e in range(nel): je = index_coef(e, result=je) bce = bcflags.dat.data_ro_with_halos[index_bc(e)] > 1E-8 # get coefficients on this cell @@ -1622,7 +1666,6 @@ def assemble_coefficients(self, J, fcp): for coef in coefficients.values(): with coef.dat.vec as cvec: cvec.set(1.0E0) - self.coefficients = coefficients return coefficients, assembly_callables From 49302eb5eb34401bc61bf9795c5fb32a225171a5 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 10 Apr 2023 16:07:09 +0100 Subject: [PATCH 64/75] avoid createSubMatrices in static condensation --- firedrake/preconditioners/fdm.py | 218 ++++++++++++++++--------------- 1 file changed, 112 insertions(+), 106 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 56eb9defad..d5583ed177 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -218,27 +218,24 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): if value_size != 1: fdofs = numpy.add.outer(value_size * fdofs, numpy.arange(value_size, dtype=fdofs.dtype)) self.fises = PETSc.IS().createGeneral(fdofs, comm=PETSc.COMM_SELF) - dofs = numpy.arange(value_size * Vbig.finat_element.space_dimension(), dtype=fdofs.dtype) - idofs = numpy.setdiff1d(dofs, fdofs, assume_unique=True) - # Dictionaries with the parent space and kernel to compute the Schur complement - self.parent_space = {} + # Dictionaries with the complement space and kernel to compute the Schur complement + self.complement_space = {} self.schur_kernel = {} if 
Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block + self.complement_space[Vfacet] = FunctionSpace(V.mesh(), restrict_element(ebig, "interior")) if Vfacet.finat_element.formdegree == 0 and value_size == 1: - sc_builder = SchurComplementDiagonal + self.schur_kernel[Vfacet] = SchurComplementDiagonal elif pmat_type.endswith("sbaij"): - sc_builder = SchurComplementBlockCholesky + self.schur_kernel[Vfacet] = SchurComplementBlockCholesky else: - sc_builder = SchurComplementBlockQR - self.schur_kernel[Vfacet] = partial(sc_builder, idofs, fdofs) - self.parent_space[Vfacet] = Vbig + self.schur_kernel[Vfacet] = SchurComplementBlockQR elif len(fdofs) and V.finat_element.formdegree == 0: # If we are in H(grad), we just pad with zeros on the statically-condensed pattern - self.schur_kernel[V] = partial(SchurComplementKernel, idofs, dofs) - self.parent_space[V] = V + self.complement_space[V] = FunctionSpace(V.mesh(), restrict_element(V.ufl_element(), "interior")) + self.schur_kernel[V] = SchurComplementKernel # Create data structures needed for assembly self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} @@ -450,7 +447,7 @@ def assemble_coefficients(self, J, fcp, block_diagonal=True): return coefficients, assembly_callables @PETSc.Log.EventDecorator("FDMRefTensor") - def assemble_reference_tensor(self, V, transpose=False): + def assemble_reference_tensor(self, V, transpose=False, sort_interior=False): """ Return the reference tensor used in the diagonal factorisation of the sparse cell matrices. See Section 3.2 of Brubeck2022b. @@ -470,18 +467,38 @@ def assemble_reference_tensor(self, V, transpose=False): if formdegree == tdim: degree = degree + 1 is_interior, is_facet = is_restricted(fe) - key = (value_size, tdim, degree, formdegree, is_interior, is_facet, transpose) + key = (value_size, tdim, degree, formdegree, is_interior, is_facet, transpose, sort_interior) cache = self._cache.setdefault("reference_tensor", {}) try: return cache[key] except KeyError: pass + if transpose: - result = self.assemble_reference_tensor(V, transpose=False) + result = self.assemble_reference_tensor(V, transpose=False, sort_interior=sort_interior) result = PETSc.Mat().createTranspose(result).convert(result.getType()) return cache.setdefault(key, result) - full_key = key[:-3] + (False,) * 3 + if sort_interior: + assert is_interior and not is_facet and not transpose + result = self.assemble_reference_tensor(V, transpose=transpose, sort_interior=False) + # Compute the stiffness matrix on the interior of a cell + A00 = self._element_mass_matrix.PtAP(result) + indptr, indices, _ = A00.getValuesCSR() + degree = numpy.diff(indptr) + # Sort DOFs to make A00 block diagonal with blocks of increasing dimension along the diagonal + perm = numpy.array(list(dict.fromkeys(indices)), dtype=indices.dtype) + perm = perm[numpy.argsort(degree[perm], kind='stable')] + A00.destroy() + + perm = PETSc.IS().createGeneral(perm, comm=result.getComm()) + noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) + result = result.createSubMatrix(noperm, perm) + noperm.destroy() + perm.destroy() + return cache.setdefault(key, result) + + full_key = key[:-4] + (False,) * 4 if is_facet and full_key in cache: result = cache[full_key] noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) @@ -552,18 +569,25 @@ def 
set_values(self, A, Vrow, Vcol, addv, triu=False): try: assembler = self.assemblers[key] except KeyError: - Vbig = None - if Vrow == Vcol: - Vbig = self.parent_space.get(Vrow) - - beta = self.coefficients["beta"] - alpha = self.coefficients["alpha"] # Interpolation of basis and exterior derivative onto broken spaces - ctensor = self.assemble_reference_tensor(Vbig or Vcol) - rtensor = self.assemble_reference_tensor(Vbig or Vrow, transpose=True) - element_kernel = TripleProductKernel(rtensor, self._element_mass_matrix, ctensor, beta, alpha) - if Vbig is not None: - element_kernel = self.schur_kernel[Vrow](element_kernel) + C1 = self.assemble_reference_tensor(Vcol) + R1 = self.assemble_reference_tensor(Vrow, transpose=True) + M = self._element_mass_matrix + # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2022b + element_kernel = TripleProductKernel(R1, M, C1, self.coefficients["beta"], self.coefficients["alpha"]) + + schur_kernel = None + if Vrow == Vcol: + schur_kernel = self.schur_kernel.get(Vrow) + if schur_kernel is not None: + V0 = self.complement_space[Vrow] + C0 = self.assemble_reference_tensor(V0, sort_interior=True) + R0 = self.assemble_reference_tensor(V0, sort_interior=True, transpose=True) + # Only the facet block updates the coefficients in M + element_kernel = schur_kernel(element_kernel, + TripleProductKernel(R1, M, C0), + TripleProductKernel(R0, M, C1), + TripleProductKernel(R0, M, C0)) assembler = SparseAssembler(element_kernel, Vrow, Vcol, self.lgmaps[Vrow], self.lgmaps[Vcol]) self.assemblers.setdefault(key, assembler) @@ -618,7 +642,7 @@ def __init__(self, kernel, Vrow, Vcol, rmap, cmap): self.indices = tuple(numpy.empty((V.finat_element.space_dimension(),), dtype=PETSc.IntType) for V in spaces) self.map_rows = partial(*map_rows, self.indices[spaces.index(Vrow)], result=rows) self.map_cols = partial(*map_cols, self.indices[spaces.index(Vcol)], result=cols) - self.kernel_args = self.indices[1+spaces.index(Vcol):] + self.kernel_args = self.indices[-len(kernel.coefficients):] integral_type = kernel.integral_type if integral_type == "cell": @@ -702,26 +726,31 @@ def destroy(self): class TripleProductKernel(ElementKernel): """ - An element kernel to compute a triple matrix product A * B * C Where A and + An element kernel to compute a triple matrix product A * B * C, where A and C are constant matrices and B is a block diagonal matrix with entries given by coefficients. - See Equation (3.9) of Brubeck2022b. 
""" def __init__(self, A, B, C, *coefficients): self.work = None - V = coefficients[0].function_space() - dshape = (-1, ) + coefficients[0].dat.data_ro.shape[1:] - if V.value_size == 1: - self.work = B.getDiagonal() - self.update = partial(B.setDiagonal, self.work) - self.data = self.work.array_w.reshape(dshape) + if len(coefficients) == 0: + self.data = numpy.array([]) + self.update = lambda *args: args else: - indptr, indices, data = B.getValuesCSR() - self.data = data.reshape(dshape) - self.update = lambda *args: (B.setValuesCSR(indptr, indices, self.data), B.assemble()) + V = coefficients[0].function_space() + dshape = (-1, ) + coefficients[0].dat.data_ro.shape[1:] + if V.value_size == 1: + self.work = B.getDiagonal() + self.data = self.work.array_w.reshape(dshape) + self.update = partial(B.setDiagonal, self.work) + else: + indptr, indices, data = B.getValuesCSR() + self.data = data.reshape(dshape) + self.update = lambda *args: (B.setValuesCSR(indptr, indices, self.data), B.assemble()) + + stops = numpy.zeros((len(coefficients) + 1,), dtype=PETSc.IntType) + numpy.cumsum([c.function_space().finat_element.space_dimension() for c in coefficients], out=stops[1:]) + self.slices = [slice(*stops[k:k+2]) for k in range(len(coefficients))] - stops = numpy.cumsum([0] + [c.function_space().finat_element.space_dimension() for c in coefficients]) - self.slices = [slice(*stops[k:k+2]) for k in range(len(stops)-1)] self.product = partial(A.matMatMult, B, C) super().__init__(self.product(), *coefficients) @@ -742,75 +771,48 @@ class SchurComplementKernel(ElementKernel): An element kernel to compute Schur complements that reuses work matrices and the symbolic factorization of the interior block. """ - def __init__(self, idofs, fdofs, kernel): - self.kernel = kernel - self.A = kernel.result - comm = self.A.getComm() - i0, i1 = tuple(PETSc.IS().createGeneral(i, comm=comm) for i in (idofs, fdofs)) - self.slices = self.sort_interior_dofs(i0, self.A) - self.isrows = [i0, i0, i1, i1] - self.iscols = [i0, i1, i0, i1] - self.ises = (i0, i1) + def __init__(self, *kernels): + self.children = kernels + self.submats = [k.result for k in self.children] + + # Create dict of slices with the extents of the diagonal blocks + A00 = self.submats[-1] + degree = numpy.diff(A00.getValuesCSR()[0]) + istart = 0 + self.slices = {1: slice(0, 0)} + unique_degree, counts = numpy.unique(degree, return_counts=True) + for k, kdofs in sorted(zip(unique_degree, counts)): + self.slices[k] = slice(istart, istart + k * kdofs) + istart += k * kdofs + self.work = [None for _ in range(2)] - self.submats = [] - super().__init__(self.condense(), *kernel.coefficients) + coefficients = [] + for k in self.children: + coefficients.extend(k.coefficients) + coefficients = list(dict.fromkeys(coefficients)) + super().__init__(self.condense(), *coefficients) def __call__(self, *args, result=None): - self.kernel(*args, result=self.A) + for k in self.children: + k(*args, result=k.result) return self.condense(result=result) def destroy(self): - self.kernel.destroy() + for k in self.children: + k.destroy() self.result.destroy() - objs = [] - objs.extend(self.ises) - objs.extend(self.work) - objs.extend(self.submats) - for obj in objs: + for obj in self.work: if isinstance(obj, PETSc.Object): obj.destroy() - def sort_interior_dofs(self, i0, A): - """Permute `i0` to have A[i0, i0] with square blocks of - increasing dimension along its diagonal. 
- - Return a dict of slices with the extents - of each set of blocks in the CSR representation of A.""" - A00 = A.createSubMatrix(i0, i0) - indptr, indices, _ = A00.getValuesCSR() - degree = numpy.diff(indptr) - perm = numpy.argsort(degree) - icur = 0 - istart = 0 - slices = {1: slice(0, 0)} - unique_degree, counts = numpy.unique(degree, return_counts=True) - for k, kdofs in sorted(zip(unique_degree, counts)): - if k > 1: - neigh = numpy.empty((kdofs, k), dtype=indices.dtype) - for row in range(kdofs): - i = perm[icur+row] - neigh[row] = indices[slice(*indptr[i:i+2])] - perm[icur:icur+kdofs] = list(dict.fromkeys(neigh.flat)) - - slices[k] = slice(istart, istart + k * kdofs) - istart += k * kdofs - icur += kdofs - i0.setIndices(i0.getIndices()[perm]) - A00.destroy() - return slices - - def get_blocks(self): - self.submats = self.A.createSubMatrices(self.isrows, self.iscols, submats=self.submats or None) - return self.submats - @PETSc.Log.EventDecorator("FDMCondense") def condense(self, result=None): """By default pad with zeros the statically condensed pattern""" structure = PETSc.Mat.Structure.SUBSET if result else None if result is None: - A00, A01, A10, _ = self.get_blocks() + _, A10, A01, A00 = self.submats result = A10.matMatMult(A00, A01, result=result) - result.aypx(0.0, self.A, structure=structure) + result.aypx(0.0, self.submats[0], structure=structure) return result @@ -819,7 +821,7 @@ class SchurComplementDiagonal(SchurComplementKernel): @PETSc.Log.EventDecorator("FDMCondense") def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = self.get_blocks() + A11, A10, A01, A00 = self.submats self.work[0] = A00.getDiagonal(result=self.work[0]) self.work[0].reciprocal() self.work[0].scale(-1) @@ -834,7 +836,7 @@ class SchurComplementBlockCholesky(SchurComplementKernel): @PETSc.Log.EventDecorator("FDMCondense") def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = self.get_blocks() + A11, A10, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() zlice = self.slices[1] @@ -863,7 +865,7 @@ class SchurComplementBlockQR(SchurComplementKernel): @PETSc.Log.EventDecorator("FDMCondense") def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = self.get_blocks() + A11, A10, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() Q = numpy.ones(R.shape, dtype=R.dtype) @@ -896,7 +898,7 @@ class SchurComplementBlockSVD(SchurComplementKernel): @PETSc.Log.EventDecorator("FDMCondense") def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = self.get_blocks() + A11, A10, A01, A00 = self.submats indptr, indices, U = A00.getValuesCSR() V = numpy.ones(U.shape, dtype=U.dtype) self.work[0] = A00.getDiagonal(result=self.work[0]) @@ -935,7 +937,7 @@ class SchurComplementBlockInverse(SchurComplementKernel): @PETSc.Log.EventDecorator("FDMCondense") def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A00, A01, A10, A11 = self.get_blocks() + A11, A10, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() zlice = self.slices[1] @@ -1218,6 +1220,18 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None): return Dmat +def restrict_element(ele, restriction_domain): + """Get an element that is not restricted and return the restricted element.""" + if isinstance(ele, ufl.VectorElement): + return 
type(ele)(restrict_element(ele._sub_element, restriction_domain), dim=ele.num_sub_elements()) + elif isinstance(ele, ufl.TensorElement): + return type(ele)(restrict_element(ele._sub_element, restriction_domain), shape=ele._shape, symmetry=ele.symmetry()) + elif isinstance(ele, ufl.MixedElement): + return type(ele)(*(restrict_element(e, restriction_domain) for e in ele.sub_elements())) + else: + return ele[restriction_domain] + + def unrestrict_element(ele): """Get an element that might or might not be restricted and return the parent unrestricted element.""" @@ -1225,18 +1239,10 @@ def unrestrict_element(ele): return type(ele)(unrestrict_element(ele._sub_element), dim=ele.num_sub_elements()) elif isinstance(ele, ufl.TensorElement): return type(ele)(unrestrict_element(ele._sub_element), shape=ele._shape, symmetry=ele.symmetry()) - elif isinstance(ele, ufl.EnrichedElement): - return type(ele)(*list(dict.fromkeys(unrestrict_element(e) for e in ele._elements))) - elif isinstance(ele, ufl.TensorProductElement): - return type(ele)(*(unrestrict_element(e) for e in ele.sub_elements()), cell=ele.cell()) elif isinstance(ele, ufl.MixedElement): return type(ele)(*(unrestrict_element(e) for e in ele.sub_elements())) - elif isinstance(ele, ufl.WithMapping): - return type(ele)(unrestrict_element(ele.wrapee), ele.mapping()) elif isinstance(ele, ufl.RestrictedElement): return unrestrict_element(ele._element) - elif isinstance(ele, (ufl.HDivElement, ufl.HCurlElement, ufl.BrokenElement)): - return type(ele)(unrestrict_element(ele._element)) else: return ele From ce7cae1f2a3da111eca447645a53a4646cf3a9f9 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 17 Apr 2023 17:45:46 +0100 Subject: [PATCH 65/75] extract finite elements from coefficient spaces, exploit symmetry in SchurComplementCholesky --- firedrake/preconditioners/fdm.py | 246 ++++++++++++++++++------------- firedrake/preconditioners/pmg.py | 16 +- 2 files changed, 151 insertions(+), 111 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index d5583ed177..c7d031079f 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -4,7 +4,7 @@ from firedrake.preconditioners.base import PCBase from firedrake.preconditioners.patch import bcdofs from firedrake.preconditioners.pmg import (prolongation_matrix_matfree, - fiat_reference_prolongator, + evaluate_dual, get_permutation_to_line_elements) from firedrake.preconditioners.facet_split import split_dofs, restricted_dofs from firedrake.formmanipulation import ExtractSubBlock @@ -193,15 +193,17 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): :arg J: the Jacobian bilinear form :arg bcs: an iterable of boundary conditions on V :arg fcp: form compiler parameters to assemble coefficients - :arg pmat_type: the preconditioner `PETSc.Mat.Type` + :arg pmat_type: the `PETSc.Mat.Type` for the blocks in the diagonal :arg use_static_condensation: are we assembling the statically-condensed Schur complement on facets? 
:returns: 2-tuple with the preconditioner :class:`PETSc.Mat` and a list of assembly callables """ + symmetric = pmat_type.endswith("sbaij") ifacet = [i for i, Vsub in enumerate(V) if is_restricted(Vsub.finat_element)[1]] if len(ifacet) == 0: Vfacet = None Vbig = V + ebig = V.ufl_element() _, fdofs = split_dofs(V.finat_element) elif len(ifacet) == 1: Vfacet = V[ifacet[0]] @@ -213,30 +215,27 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): fdofs = restricted_dofs(Vfacet.finat_element, Vbig.finat_element) else: raise ValueError("Expecting at most one FunctionSpace restricted onto facets.") + self.embedding_element = ebig - value_size = Vbig.value_size - if value_size != 1: - fdofs = numpy.add.outer(value_size * fdofs, numpy.arange(value_size, dtype=fdofs.dtype)) - self.fises = PETSc.IS().createGeneral(fdofs, comm=PETSc.COMM_SELF) + if Vbig.value_size == 1: + self.fises = PETSc.IS().createGeneral(fdofs, comm=PETSc.COMM_SELF) + else: + self.fises = PETSc.IS().createBlock(Vbig.value_size, fdofs, comm=PETSc.COMM_SELF) - # Dictionaries with the complement space and kernel to compute the Schur complement - self.complement_space = {} + # Dictionary with kernel to compute the Schur complement self.schur_kernel = {} - if Vfacet and use_static_condensation: + if V == Vbig and Vbig.finat_element.formdegree == 0: + # If we are in H(grad), we just pad with zeros on the statically-condensed pattern + self.schur_kernel[V] = SchurComplementPattern + elif Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block - self.complement_space[Vfacet] = FunctionSpace(V.mesh(), restrict_element(ebig, "interior")) - if Vfacet.finat_element.formdegree == 0 and value_size == 1: + if Vfacet.finat_element.formdegree == 0 and Vfacet.value_size == 1: self.schur_kernel[Vfacet] = SchurComplementDiagonal - elif pmat_type.endswith("sbaij"): + elif symmetric: self.schur_kernel[Vfacet] = SchurComplementBlockCholesky else: self.schur_kernel[Vfacet] = SchurComplementBlockQR - elif len(fdofs) and V.finat_element.formdegree == 0: - # If we are in H(grad), we just pad with zeros on the statically-condensed pattern - self.complement_space[V] = FunctionSpace(V.mesh(), restrict_element(V.ufl_element(), "interior")) - self.schur_kernel[V] = SchurComplementKernel - # Create data structures needed for assembly self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) @@ -244,8 +243,6 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): Pmats = {} addv = PETSc.InsertMode.ADD_VALUES - symmetric = pmat_type.endswith("sbaij") - # Store only off-diagonal blocks with more columns than rows to save memory Vsort = sorted(V, key=lambda Vsub: Vsub.dim()) # Loop over all pairs of subspaces @@ -276,6 +273,8 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): P.setSizes(sizes) P.setPreallocationNNZ((d_nnz, o_nnz)) P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) + if on_diag: + P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, True) if ptype.endswith("sbaij"): P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) P.setUp() @@ -295,7 +294,6 @@ def allocate_matrix(self, V, J, bcs, fcp, pmat_type, use_static_condensation): Pmat = Pmats[V, V] else: Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) - 
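        # Hedged, self-contained sketch (toy sizes, serial comm) of how the
        # blocks are glued together: each stored block is an AIJ matrix, the
        # comment above about keeping only the wider off-diagonal block can be
        # honoured with a virtual transpose, and the result is a MATNEST.
        # P00/P01/P10/P11/nest are illustrative names, not module attributes.
        from petsc4py import PETSc
        P00 = PETSc.Mat().createAIJ(((2, 2), (2, 2)), nnz=2, comm=PETSc.COMM_SELF)
        P11 = PETSc.Mat().createAIJ(((3, 3), (3, 3)), nnz=3, comm=PETSc.COMM_SELF)
        P01 = PETSc.Mat().createAIJ(((2, 2), (3, 3)), nnz=3, comm=PETSc.COMM_SELF)
        for block in (P00, P11, P01):
            block.assemble()
        P10 = PETSc.Mat().createTranspose(P01)   # reuse P01's values as its transpose
        nest = PETSc.Mat().createNest([[P00, P01], [P10, P11]], comm=PETSc.COMM_SELF)
        nest.assemble()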
assembly_callables.append(Pmat.assemble) return Pmat, assembly_callables @@ -358,6 +356,8 @@ def assemble_coefficients(self, J, fcp, block_diagonal=True): order coefficients keyed on ``"beta"`` and ``"alpha"``, and a list of assembly callables. """ + coefficients = {} + assembly_callables = [] # Basic idea: take the original bilinear form and # replace the exterior derivatives with arguments in broken(V^{k+1}). # Then, replace the original arguments with arguments in broken(V^k). @@ -427,8 +427,6 @@ def assemble_coefficients(self, J, fcp, block_diagonal=True): mixed_form = ufl.replace(ufl.replace(Jcell, repgrad), repargs) # Return coefficients and assembly callables - coefficients = {} - assembly_callables = [] if block_diagonal and V.shape: from firedrake.assemble import assemble M = assemble(mixed_form, mat_type="matfree", form_compiler_parameters=fcp) @@ -481,42 +479,54 @@ def assemble_reference_tensor(self, V, transpose=False, sort_interior=False): if sort_interior: assert is_interior and not is_facet and not transpose - result = self.assemble_reference_tensor(V, transpose=transpose, sort_interior=False) - # Compute the stiffness matrix on the interior of a cell - A00 = self._element_mass_matrix.PtAP(result) - indptr, indices, _ = A00.getValuesCSR() - degree = numpy.diff(indptr) # Sort DOFs to make A00 block diagonal with blocks of increasing dimension along the diagonal - perm = numpy.array(list(dict.fromkeys(indices)), dtype=indices.dtype) - perm = perm[numpy.argsort(degree[perm], kind='stable')] - A00.destroy() - - perm = PETSc.IS().createGeneral(perm, comm=result.getComm()) - noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) - result = result.createSubMatrix(noperm, perm) - noperm.destroy() - perm.destroy() + result = self.assemble_reference_tensor(V, transpose=transpose, sort_interior=False) + if formdegree != 0: + # Compute the stiffness matrix on the interior of a cell + A00 = self._element_mass_matrix.PtAP(result) + indptr, indices, _ = A00.getValuesCSR() + degree = numpy.diff(indptr) + # Sort by blocks + uniq, u_index = numpy.unique(indices, return_index=True) + perm = uniq[u_index.argsort(kind='stable')] + # Sort by degree + degree = degree[perm] + perm = perm[degree.argsort(kind='stable')] + A00.destroy() + + iscol = PETSc.IS().createGeneral(perm, comm=result.getComm()) + result = get_submat(result, iscol=iscol) + iscol.destroy() return cache.setdefault(key, result) full_key = key[:-4] + (False,) * 4 if is_facet and full_key in cache: - result = cache[full_key] - noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) - result = result.createSubMatrix(noperm, self.fises) - noperm.destroy() + result = get_submat(cache[full_key], iscol=self.fises) return cache.setdefault(key, result) + # Get CG(k) and DG(k-1) 1D elements from V elements = sorted(get_base_elements(fe), key=lambda e: e.formdegree) - ref_el = elements[0].get_reference_element() - eq = FIAT.FDMQuadrature(ref_el, degree) - e0 = elements[0] if elements[0].formdegree == 0 else FIAT.FDMLagrange(ref_el, degree) - e1 = elements[-1] if elements[-1].formdegree == 1 else FIAT.FDMDiscontinuousLagrange(ref_el, degree-1) - if is_interior: + e0, e1 = elements[::len(elements)-1] + e0 = elements[0] if elements[0].formdegree == 0 else None + e1 = elements[-1] if elements[-1].formdegree == 1 else None + if e0 and is_interior: e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") - A00 = 
petsc_sparse(fiat_reference_prolongator(e0, eq), comm=PETSc.COMM_SELF) - A10 = petsc_sparse(fiat_reference_prolongator(e0, e1, derivative=True), comm=PETSc.COMM_SELF) - A11 = petsc_sparse(numpy.eye(e1.space_dimension(), dtype=PETSc.RealType), comm=PETSc.COMM_SELF) + # Get broken(CG(k)) and DG(k-1) 1D elements from the coefficient spaces + Q0 = self.coefficients["beta"].function_space().finat_element.element + elements = sorted(get_base_elements(Q0), key=lambda e: e.formdegree) + q0 = elements[0] if elements[0].formdegree == 0 else None + q1 = elements[-1] + if q1.formdegree != 1: + Q1 = self.coefficients["alpha"].function_space().finat_element.element + q1 = sorted(get_base_elements(Q1), key=lambda e: e.formdegree)[-1] + + # Interpolate V * d(V) -> space(beta) * space(alpha) + comm = PETSc.COMM_SELF + zero = PETSc.Mat() + A00 = petsc_sparse(evaluate_dual(e0, q0), comm=comm) if e0 and q0 else zero + A11 = petsc_sparse(evaluate_dual(e1, q1), comm=comm) if e1 else zero + A10 = petsc_sparse(evaluate_dual(e0, q1, alpha=(1,)), comm=comm) if e0 else zero B_blocks = mass_blocks(tdim, formdegree, A00, A11) A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) result = block_mat(B_blocks + A_blocks, destroy_blocks=True) @@ -533,9 +543,7 @@ def assemble_reference_tensor(self, V, transpose=False, sort_interior=False): if is_facet: cache[full_key] = result - noperm = PETSc.IS().createGeneral(numpy.arange(result.getSize()[0], dtype=PETSc.IntType), comm=result.getComm()) - result = result.createSubMatrix(noperm, self.fises) - noperm.destroy() + result = get_submat(cache[full_key], iscol=self.fises) return cache.setdefault(key, result) @@ -580,7 +588,7 @@ def set_values(self, A, Vrow, Vcol, addv, triu=False): if Vrow == Vcol: schur_kernel = self.schur_kernel.get(Vrow) if schur_kernel is not None: - V0 = self.complement_space[Vrow] + V0 = FunctionSpace(Vrow.mesh(), restrict_element(self.embedding_element, "interior")) C0 = self.assemble_reference_tensor(V0, sort_interior=True) R0 = self.assemble_reference_tensor(V0, sort_interior=True, transpose=True) # Only the facet block updates the coefficients in M @@ -622,36 +630,32 @@ def __init__(self, kernel, Vrow, Vcol, rmap, cmap): row_shape = tuple() if Vrow.value_size == 1 else (Vrow.value_size,) map_rows = (self.map_block_indices, rmap) if row_shape else (rmap.apply,) rows = numpy.empty((m, ), dtype=PETSc.IntType).reshape((-1,) + row_shape) - - self.bc_nodes = None if Vcol == Vrow: cols = rows - map_cols = (lambda *x, result=None: result, ) - # own = Vrow.dof_dset.layout_vec.getLocalSize() - # bc_nodes = numpy.flatnonzero(rmap.indices[:own] < 0).astype(PETSc.IntType) - # if len(bc_nodes) > 0: - # bc_nodes = Vrow.dof_dset.lgmap.apply(bc_nodes, result=bc_nodes) - # self.bc_nodes = bc_nodes[:, None] + map_cols = (lambda *args, result=None: result, ) else: spaces.append(Vcol) col_shape = tuple() if Vcol.value_size == 1 else (Vcol.value_size,) map_cols = (self.map_block_indices, cmap) if col_shape else (cmap.apply, ) cols = numpy.empty((n, ), dtype=PETSc.IntType).reshape((-1,) + col_shape) - spaces.extend(c.function_space() for c in kernel.coefficients) - self.indices = tuple(numpy.empty((V.finat_element.space_dimension(),), dtype=PETSc.IntType) for V in spaces) - self.map_rows = partial(*map_rows, self.indices[spaces.index(Vrow)], result=rows) - self.map_cols = partial(*map_cols, self.indices[spaces.index(Vcol)], result=cols) - self.kernel_args = self.indices[-len(kernel.coefficients):] integral_type = kernel.integral_type - if integral_type == "cell": + if 
integral_type in ["cell", "interior_facet_horiz"]: get_map = operator.methodcaller("cell_node_map") - elif integral_type == "interior_facet": + elif integral_type in ["interior_facet", "interior_facet_vert"]: get_map = operator.methodcaller("interior_facet_node_map") else: raise NotImplementedError("Only for cell or interior facet integrals") self.node_maps = tuple(map(get_map, spaces)) + + ncell = 2 if integral_type.startswith("interior_facet") else 1 + self.indices = tuple(numpy.empty((V.finat_element.space_dimension() * ncell,), dtype=PETSc.IntType) for V in spaces) + self.map_rows = partial(*map_rows, self.indices[spaces.index(Vrow)], result=rows) + self.map_cols = partial(*map_cols, self.indices[spaces.index(Vcol)], result=cols) + self.kernel_args = self.indices[-len(kernel.coefficients):] + self.set_indices = self.copy_indices + node_map = self.node_maps[0] self.nel = node_map.values.shape[0] if node_map.offset is None: @@ -659,18 +663,27 @@ def __init__(self, kernel, Vrow, Vcol, rmap, cmap): else: layers = node_map.iterset.layers_array layers = layers[:, 1]-layers[:, 0]-1 + if integral_type.endswith("horiz"): + layers -= 1 + self.set_indices = self.copy_indices_horiz if layers.shape[0] != self.nel: layers = numpy.repeat(layers, self.nel) self.layers = layers def map_block_indices(self, lgmap, indices, result=None): - bsize = result.shape[1] + bsize = result.shape[-1] numpy.copyto(result[:, 0], indices) result[:, 0] *= bsize numpy.add.outer(result[:, 0], numpy.arange(1, bsize, dtype=indices.dtype), out=result[:, 1:]) return lgmap.apply(result, result=result) - def set_indices(self, e): + def copy_indices_horiz(self, e): + for index, node_map in zip(self.indices, self.node_maps): + index = index.reshape((2, -1)) + numpy.copyto(index, node_map.values_with_halo[e]) + index[1] += node_map.offset + + def copy_indices(self, e): for index, node_map in zip(self.indices, self.node_maps): numpy.copyto(index, node_map.values_with_halo[e]) @@ -683,10 +696,6 @@ def assemble(self, A, addv=None, triu=False): kernel = lambda *args, result=None: result else: kernel = self.kernel - triu = False - if self.bc_nodes is not None: - vals = numpy.ones(self.bc_nodes.shape, dtype=PETSc.RealType) - A.setValuesRCV(self.bc_nodes, self.bc_nodes, vals, addv) result = self.kernel.result insert = self.setSubMatCSR(PETSc.COMM_SELF, triu=triu) @@ -736,9 +745,8 @@ def __init__(self, A, B, C, *coefficients): self.data = numpy.array([]) self.update = lambda *args: args else: - V = coefficients[0].function_space() dshape = (-1, ) + coefficients[0].dat.data_ro.shape[1:] - if V.value_size == 1: + if numpy.prod(dshape[1:]) == 1: self.work = B.getDiagonal() self.data = self.work.array_w.reshape(dshape) self.update = partial(B.setDiagonal, self.work) @@ -773,7 +781,7 @@ class SchurComplementKernel(ElementKernel): """ def __init__(self, *kernels): self.children = kernels - self.submats = [k.result for k in self.children] + self.submats = [k.result for k in kernels] # Create dict of slices with the extents of the diagonal blocks A00 = self.submats[-1] @@ -785,6 +793,8 @@ def __init__(self, *kernels): self.slices[k] = slice(istart, istart + k * kdofs) istart += k * kdofs + self.blocks = sorted(degree for degree in self.slices if degree > 1) + self.work = [None for _ in range(2)] coefficients = [] for k in self.children: @@ -805,6 +815,18 @@ def destroy(self): if isinstance(obj, PETSc.Object): obj.destroy() + @PETSc.Log.EventDecorator("FDMCondense") + def condense(self, result=None): + return result + + +class 
SchurComplementPattern(SchurComplementKernel): + + def __call__(self, *args, result=None): + k = self.children[0] + k(*args, result=k.result) + return self.condense(result=result) + @PETSc.Log.EventDecorator("FDMCondense") def condense(self, result=None): """By default pad with zeros the statically condensed pattern""" @@ -833,17 +855,21 @@ def condense(self, result=None): class SchurComplementBlockCholesky(SchurComplementKernel): + def __init__(self, K11, K10, K01, K00): + # assume that K10 = K01^T + super().__init__(K11, K01, K00) + @PETSc.Log.EventDecorator("FDMCondense") def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - A11, A10, A01, A00 = self.submats + A11, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() zlice = self.slices[1] numpy.sqrt(R[zlice], out=R[zlice]) numpy.reciprocal(R[zlice], out=R[zlice]) flops = 2 * (zlice.stop - zlice.start) - for k in sorted(degree for degree in self.slices if degree > 1): + for k in self.blocks: Rk = R[self.slices[k]] A = Rk.reshape((-1, k, k)) rinv = numpy.linalg.inv(numpy.linalg.cholesky(A)) @@ -853,10 +879,9 @@ def condense(self, result=None): PETSc.Log.logFlops(flops) A00.setValuesCSR(indptr, indices, R) A00.assemble() - self.work[0] = A10.matTransposeMult(A00, result=self.work[0]) - A00.scale(-1.0) - result = self.work[0].matMatMult(A00, A01, result=result) - result.axpy(1.0, A11, structure=structure) + self.work[0] = A00.matMult(A01, result=self.work[0]) + result = self.work[0].transposeMatMult(self.work[0], result=result) + result.aypx(-1.0, A11, structure=structure) return result @@ -872,7 +897,7 @@ def condense(self, result=None): zlice = self.slices[1] numpy.reciprocal(R[zlice], out=R[zlice]) flops = zlice.stop - zlice.start - for k in sorted(degree for degree in self.slices if degree > 1): + for k in self.blocks: zlice = self.slices[k] A = R[zlice].reshape((-1, k, k)) q, r = numpy.linalg.qr(A, mode="complete") @@ -906,7 +931,7 @@ def condense(self, result=None): dslice = self.slices[1] numpy.sign(D.array_r[dslice], out=U[dslice]) flops = dslice.stop - dslice.start - for k in sorted(degree for degree in self.slices if degree > 1): + for k in self.blocks: bslice = self.slices[k] A = U[bslice].reshape((-1, k, k)) u, s, v = numpy.linalg.svd(A, full_matrices=False) @@ -943,7 +968,7 @@ def condense(self, result=None): zlice = self.slices[1] numpy.reciprocal(R[zlice], out=R[zlice]) flops = zlice.stop - zlice.start - for k in sorted(degree for degree in self.slices if degree > 1): + for k in self.blocks: Rk = R[self.slices[k]] A = Rk.reshape((-1, k, k)) rinv = numpy.linalg.inv(A) @@ -1070,6 +1095,25 @@ def kron3(A, B, C, scale=None): return result +def get_submat(A, isrow=None, iscol=None): + """Return the sub matrix A[isrow, iscol]""" + needs_rows = isrow is None + needs_cols = iscol is None + if needs_rows and needs_cols: + return A + size = A.getSize() + if needs_rows: + isrow = PETSc.IS().createStride(size[0], step=1, comm=A.getComm()) + if needs_cols: + iscol = PETSc.IS().createStride(size[1], step=1, comm=A.getComm()) + submat = A.createSubMatrix(isrow, iscol) + if needs_rows: + isrow.destroy() + if needs_cols: + iscol.destroy() + return submat + + def block_mat(A_blocks, destroy_blocks=False): """Return a concrete Mat corresponding to a block matrix given as a list of lists.
Optionally, destroys the input Mats if a new Mat is created.""" @@ -1113,8 +1157,7 @@ def mass_blocks(tdim, formdegree, B00, B11): if n == 1: return [B_diag] else: - zero = PETSc.Mat().createAIJ(B_diag[0].getSize(), nnz=(0, 0), comm=B_diag[0].getComm()) - zero.assemble() + zero = PETSc.Mat() return [[B_diag[i] if i == j else zero for j in range(n)] for i in range(n)] @@ -1139,9 +1182,7 @@ def diff_blocks(tdim, formdegree, A00, A11, A10): if formdegree == 0: A_blocks = [[kron3(A00, A00, A10)], [kron3(A00, A10, A00)], [kron3(A10, A00, A00)]] elif formdegree == 1: - size = tuple(A11.getSize()[k] * A10.getSize()[k] * A00.getSize()[k] for k in range(2)) - zero = PETSc.Mat().createAIJ(size, nnz=(0, 0), comm=A10.getComm()) - zero.assemble() + zero = PETSc.Mat() A_blocks = [[kron3(A00, A10, A11, scale=-1), kron3(A00, A11, A10), zero], [kron3(A10, A00, A11, scale=-1), zero, kron3(A11, A00, A10)], [zero, kron3(A10, A11, A00), kron3(A11, A10, A00, scale=-1)]] @@ -1162,15 +1203,20 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None): if ef.formdegree - ec.formdegree != 1: raise ValueError("Expecting Vf = d(Vc)") - elements = list(set(get_base_elements(ec) + get_base_elements(ef))) - elements = sorted(elements, key=lambda e: e.formdegree) - e0, e1 = elements[::len(elements)-1] + elements = sorted(get_base_elements(ec), key=lambda e: e.formdegree) + c0, c1 = elements[::len(elements)-1] + elements = sorted(get_base_elements(ef), key=lambda e: e.formdegree) + f0, f1 = elements[::len(elements)-1] + if f0.formdegree != 0: + f0 = None + if c1.formdegree != 1: + c1 = None - degree = e0.degree() tdim = Vc.mesh().topological_dimension() - A00 = petsc_sparse(numpy.eye(degree+1, dtype=PETSc.RealType), comm=PETSc.COMM_SELF) - A10 = petsc_sparse(fiat_reference_prolongator(e0, e1, derivative=True), comm=PETSc.COMM_SELF) - A11 = petsc_sparse(numpy.eye(degree, dtype=PETSc.RealType), comm=PETSc.COMM_SELF) + zero = PETSc.Mat() + A00 = petsc_sparse(evaluate_dual(c0, f0), comm=PETSc.COMM_SELF) if f0 else zero + A11 = petsc_sparse(evaluate_dual(c1, f1), comm=PETSc.COMM_SELF) if c1 else zero + A10 = petsc_sparse(evaluate_dual(c0, f1, alpha=(1,)), comm=PETSc.COMM_SELF) Dhat = block_mat(diff_blocks(tdim, ec.formdegree, A00, A11, A10), destroy_blocks=True) A00.destroy() A10.destroy() @@ -1404,9 +1450,9 @@ def cell_to_global(lgmap, cell_to_local, cell_index, result=None): bce = bcflags.dat.data_ro_with_halos[index_bc(e)] > 1E-8 # get coefficients on this cell if Gq is not None: - numpy.sum(Gq.dat.data_ro[je], axis=0, out=ae) + ae[:] = numpy.sum(Gq.dat.data_ro[je], axis=0) if Bq is not None: - numpy.sum(Bq.dat.data_ro[je], axis=0, out=be) + be[:] = numpy.sum(Bq.dat.data_ro[je], axis=0) rindices = get_rindices(e, result=rindices) rows = numpy.reshape(rindices, (-1, bsize)) @@ -1662,10 +1708,10 @@ def assemble_coefficients(self, J, fcp): ds_ext = ufl.Measure(itype, domain=mesh, subdomain_id=it.subdomain_id(), metadata=md) forms.append(ufl.inner(test, beta)*ds_ext) + tensor = coefficients.setdefault("bcflags", Function(Q)) if len(forms): form = sum(forms) if len(form.arguments()) == 1: - tensor = coefficients.setdefault("bcflags", Function(Q)) assembly_callables.append(OneFormAssembler(form, tensor=tensor, form_compiler_parameters=fcp).assemble) # set arbitrary non-zero coefficients for preallocation diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 8719c76ece..e497d32340 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -553,7 +553,8 @@ 
def expand_element(ele): return ele -def evaluate_dual(source, target, alpha=None): +@lru_cache(maxsize=10) +def evaluate_dual(source, target, alpha=tuple()): """Evaluate the action of a set of dual functionals of the target element on the (derivative of order alpha of the) basis functions of the source element.""" @@ -561,7 +562,7 @@ def evaluate_dual(source, target, alpha=None): dual = target.get_dual_set() A = dual.to_riesz(primal) B = numpy.transpose(primal.get_coeffs()) - if alpha is not None: + if sum(alpha): dmats = primal.get_dmats() for i in range(len(alpha)): for j in range(alpha[i]): @@ -577,14 +578,7 @@ def compare_element(e1, e2): if e1.space_dimension() != e2.space_dimension(): return False B = evaluate_dual(e1, e2) - numpy.fill_diagonal(B, numpy.diagonal(B)-1.0) - return numpy.allclose(B, 0.0, rtol=1E-14, atol=1E-14) - - -@lru_cache(maxsize=10) -def fiat_reference_prolongator(celem, felem, derivative=False): - alpha = (1,) if derivative else None - return evaluate_dual(celem, felem, alpha=alpha) + return numpy.allclose(B, numpy.eye(B.shape[0]), rtol=1E-14, atol=1E-14) @lru_cache(maxsize=10) @@ -920,7 +914,7 @@ def make_kron_code(Vc, Vf, t_in, t_out, mat_name, scratch): fshapes.append((nscal,) + tuple(fshape)) cshapes.append((nscal,) + tuple(cshape)) - J = [identity_filter(fiat_reference_prolongator(ce, fe)).T for ce, fe in zip(celem, felem)] + J = [identity_filter(evaluate_dual(ce, fe)).T for ce, fe in zip(celem, felem)] if any(Jk.size and numpy.isclose(Jk, 0.0E0).all() for Jk in J): prolong_code.append(f""" for({IntType_c} i=0; i<{nscal*numpy.prod(fshape)}; i++) {t_out}[i+{fskip}] = 0.0E0; From 65690d062364e116dafafba0aff4a3c6baf3e3c6 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Fri, 21 Apr 2023 12:01:59 +0100 Subject: [PATCH 66/75] Fix caching --- firedrake/preconditioners/fdm.py | 99 ++++++++++++++++---------------- firedrake/preconditioners/pmg.py | 76 +++++++++++++++--------- 2 files changed, 98 insertions(+), 77 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index c7d031079f..66a9696628 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -494,57 +494,53 @@ def assemble_reference_tensor(self, V, transpose=False, sort_interior=False): perm = perm[degree.argsort(kind='stable')] A00.destroy() - iscol = PETSc.IS().createGeneral(perm, comm=result.getComm()) - result = get_submat(result, iscol=iscol) - iscol.destroy() + isperm = PETSc.IS().createGeneral(perm, comm=result.getComm()) + result = get_submat(result, iscol=isperm, permute=True) + isperm.destroy() return cache.setdefault(key, result) - full_key = key[:-4] + (False,) * 4 - if is_facet and full_key in cache: - result = get_submat(cache[full_key], iscol=self.fises) - return cache.setdefault(key, result) - - # Get CG(k) and DG(k-1) 1D elements from V - elements = sorted(get_base_elements(fe), key=lambda e: e.formdegree) - e0, e1 = elements[::len(elements)-1] - e0 = elements[0] if elements[0].formdegree == 0 else None - e1 = elements[-1] if elements[-1].formdegree == 1 else None - if e0 and is_interior: - e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") - - # Get broken(CG(k)) and DG(k-1) 1D elements from the coefficient spaces - Q0 = self.coefficients["beta"].function_space().finat_element.element - elements = sorted(get_base_elements(Q0), key=lambda e: e.formdegree) - q0 = elements[0] if elements[0].formdegree == 0 else None - q1 = elements[-1] - if q1.formdegree != 1: - Q1 = 
self.coefficients["alpha"].function_space().finat_element.element - q1 = sorted(get_base_elements(Q1), key=lambda e: e.formdegree)[-1] - - # Interpolate V * d(V) -> space(beta) * space(alpha) - comm = PETSc.COMM_SELF - zero = PETSc.Mat() - A00 = petsc_sparse(evaluate_dual(e0, q0), comm=comm) if e0 and q0 else zero - A11 = petsc_sparse(evaluate_dual(e1, q1), comm=comm) if e1 else zero - A10 = petsc_sparse(evaluate_dual(e0, q1, alpha=(1,)), comm=comm) if e0 else zero - B_blocks = mass_blocks(tdim, formdegree, A00, A11) - A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) - result = block_mat(B_blocks + A_blocks, destroy_blocks=True) - A00.destroy() - A10.destroy() - A11.destroy() - - if value_size != 1: - eye = petsc_sparse(numpy.eye(value_size), comm=result.getComm()) - temp = result - result = temp.kron(eye) - temp.destroy() - eye.destroy() + short_key = key[:-3] + (False,) * 3 + try: + result = cache[short_key] + except KeyError: + # Get CG(k) and DG(k-1) 1D elements from V + elements = sorted(get_base_elements(fe), key=lambda e: e.formdegree) + e0 = elements[0] if elements[0].formdegree == 0 else None + e1 = elements[-1] if elements[-1].formdegree == 1 else None + if e0 and is_interior: + e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") + + # Get broken(CG(k)) and DG(k-1) 1D elements from the coefficient spaces + Q0 = self.coefficients["beta"].function_space().finat_element.element + elements = sorted(get_base_elements(Q0), key=lambda e: e.formdegree) + q0 = elements[0] if elements[0].formdegree == 0 else None + q1 = elements[-1] + if q1.formdegree != 1: + Q1 = self.coefficients["alpha"].function_space().finat_element.element + q1 = sorted(get_base_elements(Q1), key=lambda e: e.formdegree)[-1] + + # Interpolate V * d(V) -> space(beta) * space(alpha) + comm = PETSc.COMM_SELF + zero = PETSc.Mat() + A00 = petsc_sparse(evaluate_dual(e0, q0), comm=comm) if e0 and q0 else zero + A11 = petsc_sparse(evaluate_dual(e1, q1), comm=comm) if e1 else zero + A10 = petsc_sparse(evaluate_dual(e0, q1, alpha=(1,)), comm=comm) if e0 else zero + B_blocks = mass_blocks(tdim, formdegree, A00, A11) + A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) + result = block_mat(B_blocks + A_blocks, destroy_blocks=True) + A00.destroy() + A10.destroy() + A11.destroy() + if value_size != 1: + eye = petsc_sparse(numpy.eye(value_size), comm=result.getComm()) + temp = result + result = temp.kron(eye) + temp.destroy() + eye.destroy() if is_facet: - cache[full_key] = result - result = get_submat(cache[full_key], iscol=self.fises) - + cache[short_key] = result + result = get_submat(result, iscol=self.fises) return cache.setdefault(key, result) @cached_property @@ -1095,7 +1091,7 @@ def kron3(A, B, C, scale=None): return result -def get_submat(A, isrow=None, iscol=None): +def get_submat(A, isrow=None, iscol=None, permute=False): """Return the sub matrix A[isrow, iscol]""" needs_rows = isrow is None needs_cols = iscol is None @@ -1106,7 +1102,10 @@ def get_submat(A, isrow=None, iscol=None): isrow = PETSc.IS().createStride(size[0], step=1, comm=A.getComm()) if needs_cols: iscol = PETSc.IS().createStride(size[1], step=1, comm=A.getComm()) - submat = A.createSubMatrix(isrow, iscol) + if permute: + submat = A.permute(isrow, iscol) + else: + submat = A.createSubMatrix(isrow, iscol) if needs_rows: isrow.destroy() if needs_cols: @@ -1335,7 +1334,7 @@ class PoissonFDMPC(FDMPC): def assemble_reference_tensor(self, V): try: - _, line_elements, shifts = get_permutation_to_line_elements(V.finat_element) + _, 
line_elements, shifts = get_permutation_to_line_elements(V) except ValueError: raise ValueError("FDMPC does not support the element %s" % V.ufl_element()) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index e497d32340..b441ebde16 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -1,4 +1,4 @@ -from functools import partial, lru_cache +from functools import partial from itertools import chain from firedrake.dmhooks import (attach_hooks, get_appctx, push_appctx, pop_appctx, add_hook, get_parent, push_parent, pop_parent, @@ -12,9 +12,11 @@ from tsfc.finatinterface import create_element from tsfc import compile_expression_dual_evaluation from pyop2 import op2 +from pyop2.caching import cached import firedrake import finat +import FIAT import ufl import loopy import numpy @@ -553,7 +555,35 @@ def expand_element(ele): return ele -@lru_cache(maxsize=10) +def hash_fiat_element(element): + """FIAT elements are not hashable, + this is not the best way to create a hash""" + restriction = None + e = element + if isinstance(e, FIAT.DiscontinuousElement): + # this hash does not care about inter-element continuity + e = e._element + if isinstance(e, FIAT.RestrictedElement): + restriction = tuple(e._indices) + e = e._element + if len(restriction) == e.space_dimension(): + restriction = None + family = e.__class__.__name__ + degree = e.order + return (family, element.ref_el, degree, restriction) + + +def generate_key_evaluate_dual(source, target, alpha=tuple()): + return hash_fiat_element(source) + hash_fiat_element(target) + (alpha,) + + +def get_readonly_view(arr): + result = arr.view() + result.flags.writeable = False + return result + + +@cached({}, key=generate_key_evaluate_dual) def evaluate_dual(source, target, alpha=tuple()): """Evaluate the action of a set of dual functionals of the target element on the (derivative of order alpha of the) basis functions of the source @@ -562,14 +592,15 @@ def evaluate_dual(source, target, alpha=tuple()): dual = target.get_dual_set() A = dual.to_riesz(primal) B = numpy.transpose(primal.get_coeffs()) - if sum(alpha): + if sum(alpha) != 0: dmats = primal.get_dmats() for i in range(len(alpha)): for j in range(alpha[i]): B = numpy.dot(dmats[i], B) - return numpy.dot(A, B) + return get_readonly_view(numpy.dot(A, B)) +@cached({}, key=generate_key_evaluate_dual) def compare_element(e1, e2): """Numerically compare two :class:`FIAT.elements`. Equality is satisfied if e2.dual_basis(e1.primal_basis) == identity.""" @@ -581,9 +612,9 @@ def compare_element(e1, e2): return numpy.allclose(B, numpy.eye(B.shape[0]), rtol=1E-14, atol=1E-14) -@lru_cache(maxsize=10) +@cached({}, key=lambda V: V.ufl_element()) @PETSc.Log.EventDecorator("GetLineElements") -def get_permutation_to_line_elements(finat_element): +def get_permutation_to_line_elements(V): """ Find DOF permutation to factor out the EnrichedElement expansion into common TensorProductElements. This routine exposes structure to e.g vectorize @@ -592,35 +623,26 @@ def get_permutation_to_line_elements(finat_element): This is temporary while we wait for dual evaluation of :class:`finat.EnrichedElement`. 
+ :arg V: a :class:`.FunctionSpace` + :returns: a 3-tuple of the DOF permutation, the unique terms in expansion as a list of tuples of :class:`FIAT.FiniteElements`, and the cyclic permutations of the axes to form the element given by their shifts in list of `int` tuples """ + finat_element = V.finat_element expansion = expand_element(finat_element) if expansion.space_dimension() != finat_element.space_dimension(): - raise ValueError("Failed to decompose %s into tensor products" % finat_element) + raise ValueError("Failed to decompose %s into tensor products" % V.ufl_element()) - unique_factors = set() line_elements = [] terms = expansion.elements if hasattr(expansion, "elements") else [expansion] for term in terms: factors = term.factors if hasattr(term, "factors") else (term,) - fiat_factors = [e.fiat_equivalent for e in reversed(factors)] + fiat_factors = tuple(e.fiat_equivalent for e in reversed(factors)) if any(e.get_reference_element().get_spatial_dimension() != 1 for e in fiat_factors): - raise ValueError("Failed to decompose %s into line elements" % fiat_factors) - - # use the same FIAT element if it appears multiple times in the expansion - for i in range(len(fiat_factors)): - n = fiat_factors[i] - for f in unique_factors: - if compare_element(n, f): - n = f - break - if n is fiat_factors[i]: - unique_factors.add(n) - fiat_factors[i] = n - line_elements.append(tuple(fiat_factors)) + raise ValueError("Failed to decompose %s into line elements" % V.ufl_element()) + line_elements.append(fiat_factors) shapes = [tuple(e.space_dimension() for e in factors) for factors in line_elements] sizes = list(map(numpy.prod, shapes)) @@ -660,7 +682,7 @@ def get_permutation_to_line_elements(finat_element): shifts.append(axes_shifts) - dof_perm = numpy.concatenate(dof_perm) + dof_perm = get_readonly_view(numpy.concatenate(dof_perm)) return dof_perm, unique_line_elements, shifts @@ -669,7 +691,7 @@ def get_permuted_map(V): Return a PermutedMap with the same tensor product shape for every component of H(div) or H(curl) tensor product elements """ - indices, _, _ = get_permutation_to_line_elements(V.finat_element) + indices, _, _ = get_permutation_to_line_elements(V) if numpy.all(indices[:-1] < indices[1:]): return V.cell_node_map() return op2.PermutedMap(V.cell_node_map(), indices) @@ -832,8 +854,8 @@ def make_kron_code(Vc, Vf, t_in, t_out, mat_name, scratch): operator_decl = [] prolong_code = [] restrict_code = [] - _, celems, cshifts = get_permutation_to_line_elements(Vc.finat_element) - _, felems, fshifts = get_permutation_to_line_elements(Vf.finat_element) + _, celems, cshifts = get_permutation_to_line_elements(Vc) + _, felems, fshifts = get_permutation_to_line_elements(Vf) shifts = fshifts in_place = False @@ -1065,7 +1087,7 @@ def make_mapping_code(Q, cmapping, fmapping, t_in, t_out): def make_permutation_code(V, vshape, pshape, t_in, t_out, array_name): - _, _, shifts = get_permutation_to_line_elements(V.finat_element) + _, _, shifts = get_permutation_to_line_elements(V) shift = shifts[0] if shift != (0,): ndof = numpy.prod(vshape) From 8954fdf7ed2c8aa097737fcdc96cc237d819436e Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Fri, 21 Apr 2023 16:14:59 +0100 Subject: [PATCH 67/75] lint --- firedrake/preconditioners/fdm.py | 30 ++++++++++++------------------ firedrake/preconditioners/pmg.py | 4 ++-- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 66a9696628..f47861a7a7 100644 --- 
a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1,5 +1,5 @@ from functools import partial -from itertools import product +from itertools import chain, product from firedrake.petsc import PETSc from firedrake.preconditioners.base import PCBase from firedrake.preconditioners.patch import bcdofs @@ -126,7 +126,7 @@ def initialize(self, pc): else: # Reconstruct Jacobian and bcs with variant element V_fdm = FunctionSpace(V.mesh(), e_fdm) - J_fdm = J(*[t.reconstruct(function_space=V_fdm) for t in J.arguments()], coefficients={}) + J_fdm = J(*(t.reconstruct(function_space=V_fdm) for t in J.arguments()), coefficients={}) bcs_fdm = [] for bc in bcs: W = V_fdm @@ -1053,16 +1053,10 @@ def wrapper(A, B, rows, cols, addv): def is_restricted(finat_element): """Determine if an element is a restriction onto interior or facets""" - is_interior = True - is_facet = True - cell_dim = finat_element.cell.get_dimension() - entity_dofs = finat_element.entity_dofs() - for dim in sorted(entity_dofs): - if any(len(entity_dofs[dim][entity]) > 0 for entity in entity_dofs[dim]): - if dim == cell_dim: - is_facet = False - else: - is_interior = False + tdim = finat_element.cell.get_dimension() + idofs = len(finat_element.entity_dofs()[tdim][0]) + is_interior = idofs == finat_element.space_dimension() + is_facet = idofs == 0 return is_interior, is_facet @@ -1294,9 +1288,9 @@ def unrestrict_element(ele): def get_base_elements(e): if isinstance(e, finat.EnrichedElement): - return sum(list(map(get_base_elements, e.elements)), []) + return list(chain.from_iterable(map(get_base_elements, e.elements))) elif isinstance(e, finat.TensorProductElement): - return sum(list(map(get_base_elements, e.factors)), []) + return list(chain.from_iterable(map(get_base_elements, e.factors))) elif isinstance(e, finat.FlattenedDimensions): return get_base_elements(e.product) elif isinstance(e, (finat.HCurlElement, finat.HDivElement)): @@ -1645,8 +1639,8 @@ def assemble_coefficients(self, J, fcp): replace_val = {t: ufl.dot(dummy_Piola, s) for t, s in zip(args_J, ref_val)} else: replace_val = {t: s for t, s in zip(args_J, ref_val)} - beta = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_val), - ref_val[0]), ref_val[1]) for i in integrals_J])) + beta = expand_derivatives(sum(ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_val), + ref_val[0]), ref_val[1]) for i in integrals_J)) if Piola: beta = ufl.replace(beta, {dummy_Piola: Piola}) # assemble zero-th order coefficient @@ -1671,8 +1665,8 @@ def assemble_coefficients(self, J, fcp): ifacet_inner = lambda v, u: ((ufl.inner(v('+'), u('+')) + ufl.inner(v('-'), u('-')))/area)*dS_int replace_grad = {ufl.grad(t): ufl.dot(dt, Finv) for t, dt in zip(args_J, ref_grad)} - alpha = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), - ref_grad[0]), ref_grad[1]) for i in integrals_J])) + alpha = expand_derivatives(sum(ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), + ref_grad[0]), ref_grad[1]) for i in integrals_J)) G = alpha G = ufl.as_tensor([[[G[i, k, j, k] for i in range(G.ufl_shape[0])] for j in range(G.ufl_shape[2])] for k in range(G.ufl_shape[3])]) G = G * abs(ufl.JacobianDeterminant(mesh)) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index b441ebde16..e13f97b416 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -945,7 +945,7 @@ def make_kron_code(Vc, Vf, t_in, t_out, mat_name, scratch): for({IntType_c} i=0; 
i<{nscal*numpy.prod(cshape)}; i++) {t_in}[i+{cskip}] = 0.0E0; """) else: - Jsize = numpy.cumsum([Jlen]+[Jk.size for Jk in J]) + Jsize = numpy.cumsum([Jlen] + [Jk.size for Jk in J]) Jptrs = ["%s+%d" % (mat_name, Jsize[k]) if J[k].size else "NULL" for k in range(len(J))] Jmats.extend(J) Jlen = Jsize[-1] @@ -987,7 +987,7 @@ def make_kron_code(Vc, Vf, t_in, t_out, mat_name, scratch): cskip += nscal*numpy.prod(cshape) # Pass the 1D interpolators as a hexadecimal string - Jdata = ", ".join(map(float.hex, chain(*[Jk.flat for Jk in Jmats]))) + Jdata = ", ".join(map(float.hex, chain.from_iterable(Jk.flat for Jk in Jmats))) operator_decl.append(f""" PetscScalar {mat_name}[{Jlen}] = {{ {Jdata} }}; """) From 56e2f8b601d52bc4d2d40c8343b830ca2beaa363 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 26 Apr 2023 15:54:15 +0100 Subject: [PATCH 68/75] do not create rscale Vec --- firedrake/preconditioners/pmg.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index e13f97b416..e16040302d 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -362,9 +362,7 @@ def create_transfer(self, mat_type, cctx, fctx, cbcs, fbcs): def create_interpolation(self, dmc, dmf): prefix = dmc.getOptionsPrefix() mat_type = PETSc.Options(prefix).getString("mg_levels_transfer_mat_type", default="matfree") - interpolate = self.create_transfer(mat_type, get_appctx(dmc), get_appctx(dmf), True, False) - rscale = interpolate.createVecRight() # only used as a workaround in the creation of coarse vecs - return interpolate, rscale + return self.create_transfer(mat_type, get_appctx(dmc), get_appctx(dmf), True, False), None def create_injection(self, dmc, dmf): prefix = dmc.getOptionsPrefix() From 649ceed1ad0dbddc0977a2a2813bd001dabcd141 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 26 Apr 2023 16:38:01 +0100 Subject: [PATCH 69/75] do not change venv name --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ac716d2358..73ed727455 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,7 +48,7 @@ jobs: - name: Build Firedrake run: | cd .. - ./firedrake/scripts/firedrake-install $COMPLEX --venv-name build --tinyasm --disable-ssh --minimal-petsc --slepc --documentation-dependencies --install thetis --install gusto --install icepack --install irksome --install femlium --no-package-manager --package-branch tsfc pbrubeck/fdm-discontinuous || (cat firedrake-install.log && /bin/false) + ./firedrake/scripts/firedrake-install $COMPLEX --venv-name firedrake_venv --tinyasm --netgen --disable-ssh --minimal-petsc --slepc --documentation-dependencies --install thetis --install gusto --install icepack --install irksome --install femlium --no-package-manager --package-branch tsfc pbrubeck/fdm-discontinuous || (cat firedrake-install.log && /bin/false) - name: Install test dependencies run: | . 
../firedrake_venv/bin/activate From 0c3f5dd76350515cc8a973a22a36361856e2bc79 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 26 Apr 2023 16:39:39 +0100 Subject: [PATCH 70/75] define H(d) --- firedrake/preconditioners/fdm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index f47861a7a7..706ab73492 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -57,9 +57,9 @@ class FDMPC(PCBase): """ A preconditioner for tensor-product elements that changes the shape - functions so that the H(d) Riesz map is sparse on Cartesian cells, - and assembles a global sparse matrix on which other preconditioners, - such as `ASMStarPC`, can be applied. + functions so that the H(d) (d in {grad, curl, div}) Riesz map is sparse on + Cartesian cells, and assembles a global sparse matrix on which other + preconditioners, such as `ASMStarPC`, can be applied. Here we assume that the volume integrals in the Jacobian can be expressed as: From 35ba3e629a4c6b018be0bb4dfb73736e796469e9 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Sun, 30 Apr 2023 12:05:44 +0100 Subject: [PATCH 71/75] use weakref for coarsening and transfer operators --- firedrake/preconditioners/pmg.py | 79 ++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index e16040302d..6769dfaa22 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -22,6 +22,7 @@ import numpy import os import tempfile +import weakref __all__ = ("PMGPC", "PMGSNES") @@ -56,7 +57,8 @@ class PMGBase(PCSNESBase): """ _prefix = "pmg_" - _cache = {} + _coarsen_cache = weakref.WeakKeyDictionary() + _transfer_cache = weakref.WeakKeyDictionary() def coarsen_element(self, ele): """ @@ -277,27 +279,14 @@ def inject_state(): interpolate = None if fctx._nullspace or fctx._nullspace_T or fctx._near_nullspace: interpolate, _ = cdm.createInterpolation(fdm) - cctx._nullspace = self.coarsen_nullspace(cV, interpolate, fctx._nullspace) - cctx._nullspace_T = self.coarsen_nullspace(cV, interpolate, fctx._nullspace_T) - cctx._near_nullspace = self.coarsen_nullspace(cV, interpolate, fctx._near_nullspace) + cctx._nullspace = self.coarsen_nullspace(fctx._nullspace, cV, interpolate) + cctx._nullspace_T = self.coarsen_nullspace(fctx._nullspace_T, cV, interpolate) + cctx._near_nullspace = self.coarsen_nullspace(fctx._near_nullspace, cV, interpolate) cctx.set_nullspace(cctx._nullspace, cV._ises, transpose=False, near=False) cctx.set_nullspace(cctx._nullspace_T, cV._ises, transpose=True, near=False) cctx.set_nullspace(cctx._near_nullspace, cV._ises, transpose=False, near=True) return cdm - def coarsen_bcs(self, fbcs, cV): - cbcs = [] - for bc in fbcs: - cV_ = cV - for index in bc._indices: - cV_ = cV_.sub(index) - cbc_value = self.coarsen_bc_value(bc, cV_) - if isinstance(bc, firedrake.DirichletBC): - cbcs.append(bc.reconstruct(V=cV_, g=cbc_value)) - else: - raise NotImplementedError("Unsupported BC type, please get in touch if you need this") - return cbcs - def coarsen_quadrature(self, metadata, fdeg, cdeg): """Coarsen the quadrature degree in a dictionary preserving the ratio of quadrature nodes to interpolation nodes (qdeg+1)//(fdeg+1).""" @@ -308,28 +297,50 @@ def coarsen_quadrature(self, metadata, fdeg, cdeg): except (KeyError, TypeError): return metadata - def coarsen_nullspace(self, coarse_V, interpolate, fine_nullspace): 
- """Coarsen a nullspace or retrieve it from class cache""" - cache = self._cache.setdefault("nullspace", {}) - key = (coarse_V.ufl_element(), fine_nullspace) + def coarsen_bcs(self, fbcs, cV): + """Coarsen a list of bcs""" + cbcs = [] + for bc in fbcs: + cache = self._coarsen_cache.setdefault(bc, {}) + key = (cV.ufl_element(), self.is_snes) + try: + coarse_bc = cache[key] + except KeyError: + cV_ = cV + for index in bc._indices: + cV_ = cV_.sub(index) + cbc_value = self.coarsen_bc_value(bc, cV_) + if isinstance(bc, firedrake.DirichletBC): + coarse_bc = cache.setdefault(key, bc.reconstruct(V=cV_, g=cbc_value)) + else: + raise NotImplementedError("Unsupported BC type, please get in touch if you need this") + cbcs.append(coarse_bc) + return cbcs + + def coarsen_nullspace(self, fine_nullspace, cV, interpolate): + """Coarsen a nullspace""" + if fine_nullspace is None: + return fine_nullspace + cache = self._coarsen_cache.setdefault(fine_nullspace, {}) + key = cV.ufl_element() try: return cache[key] except KeyError: if isinstance(fine_nullspace, MixedVectorSpaceBasis): if interpolate.getType() == "python": interpolate = interpolate.getPythonContext() - submats = [interpolate.getNestSubMatrix(i, i) for i in range(len(coarse_V))] + submats = [interpolate.getNestSubMatrix(i, i) for i in range(len(cV))] coarse_bases = [] - for fs, submat, basis in zip(coarse_V, submats, fine_nullspace._bases): + for fs, submat, basis in zip(cV, submats, fine_nullspace._bases): if isinstance(basis, VectorSpaceBasis): - coarse_bases.append(self.coarsen_nullspace(fs, submat, basis)) + coarse_bases.append(self.coarsen_nullspace(basis, fs, submat)) else: - coarse_bases.append(coarse_V.sub(basis.index)) - coarse_nullspace = MixedVectorSpaceBasis(coarse_V, coarse_bases) + coarse_bases.append(cV.sub(basis.index)) + coarse_nullspace = MixedVectorSpaceBasis(cV, coarse_bases) elif isinstance(fine_nullspace, VectorSpaceBasis): coarse_vecs = [] for xf in fine_nullspace._petsc_vecs: - wc = firedrake.Function(coarse_V) + wc = firedrake.Function(cV) with wc.dat.vec_wo as xc: # the nullspace basis is in the dual of V interpolate.multTranspose(xf, xc) @@ -341,13 +352,9 @@ def coarsen_nullspace(self, coarse_V, interpolate, fine_nullspace): return cache.setdefault(key, coarse_nullspace) def create_transfer(self, mat_type, cctx, fctx, cbcs, fbcs): - """Create a transfer or retrieve it from class cache""" - cV = cctx.J.arguments()[0].function_space() - fV = fctx.J.arguments()[0].function_space() - cbcs = tuple(cctx._problem.bcs) if cbcs else tuple() - fbcs = tuple(fctx._problem.bcs) if fbcs else tuple() - key = (mat_type, fV.mesh(), cV.ufl_element(), fV.ufl_element(), cbcs, fbcs) - cache = self._cache.setdefault("transfer", {}) + """Create a transfer operator""" + cache = self._transfer_cache.setdefault(fctx, {}) + key = (mat_type, cctx, cbcs, fbcs) try: return cache[key] except KeyError: @@ -357,6 +364,10 @@ def create_transfer(self, mat_type, cctx, fctx, cbcs, fbcs): construct_mat = prolongation_matrix_aij else: raise ValueError("Unknown matrix type") + cV = cctx.J.arguments()[0].function_space() + fV = fctx.J.arguments()[0].function_space() + cbcs = tuple(cctx._problem.bcs) if cbcs else tuple() + fbcs = tuple(fctx._problem.bcs) if fbcs else tuple() return cache.setdefault(key, construct_mat(cV, fV, cbcs, fbcs)) def create_interpolation(self, dmc, dmf): From 3c5569990ebd6606adb0e34487ce800b90a3bd46 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 2 May 2023 17:23:56 +0100 Subject: [PATCH 72/75] fix typo in absolute tolerance 
--- firedrake/preconditioners/fdm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 706ab73492..6aba95b301 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1062,7 +1062,7 @@ def is_restricted(finat_element): def petsc_sparse(A_numpy, rtol=1E-10, comm=None): """Convert dense numpy matrix into a sparse PETSc matrix""" - atol = rtol * max(A_numpy.min(), A_numpy.max(), key=abs) + atol = rtol * abs(max(A_numpy.min(), A_numpy.max(), key=abs)) sparsity = abs(A_numpy) > atol nnz = numpy.count_nonzero(sparsity, axis=1).astype(PETSc.IntType) A = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) From 8792bc305acd15a8026442129111c7716cba95da Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 3 May 2023 17:09:04 +0100 Subject: [PATCH 73/75] test that weakref caches are parallel safe --- firedrake/preconditioners/pmg.py | 1 + tests/multigrid/test_p_multigrid.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 6769dfaa22..7b87338ca6 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -57,6 +57,7 @@ class PMGBase(PCSNESBase): """ _prefix = "pmg_" + # This is parallel safe because the keys are ids of collective objects _coarsen_cache = weakref.WeakKeyDictionary() _transfer_cache = weakref.WeakKeyDictionary() diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index c04913e7d4..abb77e450a 100644 --- a/tests/multigrid/test_p_multigrid.py +++ b/tests/multigrid/test_p_multigrid.py @@ -334,10 +334,14 @@ def test_p_multigrid_mixed(mat_type): ppc = solver.snes.ksp.pc.getPythonContext().ppc assert ppc.getMGLevels() == 3 - level = solver._ctx + # test that nullspace component is zero assert abs(assemble(z[1]*dx)) < 1E-12 + # test that we converge to the exact solution assert norm(z-z_exact, "H1") < 1E-12 + + # test that we have coarsened the nullspace correctly ctx_levels = 0 + level = solver._ctx while level is not None: nsp = level._nullspace assert isinstance(nsp, MixedVectorSpaceBasis) @@ -348,6 +352,13 @@ def test_p_multigrid_mixed(mat_type): ctx_levels += 1 assert ctx_levels == 3 + # test that caches are parallel safe + dummy_eq = type(object).__eq__ + for cache in (PMGPC._coarsen_cache, PMGPC._transfer_cache): + assert len(cache) > 0 + for k in cache: + assert type(k).__eq__ is dummy_eq + def test_p_fas_scalar(): mat_type = "matfree" From 7d40f36e56e7221944e959cad7025f781c1ecbc5 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 3 May 2023 17:10:57 +0100 Subject: [PATCH 74/75] small typo --- firedrake/preconditioners/pmg.py | 2 +- tests/multigrid/test_p_multigrid.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 7b87338ca6..ff302a94a2 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -57,7 +57,7 @@ class PMGBase(PCSNESBase): """ _prefix = "pmg_" - # This is parallel safe because the keys are ids of collective objects + # This is parallel-safe because the keys are ids of collective objects _coarsen_cache = weakref.WeakKeyDictionary() _transfer_cache = weakref.WeakKeyDictionary() diff --git a/tests/multigrid/test_p_multigrid.py b/tests/multigrid/test_p_multigrid.py index abb77e450a..3954536f1f 100644 --- a/tests/multigrid/test_p_multigrid.py +++ 
b/tests/multigrid/test_p_multigrid.py @@ -352,7 +352,7 @@ def test_p_multigrid_mixed(mat_type): ctx_levels += 1 assert ctx_levels == 3 - # test that caches are parallel safe + # test that caches are parallel-safe dummy_eq = type(object).__eq__ for cache in (PMGPC._coarsen_cache, PMGPC._transfer_cache): assert len(cache) > 0 From da35c07a50313dedfbb636c13ee9361878fa64da Mon Sep 17 00:00:00 2001 From: "David A. Ham" Date: Wed, 10 May 2023 17:03:09 +0100 Subject: [PATCH 75/75] Drop build changes --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 73ed727455..4aca285fff 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,7 +48,7 @@ jobs: - name: Build Firedrake run: | cd .. - ./firedrake/scripts/firedrake-install $COMPLEX --venv-name firedrake_venv --tinyasm --netgen --disable-ssh --minimal-petsc --slepc --documentation-dependencies --install thetis --install gusto --install icepack --install irksome --install femlium --no-package-manager --package-branch tsfc pbrubeck/fdm-discontinuous || (cat firedrake-install.log && /bin/false) + ./firedrake/scripts/firedrake-install $COMPLEX --venv-name firedrake_venv --tinyasm --netgen --disable-ssh --minimal-petsc --slepc --documentation-dependencies --install thetis --install gusto --install icepack --install irksome --install femlium --no-package-manager || (cat firedrake-install.log && /bin/false) - name: Install test dependencies run: | . ../firedrake_venv/bin/activate
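For context at the end of the series, a minimal usage sketch of the preconditioner these patches extend; the hexahedral mesh, the NCE space, and the options under the inner "fdm" prefix are illustrative guesses rather than anything taken from the diffs above:

    from firedrake import *

    # H(curl) Riesz map on a Cartesian (hexahedral) mesh, the setting FDMPC targets
    mesh = UnitCubeMesh(4, 4, 4, hexahedral=True)
    V = FunctionSpace(mesh, "NCE", 3)
    u, v = TrialFunction(V), TestFunction(V)
    a = inner(curl(v), curl(u))*dx + inner(v, u)*dx
    L = inner(v, Constant((1.0, 0.0, 0.0)))*dx

    uh = Function(V)
    solve(a == L, uh, solver_parameters={
        "mat_type": "matfree",
        "ksp_type": "cg",
        "pc_type": "python",
        "pc_python_type": "firedrake.FDMPC",
        # inner solver applied to the sparse matrix assembled by the preconditioner
        # (these inner option names are assumptions)
        "fdm": {"pc_type": "jacobi"},
    })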