From a67ef52799c768a5a91891842b9384b5616b8860 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Wed, 26 Jan 2022 13:03:41 -0500
Subject: [PATCH 01/50] first files for volume constraint

---
 hoomd/md/MeshVolumeConservation.cc | 445 +++++++++++++++++++++++++++++
 hoomd/md/MeshVolumeConservation.h  | 116 ++++++++
 2 files changed, 561 insertions(+)
 create mode 100644 hoomd/md/MeshVolumeConservation.cc
 create mode 100644 hoomd/md/MeshVolumeConservation.h
diff --git a/hoomd/md/MeshVolumeConservation.cc b/hoomd/md/MeshVolumeConservation.cc
new file mode 100644
index 0000000000..cdae38d59e
--- /dev/null
+++ b/hoomd/md/MeshVolumeConservation.cc
@@ -0,0 +1,445 @@
+// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+#include "MeshVolumeConservation.h"
+
+#include <iostream>
+#include <math.h>
+#include <sstream>
+#include <stdexcept>
+
+using namespace std;
+
+// SMALL a relatively small number
+#define SMALL Scalar(0.001)
+
+/*! \file MeshVolumeConservation.cc
+    \brief Contains code for the MeshVolumeConservation class
+*/
+
+namespace hoomd
+    {
+namespace md
+    {
+/*! \param sysdef System to compute forces on
+    \param meshdef Mesh definition whose bond/triangle data this compute reads
+    \post The per-type K and V0 arrays are allocated; their entries are not assigned here. */
+MeshVolumeConservation::MeshVolumeConservation(std::shared_ptr<SystemDefinition> sysdef,
+                                               std::shared_ptr<MeshDefinition> meshdef)
+    : ForceCompute(sysdef), m_K(NULL), m_V0(NULL), m_mesh_data(meshdef), m_volume(0)
+    {
+    m_exec_conf->msg->notice(5) << "Constructing MeshVolumeConservation" << endl;
+
+    // per-type stiffness K; NOTE(review): sized by m_pdata type count, indexed by mesh type
+    m_K = new Scalar[m_pdata->getNTypes()];
+
+    // per-type target volume V0; NOTE(review): same sizing question as m_K - confirm
+    m_V0 = new Scalar[m_pdata->getNTypes()];
+    }
+
+MeshVolumeConservation::~MeshVolumeConservation() // frees the per-type parameter arrays
+    {
+    m_exec_conf->msg->notice(5) << "Destroying MeshVolumeConservation" << endl;
+
+    delete[] m_K; // allocated with new[] in the constructor
+    delete[] m_V0;
+    m_K = NULL; // clear the pointers so they do not dangle
+    m_V0 = NULL;
+    }
+
+/*! \param type Mesh type index to set parameters for
+    \param K Stiffness parameter for the force computation
+    \param V0 Target volume stored for this type
+    Stores K and V0 for one type; non-positive values only trigger a warning.
+*/
+void MeshVolumeConservation::setParams(unsigned int type, Scalar K, Scalar V0)
+    {
+    m_K[type] = K; // NOTE(review): type is not range-checked before indexing
+    m_V0[type] = V0;
+
+    // check for some silly errors a user could make
+    if (K <= 0)
+        m_exec_conf->msg->warning() << "volume: specified K <= 0" << endl;
+    if (V0 <= 0)
+        m_exec_conf->msg->warning() << "volume: specified V0 <= 0" << endl;
+    }
+
+void MeshVolumeConservation::setParamsPython(std::string type, pybind11::dict params)
+    {
+    auto typ = m_mesh_data->getMeshBondData()->getTypeByName(type); // type name -> index
+    auto _params = vconstraint_params(params); // reads the "k" and "V0" dict entries
+    setParams(typ, _params.k, _params.V0);
+    }
+
+pybind11::dict MeshVolumeConservation::getParams(std::string type)
+    {
+    auto typ = m_mesh_data->getMeshBondData()->getTypeByName(type); // type name -> index
+    if (typ >= m_mesh_data->getMeshBondData()->getNTypes())
+        {
+        m_exec_conf->msg->error() << "volume: Invalid mesh type specified" << endl;
+        throw runtime_error("Error getting parameters in MeshVolumeConservation");
+        }
+    pybind11::dict params; // returned with keys "k" and "V0" for the requested type
+    params["k"] = m_K[typ];
+    params["V0"] = m_V0[typ];
+    return params;
+    }
+
+/*! Actually perform the force computation
+    \param timestep Current time step
+ */
+void MeshVolumeConservation::computeForces(uint64_t timestep)
+    {
+    if (m_prof)
+        m_prof->push("Harmonic Angle");
+
+    computeVolume(); // presumably refreshes m_volume (no definition visible here) - confirm
+
+    assert(m_pdata);
+    // access the particle data arrays
+    ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
+
+    ArrayHandle<unsigned int> h_rtag(m_pdata->getRTags(), access_location::host, access_mode::read);
+    ArrayHandle<int3> h_image(m_pdata->getImages(), access_location::host, access_mode::read);
+
+    ArrayHandle<Scalar4> h_force(m_force, access_location::host, access_mode::overwrite);
+    ArrayHandle<Scalar> h_virial(m_virial, access_location::host, access_mode::overwrite);
+    size_t virial_pitch = m_virial.getPitch();
+
+    ArrayHandle<typename MeshTriangle::members_t> h_triangles(
+        m_mesh_data->getMeshTriangleData()->getMembersArray(),
+        access_location::host,
+        access_mode::read);
+
+    // there are enough other checks on the input data: but it doesn't hurt to be safe
+    assert(h_force.data);
+    assert(h_virial.data);
+    assert(h_pos.data);
+    assert(h_rtag.data);
+    assert(h_triangles.data);
+
+    // Zero data for force calculation.
+    memset((void*)h_force.data, 0, sizeof(Scalar4) * m_force.getNumElements());
+    memset((void*)h_virial.data, 0, sizeof(Scalar) * m_virial.getNumElements());
+
+    // get a local copy of the simulation box too
+    const BoxDim& box = m_pdata->getGlobalBox();
+
+    PDataFlags flags = m_pdata->getFlags();
+    bool compute_virial = flags[pdata_flag::pressure_tensor];
+
+    Scalar helfrich_virial[6];
+    for (unsigned int i = 0; i < 6; i++)
+        helfrich_virial[i] = Scalar(0.0);
+
+    // for each of the angles
+    const unsigned int size = (unsigned int)m_mesh_data->getMeshTriangleData()->getN();
+    for (unsigned int i = 0; i < size; i++)
+        {
+        // lookup the tag of each of the particles participating in the triangle
+        const typename MeshTriangle::members_t& triangle = h_triangles.data[i];
+
+        unsigned int btag_a = triangle.tag[0];
+        assert(btag_a < m_pdata->getMaximumTag() + 1);
+        unsigned int btag_b = triangle.tag[1];
+        assert(btag_b < m_pdata->getMaximumTag() + 1);
+        unsigned int btag_c = triangle.tag[2];
+        assert(btag_c < m_pdata->getMaximumTag() + 1);
+
+        // transform a, b and c into indices into the particle data arrays
+        // (MEM TRANSFER: 3 integers)
+        unsigned int idx_a = h_rtag.data[btag_a];
+        unsigned int idx_b = h_rtag.data[btag_b];
+        unsigned int idx_c = h_rtag.data[btag_c];
+
+        assert(idx_a < m_pdata->getN() + m_pdata->getNGhosts());
+        assert(idx_b < m_pdata->getN() + m_pdata->getNGhosts());
+        assert(idx_c < m_pdata->getN() + m_pdata->getNGhosts());
+
+        vec3<Scalar> pos_a(h_pos.data[idx_a].x, h_pos.data[idx_a].y, h_pos.data[idx_a].z);
+        vec3<Scalar> pos_b(h_pos.data[idx_b].x, h_pos.data[idx_b].y, h_pos.data[idx_b].z);
+        vec3<Scalar> pos_c(h_pos.data[idx_c].x, h_pos.data[idx_c].y, h_pos.data[idx_c].z);
+
+        vec3<Scalar> dVol_a = cross(pos_b, pos_c);
+
+        vec3<Scalar> dVol_b = cross(pos_c, pos_a);
+
+        vec3<Scalar> dVol_c = cross(pos_a, pos_b);
+
+        // calculate d\vec{r}
+        Scalar3 dba;
+        dab.x = h_pos.data[idx_a].x - h_pos.data[idx_b].x;
+        dab.y = h_pos.data[idx_a].y - h_pos.data[idx_b].y;
+        dab.z = h_pos.data[idx_a].z - h_pos.data[idx_b].z;
+
+        Scalar3 dbc;
+        dac.x = h_pos.data[idx_c].x - h_pos.data[idx_b].x;
+        dac.y = h_pos.data[idx_c].y - h_pos.data[idx_b].y;
+        dac.z = h_pos.data[idx_c].z - h_pos.data[idx_b].z;
+
+        dba = box.minImage(dba);
+        dbc = box.minImage(dbc);
+
+        vec3<Scalar> normal
+            = cross(vec3 < Scalar(dba.x, dba.y, dba.z), vec3<Scalar>(dbc.x, dbc.y, dbc.z));
+
+        Scalar3 Fa;
+
+        Fa.x = dsigma_dash_a * inv_sigma_a * sigma_dash_a.x - sigma_dash_a2 * dsigma_a.x;
+        Fa.x += (dsigma_dash_b * inv_sigma_b * sigma_dash_b.x - sigma_dash_b2 * dsigma_b.x);
+        Fa.x += (dsigma_dash_c * inv_sigma_c * sigma_dash_c.x - sigma_dash_c2 * dsigma_c.x);
+        Fa.x += (dsigma_dash_d * inv_sigma_d * sigma_dash_d.x - sigma_dash_d2 * dsigma_d.x);
+
+        Fa.y = dsigma_dash_a * inv_sigma_a * sigma_dash_a.y - sigma_dash_a2 * dsigma_a.y;
+        Fa.y += (dsigma_dash_b * inv_sigma_b * sigma_dash_b.y - sigma_dash_b2 * dsigma_b.y);
+        Fa.y += (dsigma_dash_c * inv_sigma_c * sigma_dash_c.y - sigma_dash_c2 * dsigma_c.y);
+        Fa.y += (dsigma_dash_d * inv_sigma_d * sigma_dash_d.y - sigma_dash_d2 * dsigma_d.y);
+
+        Fa.z = dsigma_dash_a * inv_sigma_a * sigma_dash_a.z - sigma_dash_a2 * dsigma_a.z;
+        Fa.z += (dsigma_dash_b * inv_sigma_b * sigma_dash_b.z - sigma_dash_b2 * dsigma_b.z);
+        Fa.z += (dsigma_dash_c * inv_sigma_c * sigma_dash_c.z - sigma_dash_c2 * dsigma_c.z);
+        Fa.z += (dsigma_dash_d * inv_sigma_d * sigma_dash_d.z - sigma_dash_d2 * dsigma_d.z);
+
+        Fa *= m_K[0];
+
+        if (compute_virial)
+            {
+            helfrich_virial[0] = Scalar(1. / 2.) * dab.x * Fa.x; // xx
+            helfrich_virial[1] = Scalar(1. / 2.) * dab.y * Fa.x; // xy
+            helfrich_virial[2] = Scalar(1. / 2.) * dab.z * Fa.x; // xz
+            helfrich_virial[3] = Scalar(1. / 2.) * dab.y * Fa.y; // yy
+            helfrich_virial[4] = Scalar(1. / 2.) * dab.z * Fa.y; // yz
+            helfrich_virial[5] = Scalar(1. / 2.) * dab.z * Fa.z; // zz
+            }
+
+        // Now, apply the force to each individual atom a,b,c, and accumulate the energy/virial
+        // do not update ghost particles
+        if (idx_a < m_pdata->getN())
+            {
+            h_force.data[idx_a].x += Fa.x;
+            h_force.data[idx_a].y += Fa.y;
+            h_force.data[idx_a].z += Fa.z;
+            h_force.data[idx_a].w = m_K[0] * 0.5 * dot(sigma_dash_a, sigma_dash_a) * inv_sigma_a;
+            for (int j = 0; j < 6; j++)
+                h_virial.data[j * virial_pitch + idx_a] += helfrich_virial[j];
+            }
+
+        if (idx_b < m_pdata->getN())
+            {
+            h_force.data[idx_b].x -= Fa.x;
+            h_force.data[idx_b].y -= Fa.y;
+            h_force.data[idx_b].z -= Fa.z;
+            h_force.data[idx_b].w = m_K[0] * 0.5 * dot(sigma_dash_b, sigma_dash_b) * inv_sigma_b;
+            for (int j = 0; j < 6; j++)
+                h_virial.data[j * virial_pitch + idx_b] += helfrich_virial[j];
+            }
+        }
+
+    if (m_prof)
+        m_prof->pop();
+    }
+
+void MeshVolumeConservation::computeSigma()
+    {
+    ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
+
+    ArrayHandle<unsigned int> h_rtag(m_pdata->getRTags(), access_location::host, access_mode::read);
+
+    ArrayHandle<typename MeshBond::members_t> h_bonds(
+        m_mesh_data->getMeshBondData()->getMembersArray(),
+        access_location::host,
+        access_mode::read);
+    ArrayHandle<typename MeshTriangle::members_t> h_triangles(
+        m_mesh_data->getMeshTriangleData()->getMembersArray(),
+        access_location::host,
+        access_mode::read);
+
+    // get a local copy of the simulation box too
+    const BoxDim& box = m_pdata->getGlobalBox();
+
+    ArrayHandle<Scalar> h_sigma(m_sigma, access_location::host, access_mode::overwrite);
+    ArrayHandle<Scalar3> h_sigma_dash(m_sigma_dash, access_location::host, access_mode::overwrite);
+
+    memset((void*)h_sigma.data, 0, sizeof(Scalar) * m_sigma.getNumElements());
+    memset((void*)h_sigma_dash.data, 0, sizeof(Scalar3) * m_sigma_dash.getNumElements());
+
+    // for each of the angles
+    const unsigned int size = (unsigned int)m_mesh_data->getMeshBondData()->getN();
+    for (unsigned int i = 0; i < size; i++)
+        {
+        // lookup the tag of each of the particles participating in the bond
+        const typename MeshBond::members_t& bond = h_bonds.data[i];
+
+        unsigned int btag_a = bond.tag[0];
+        assert(btag_a < m_pdata->getMaximumTag() + 1);
+        unsigned int btag_b = bond.tag[1];
+        assert(btag_b < m_pdata->getMaximumTag() + 1);
+
+        // transform a and b into indices into the particle data arrays
+        // (MEM TRANSFER: 4 integers)
+        unsigned int idx_a = h_rtag.data[btag_a];
+        unsigned int idx_b = h_rtag.data[btag_b];
+
+        unsigned int tr_idx1 = bond.tag[2];
+        unsigned int tr_idx2 = bond.tag[3];
+
+        if (tr_idx1 == tr_idx2)
+            continue;
+
+        const typename MeshTriangle::members_t& triangle1 = h_triangles.data[tr_idx1];
+        const typename MeshTriangle::members_t& triangle2 = h_triangles.data[tr_idx2];
+
+        unsigned int idx_c = h_rtag.data[triangle1.tag[0]];
+
+        unsigned int iterator = 1;
+        while (idx_a == idx_c || idx_b == idx_c)
+            {
+            idx_c = h_rtag.data[triangle1.tag[iterator]];
+            iterator++;
+            }
+
+        unsigned int idx_d = h_rtag.data[triangle2.tag[0]];
+
+        iterator = 1;
+        while (idx_a == idx_d || idx_b == idx_d)
+            {
+            idx_d = h_rtag.data[triangle2.tag[iterator]];
+            iterator++;
+            }
+
+        assert(idx_a < m_pdata->getN() + m_pdata->getNGhosts());
+        assert(idx_b < m_pdata->getN() + m_pdata->getNGhosts());
+        assert(idx_c < m_pdata->getN() + m_pdata->getNGhosts());
+        assert(idx_d < m_pdata->getN() + m_pdata->getNGhosts());
+
+        // calculate d\vec{r}
+        Scalar3 dab;
+        dab.x = h_pos.data[idx_a].x - h_pos.data[idx_b].x;
+        dab.y = h_pos.data[idx_a].y - h_pos.data[idx_b].y;
+        dab.z = h_pos.data[idx_a].z - h_pos.data[idx_b].z;
+
+        Scalar3 dac;
+        dac.x = h_pos.data[idx_a].x - h_pos.data[idx_c].x;
+        dac.y = h_pos.data[idx_a].y - h_pos.data[idx_c].y;
+        dac.z = h_pos.data[idx_a].z - h_pos.data[idx_c].z;
+
+        Scalar3 dad;
+        dad.x = h_pos.data[idx_a].x - h_pos.data[idx_d].x;
+        dad.y = h_pos.data[idx_a].y - h_pos.data[idx_d].y;
+        dad.z = h_pos.data[idx_a].z - h_pos.data[idx_d].z;
+
+        Scalar3 dbc;
+        dbc.x = h_pos.data[idx_b].x - h_pos.data[idx_c].x;
+        dbc.y = h_pos.data[idx_b].y - h_pos.data[idx_c].y;
+        dbc.z = h_pos.data[idx_b].z - h_pos.data[idx_c].z;
+
+        Scalar3 dbd;
+        dbd.x = h_pos.data[idx_b].x - h_pos.data[idx_d].x;
+        dbd.y = h_pos.data[idx_b].y - h_pos.data[idx_d].y;
+        dbd.z = h_pos.data[idx_b].z - h_pos.data[idx_d].z;
+
+        // apply minimum image conventions to all 3 vectors
+        dab = box.minImage(dab);
+        dac = box.minImage(dac);
+        dad = box.minImage(dad);
+        dbc = box.minImage(dbc);
+        dbd = box.minImage(dbd);
+
+        // on paper, the formula turns out to be: F = K*\vec{r} * (r_0/r - 1)
+        // FLOPS: 14 / MEM TRANSFER: 2 Scalars
+
+        // FLOPS: 42 / MEM TRANSFER: 6 Scalars
+        Scalar rsqab = dab.x * dab.x + dab.y * dab.y + dab.z * dab.z;
+        Scalar rab = sqrt(rsqab);
+        Scalar rac = dac.x * dac.x + dac.y * dac.y + dac.z * dac.z;
+        rac = sqrt(rac);
+        Scalar rad = dad.x * dad.x + dad.y * dad.y + dad.z * dad.z;
+        rad = sqrt(rad);
+
+        Scalar rbc = dbc.x * dbc.x + dbc.y * dbc.y + dbc.z * dbc.z;
+        rbc = sqrt(rbc);
+        Scalar rbd = dbd.x * dbd.x + dbd.y * dbd.y + dbd.z * dbd.z;
+        rbd = sqrt(rbd);
+
+        Scalar3 nab, nac, nad, nbc, nbd;
+        nab = dab / rab;
+        nac = dac / rac;
+        nad = dad / rad;
+        nbc = dbc / rbc;
+        nbd = dbd / rbd;
+
+        Scalar c_accb = nac.x * nbc.x + nac.y * nbc.y + nac.z * nbc.z;
+        if (c_accb > 1.0)
+            c_accb = 1.0;
+        if (c_accb < -1.0)
+            c_accb = -1.0;
+
+        Scalar c_addb = nad.x * nbd.x + nad.y * nbd.y + nad.z * nbd.z;
+        if (c_addb > 1.0)
+            c_addb = 1.0;
+        if (c_addb < -1.0)
+            c_addb = -1.0;
+
+        vec3<Scalar> nbac
+            = cross(vec3<Scalar>(nab.x, nab.y, nab.z), vec3<Scalar>(nac.x, nac.y, nac.z));
+
+        Scalar inv_nbac = 1.0 / sqrt(dot(nbac, nbac));
+
+        vec3<Scalar> nbad
+            = cross(vec3<Scalar>(nab.x, nab.y, nab.z), vec3<Scalar>(nad.x, nad.y, nad.z));
+
+        Scalar inv_nbad = 1.0 / sqrt(dot(nbad, nbad));
+
+        if (dot(nbac, nbad) * inv_nbad * inv_nbac > 0.9)
+            {
+            this->m_exec_conf->msg->error() << "helfrich calculations : triangles " << tr_idx1
+                                            << " " << tr_idx2 << " overlap." << std::endl
+                                            << std::endl;
+            throw std::runtime_error("Error in bending energy calculation");
+            }
+
+        Scalar inv_s_accb = sqrt(1.0 - c_accb * c_accb);
+        if (inv_s_accb < SMALL)
+            inv_s_accb = SMALL;
+        inv_s_accb = 1.0 / inv_s_accb;
+
+        Scalar inv_s_addb = sqrt(1.0 - c_addb * c_addb);
+        if (inv_s_addb < SMALL)
+            inv_s_addb = SMALL;
+        inv_s_addb = 1.0 / inv_s_addb;
+
+        Scalar cot_accb = c_accb * inv_s_accb;
+        Scalar cot_addb = c_addb * inv_s_addb;
+
+        Scalar sigma_hat_ab = (cot_accb + cot_addb) / 2;
+
+        Scalar sigma_a = sigma_hat_ab * rsqab * 0.25;
+
+        h_sigma.data[idx_a] += sigma_a;
+        h_sigma.data[idx_b] += sigma_a;
+
+        h_sigma_dash.data[idx_a].x += sigma_hat_ab * dab.x;
+        h_sigma_dash.data[idx_a].y += sigma_hat_ab * dab.y;
+        h_sigma_dash.data[idx_a].z += sigma_hat_ab * dab.z;
+
+        h_sigma_dash.data[idx_b].x -= sigma_hat_ab * dab.x;
+        h_sigma_dash.data[idx_b].y -= sigma_hat_ab * dab.y;
+        h_sigma_dash.data[idx_b].z -= sigma_hat_ab * dab.z;
+        }
+    }
+
+namespace detail
+    {
+void export_MeshVolumeConservation(pybind11::module& m) // binds the class into module m
+    {
+    pybind11::class_<MeshVolumeConservation, ForceCompute, std::shared_ptr<MeshVolumeConservation>>(
+        m,
+        "MeshVolumeConservation")
+        .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>>())
+        .def("setParams", &MeshVolumeConservation::setParamsPython) // takes (type name, dict)
+        .def("getParams", &MeshVolumeConservation::getParams);
+    }
+
+    } // end namespace detail
+    } // end namespace md
+    } // end namespace hoomd
diff --git a/hoomd/md/MeshVolumeConservation.h b/hoomd/md/MeshVolumeConservation.h
new file mode 100644
index 0000000000..d6522ae9db
--- /dev/null
+++ b/hoomd/md/MeshVolumeConservation.h
@@ -0,0 +1,116 @@
+// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+#include "hoomd/ForceCompute.h"
+#include "hoomd/MeshDefinition.h"
+
+#include <memory>
+
+#include <vector>
+
+/*! \file MeshVolumeConservation.h
+    \brief Declares a class for computing volume constraint forces
+*/
+
+#ifdef __HIPCC__
+#error This header cannot be compiled by nvcc
+#endif
+
+#include <pybind11/pybind11.h>
+
+#ifndef __MESHVOLUMECONSERVATION_H__
+#define __MESHVOLUMECONSERVATION_H__
+
+namespace hoomd
+    {
+namespace md
+    {
+struct vconstraint_params // per-type parameters (k, V0) exchanged with Python as a dict
+    {
+    Scalar k; //!< Stiffness, read from / written to the "k" dict entry
+    Scalar V0; //!< Target volume, read from / written to the "V0" dict entry
+
+#ifndef __HIPCC__
+    vconstraint_params() : k(0), V0(0) { }
+
+    vconstraint_params(pybind11::dict params)
+        : k(params["k"].cast<Scalar>()), V0(params["V0"].cast<Scalar>())
+        {
+        }
+
+    pybind11::dict asDict()
+        {
+        pybind11::dict v;
+        v["k"] = k;
+        v["V0"] = V0;
+        return v;
+        }
+#endif
+    }
+#ifdef SINGLE_PRECISION
+    __attribute__((aligned(8)));
+#else
+    __attribute__((aligned(16)));
+#endif
+
+//! Computes volume constraint forces on the mesh
+/*! Volume constraint forces are computed on every particle in a mesh.
+
+    \ingroup computes
+*/
+class PYBIND11_EXPORT MeshVolumeConservation : public ForceCompute
+    {
+    public:
+    //! Constructs the compute
+    MeshVolumeConservation(std::shared_ptr<SystemDefinition> sysdef,
+                           std::shared_ptr<MeshDefinition> meshdef);
+
+    //! Destructor
+    virtual ~MeshVolumeConservation();
+
+    //! Set K and V0 for a type index
+    virtual void setParams(unsigned int type, Scalar K, Scalar V0);
+    //! Set the parameters of a named type from a Python dict with keys "k" and "V0"
+    virtual void setParamsPython(std::string type, pybind11::dict params);
+
+    /// Get the parameters for a type
+    pybind11::dict getParams(std::string type);
+
+#ifdef ENABLE_MPI
+    //! Get ghost particle fields requested by this force compute
+    /*! \param timestep Current time step
+     */
+    virtual CommFlags getRequestedCommFlags(uint64_t timestep)
+        {
+        CommFlags flags = CommFlags(0);
+        flags[comm_flag::tag] = 1;
+        flags |= ForceCompute::getRequestedCommFlags(timestep);
+        return flags;
+        }
+#endif
+
+    protected:
+    Scalar* m_K; //!< Per-type stiffness K (array allocated in the constructor)
+
+    Scalar* m_V0; //!< Per-type target volume V0 (array allocated in the constructor)
+
+    std::shared_ptr<MeshDefinition> m_mesh_data; //!< Mesh data used to look up types and triangles
+
+    Scalar m_volume; //!< Mesh volume; initialized to 0 in the constructor
+    //! Actually compute the forces
+    virtual void computeForces(uint64_t timestep);
+
+    //! Compute the mesh volume (NOTE(review): not defined in this patch's .cc - confirm)
+    virtual void computeVolume();
+    };
+
+namespace detail
+    {
+//! Exports the MeshVolumeConservation class to python
+void export_MeshVolumeConservation(pybind11::module& m);
+
+    } // end namespace detail
+    } // end namespace md
+    } // end namespace hoomd
+
+#endif

From 2cc9b720be8ac6422d6e48942e7fc237f17809d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Wed, 26 Jan 2022 16:09:14 -0500
Subject: [PATCH 02/50] add gpu_pos_list to mesh gpu list generator

---
 hoomd/MeshGroupData.cc  | 12 ++++++++++++
 hoomd/MeshGroupData.cu  |  9 +++++++++
 hoomd/MeshGroupData.cuh |  1 +
 3 files changed, 22 insertions(+)

diff --git a/hoomd/MeshGroupData.cc b/hoomd/MeshGroupData.cc
index 5c9fb0d6a2..7ab391c332 100644
--- a/hoomd/MeshGroupData.cc
+++ b/hoomd/MeshGroupData.cc
@@ -432,6 +432,7 @@ void MeshGroupData<group_size, Group, name, snap, bond>::rebuildGPUTable()
         this->m_gpu_table_indexer
             = Index2D(this->m_pdata->getN() + this->m_pdata->getNGhosts(), num_groups_max);
         this->m_gpu_table.resize(this->m_gpu_table_indexer.getNumElements());
+        this->m_gpu_pos_table.resize(this->m_gpu_table_indexer.getNumElements());
 
             {
             ArrayHandle<unsigned int> h_n_groups(this->m_gpu_n_groups,
@@ -441,6 +442,10 @@ void MeshGroupData<group_size, Group, name, snap, bond>::rebuildGPUTable()
                                                access_location::host,
                                                access_mode::overwrite);
 
+            ArrayHandle<unsigned int> h_gpu_pos_table(this->m_gpu_pos_table,
+                                                      access_location::host,
+                                                      access_mode::overwrite);
+
             // now, update the actual table
             // zero the number of bonded groups counter (again)
             memset(h_n_groups.data,
@@ -469,10 +474,12 @@ void MeshGroupData<group_size, Group, name, snap, bond>::rebuildGPUTable()
 
                     // list all group members j!=i in p.idx
                     unsigned int n = 0;
+                    unsigned int gpos = 0;
                     for (unsigned int j = 0; j < group_size_half; ++j)
                         {
                         if (j == i)
                             {
+                            gpos = j;
                             continue;
                             }
                         unsigned int tag2 = g.tag[j];
@@ -481,6 +488,7 @@ void MeshGroupData<group_size, Group, name, snap, bond>::rebuildGPUTable()
                         }
 
                     h_gpu_table.data[this->m_gpu_table_indexer(idx1, num)] = h;
+                    h_gpu_pos_table.data[this->m_gpu_table_indexer(idx1, num)] = gpos;
                     }
                 }
             }
@@ -529,6 +537,9 @@ void MeshGroupData<group_size, Group, name, snap, bond>::rebuildGPUTableGPU()
             ArrayHandle<members_t> d_gpu_table(this->m_gpu_table,
                                                access_location::device,
                                                access_mode::overwrite);
+            ArrayHandle<unsigned int> d_gpu_pos_table(this->m_gpu_pos_table,
+                                                      access_location::device,
+                                                      access_mode::overwrite);
             ArrayHandle<unsigned int> d_condition(this->m_condition,
                                                   access_location::device,
                                                   access_mode::readwrite);
@@ -553,6 +564,7 @@ void MeshGroupData<group_size, Group, name, snap, bond>::rebuildGPUTableGPU()
                                                          this->m_next_flag,
                                                          flag,
                                                          d_gpu_table.data,
+                                                         d_gpu_pos_table.data,
                                                          this->m_gpu_table_indexer.getW(),
                                                          d_scratch_g.data,
                                                          d_scratch_idx.data,
diff --git a/hoomd/MeshGroupData.cu b/hoomd/MeshGroupData.cu
index 7752afa8c2..d323c9ff33 100644
--- a/hoomd/MeshGroupData.cu
+++ b/hoomd/MeshGroupData.cu
@@ -70,6 +70,7 @@ __global__ void gpu_mesh_scatter_kernel(unsigned int n_scratch,
                                         const typeval_union* d_group_typeval,
                                         const unsigned int* d_rtag,
                                         group_t* d_pidx_group_table,
+                                        unsigned int* d_pidx_gpos_table,
                                         unsigned int pidx_group_table_pitch)
     {
     unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
@@ -98,12 +99,15 @@ __global__ void gpu_mesh_scatter_kernel(unsigned int n_scratch,
 
     unsigned int j = 0;
 
+    unsigned int gpos = 0;
+
     for (unsigned int k = 0; k < group_size_half; ++k)
         {
         unsigned int tag_k = g.tag[k];
         unsigned int pidx_k = d_rtag[tag_k];
         if (pidx_k == pidx)
             {
+            gpos = k;
             continue;
             }
 
@@ -111,6 +115,7 @@ __global__ void gpu_mesh_scatter_kernel(unsigned int n_scratch,
         }
 
     d_pidx_group_table[offset] = p;
+    d_pidx_gpos_table[offset] = gpos;
     }
 
 template<unsigned int group_size, typename group_t>
@@ -125,6 +130,7 @@ void gpu_update_mesh_table(const unsigned int n_groups,
                            unsigned int next_flag,
                            unsigned int& flag,
                            group_t* d_pidx_group_table,
+                           unsigned int* d_pidx_gpos_table,
                            const unsigned int pidx_group_table_pitch,
                            unsigned int* d_scratch_g,
                            unsigned int* d_scratch_idx,
@@ -203,6 +209,7 @@ void gpu_update_mesh_table(const unsigned int n_groups,
                            d_group_typeval,
                            d_rtag,
                            d_pidx_group_table,
+                           d_pidx_gpos_table,
                            pidx_group_table_pitch);
         }
     }
@@ -223,6 +230,7 @@ template void gpu_update_mesh_table<4>(const unsigned int n_groups,
                                        unsigned int next_flag,
                                        unsigned int& flag,
                                        group_storage<4>* d_pidx_group_table,
+                                       unsigned int* d_pidx_gpos_table,
                                        const unsigned int pidx_group_table_pitch,
                                        unsigned int* d_scratch_g,
                                        unsigned int* d_scratch_idx,
@@ -241,6 +249,7 @@ template void gpu_update_mesh_table<6>(const unsigned int n_groups,
                                        unsigned int next_flag,
                                        unsigned int& flag,
                                        group_storage<6>* d_pidx_group_table,
+                                       unsigned int* d_pidx_gpos_table,
                                        const unsigned int pidx_group_table_pitch,
                                        unsigned int* d_scratch_g,
                                        unsigned int* d_scratch_idx,
diff --git a/hoomd/MeshGroupData.cuh b/hoomd/MeshGroupData.cuh
index 41b1b9790a..462d138a37 100644
--- a/hoomd/MeshGroupData.cuh
+++ b/hoomd/MeshGroupData.cuh
@@ -26,6 +26,7 @@ void gpu_update_mesh_table(const unsigned int n_groups,
                            unsigned int next_flag,
                            unsigned int& flag,
                            group_t* d_pidx_group_table,
+                           unsigned int* d_pidx_gpos_table,
                            const unsigned int pidx_group_table_pitch,
                            unsigned int* d_scratch_g,
                            unsigned int* d_scratch_idx,

From d4be84200dfc00e7c9e75a15d60eb545ec8bdf0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Wed, 26 Jan 2022 18:33:45 -0500
Subject: [PATCH 03/50] fix volume factor

---
 hoomd/md/MeshVolumeConservation.cc | 279 ++++++++---------------------
 1 file changed, 78 insertions(+), 201 deletions(-)

diff --git a/hoomd/md/MeshVolumeConservation.cc b/hoomd/md/MeshVolumeConservation.cc
index cdae38d59e..3552314256 100644
--- a/hoomd/md/MeshVolumeConservation.cc
+++ b/hoomd/md/MeshVolumeConservation.cc
@@ -132,6 +132,12 @@ void MeshVolumeConservation::computeForces(uint64_t timestep)
     for (unsigned int i = 0; i < 6; i++)
         helfrich_virial[i] = Scalar(0.0);
 
+    Scalar VolDiff = m_volume - m_V0[0];
+
+    Scalar energy = m_K[0] * VolDiff * VolDiff / (2 * m_V0[0] * m_pdata->getN());
+
+    VolDiff = -m_K[0] / m_V0[0] * VolDiff / 6.0;
+
     // for each of the angles
     const unsigned int size = (unsigned int)m_mesh_data->getMeshTriangleData()->getN();
     for (unsigned int i = 0; i < size; i++)
@@ -156,60 +162,30 @@ void MeshVolumeConservation::computeForces(uint64_t timestep)
         assert(idx_b < m_pdata->getN() + m_pdata->getNGhosts());
         assert(idx_c < m_pdata->getN() + m_pdata->getNGhosts());
 
-        vec3<Scalar> pos_a(h_pos.data[idx_a].x, h_pos.data[idx_a].y, h_pos.data[idx_a].z);
-        vec3<Scalar> pos_b(h_pos.data[idx_b].x, h_pos.data[idx_b].y, h_pos.data[idx_b].z);
-        vec3<Scalar> pos_c(h_pos.data[idx_c].x, h_pos.data[idx_c].y, h_pos.data[idx_c].z);
-
-        vec3<Scalar> dVol_a = cross(pos_b, pos_c);
-
-        vec3<Scalar> dVol_b = cross(pos_c, pos_a);
-
-        vec3<Scalar> dVol_c = cross(pos_a, pos_b);
-
-        // calculate d\vec{r}
-        Scalar3 dba;
-        dab.x = h_pos.data[idx_a].x - h_pos.data[idx_b].x;
-        dab.y = h_pos.data[idx_a].y - h_pos.data[idx_b].y;
-        dab.z = h_pos.data[idx_a].z - h_pos.data[idx_b].z;
+        vec3<Scalar> pos_a = box.shift(h_pos.data[idx_a], h_image.data[idx_a]);
+        vec3<Scalar> pos_b = box.shift(h_pos.data[idx_b], h_image.data[idx_b]);
+        vec3<Scalar> pos_c = box.shift(h_pos.data[idx_c], h_image.data[idx_c]);
 
-        Scalar3 dbc;
-        dac.x = h_pos.data[idx_c].x - h_pos.data[idx_b].x;
-        dac.y = h_pos.data[idx_c].y - h_pos.data[idx_b].y;
-        dac.z = h_pos.data[idx_c].z - h_pos.data[idx_b].z;
+        vec3<Scalar> dVol_a = cross(pos_c, pos_b);
 
-        dba = box.minImage(dba);
-        dbc = box.minImage(dbc);
+        vec3<Scalar> dVol_b = cross(pos_a, pos_c);
 
-        vec3<Scalar> normal
-            = cross(vec3 < Scalar(dba.x, dba.y, dba.z), vec3<Scalar>(dbc.x, dbc.y, dbc.z));
+        vec3<Scalar> dVol_c = cross(pos_b, pos_a);
 
-        Scalar3 Fa;
+        Scalar3 Fa, Fb, Fc;
 
-        Fa.x = dsigma_dash_a * inv_sigma_a * sigma_dash_a.x - sigma_dash_a2 * dsigma_a.x;
-        Fa.x += (dsigma_dash_b * inv_sigma_b * sigma_dash_b.x - sigma_dash_b2 * dsigma_b.x);
-        Fa.x += (dsigma_dash_c * inv_sigma_c * sigma_dash_c.x - sigma_dash_c2 * dsigma_c.x);
-        Fa.x += (dsigma_dash_d * inv_sigma_d * sigma_dash_d.x - sigma_dash_d2 * dsigma_d.x);
-
-        Fa.y = dsigma_dash_a * inv_sigma_a * sigma_dash_a.y - sigma_dash_a2 * dsigma_a.y;
-        Fa.y += (dsigma_dash_b * inv_sigma_b * sigma_dash_b.y - sigma_dash_b2 * dsigma_b.y);
-        Fa.y += (dsigma_dash_c * inv_sigma_c * sigma_dash_c.y - sigma_dash_c2 * dsigma_c.y);
-        Fa.y += (dsigma_dash_d * inv_sigma_d * sigma_dash_d.y - sigma_dash_d2 * dsigma_d.y);
-
-        Fa.z = dsigma_dash_a * inv_sigma_a * sigma_dash_a.z - sigma_dash_a2 * dsigma_a.z;
-        Fa.z += (dsigma_dash_b * inv_sigma_b * sigma_dash_b.z - sigma_dash_b2 * dsigma_b.z);
-        Fa.z += (dsigma_dash_c * inv_sigma_c * sigma_dash_c.z - sigma_dash_c2 * dsigma_c.z);
-        Fa.z += (dsigma_dash_d * inv_sigma_d * sigma_dash_d.z - sigma_dash_d2 * dsigma_d.z);
-
-        Fa *= m_K[0];
+        Fa.x = VolDiff * dVol_a.x;
+        Fa.y = VolDiff * dVol_a.y;
+        Fa.z = VolDiff * dVol_a.z;
 
         if (compute_virial)
             {
-            helfrich_virial[0] = Scalar(1. / 2.) * dab.x * Fa.x; // xx
-            helfrich_virial[1] = Scalar(1. / 2.) * dab.y * Fa.x; // xy
-            helfrich_virial[2] = Scalar(1. / 2.) * dab.z * Fa.x; // xz
-            helfrich_virial[3] = Scalar(1. / 2.) * dab.y * Fa.y; // yy
-            helfrich_virial[4] = Scalar(1. / 2.) * dab.z * Fa.y; // yz
-            helfrich_virial[5] = Scalar(1. / 2.) * dab.z * Fa.z; // zz
+            helfrich_virial[0] = Scalar(1. / 2.) * h_pos.data[idx_a].x * Fa.x; // xx
+            helfrich_virial[1] = Scalar(1. / 2.) * h_pos.data[idx_a].y * Fa.x; // xy
+            helfrich_virial[2] = Scalar(1. / 2.) * h_pos.data[idx_a].z * Fa.x; // xz
+            helfrich_virial[3] = Scalar(1. / 2.) * h_pos.data[idx_a].y * Fa.y; // yy
+            helfrich_virial[4] = Scalar(1. / 2.) * h_pos.data[idx_a].z * Fa.y; // yz
+            helfrich_virial[5] = Scalar(1. / 2.) * h_pos.data[idx_a].z * Fa.z; // zz
             }
 
         // Now, apply the force to each individual atom a,b,c, and accumulate the energy/virial
@@ -219,36 +195,69 @@ void MeshVolumeConservation::computeForces(uint64_t timestep)
             h_force.data[idx_a].x += Fa.x;
             h_force.data[idx_a].y += Fa.y;
             h_force.data[idx_a].z += Fa.z;
-            h_force.data[idx_a].w = m_K[0] * 0.5 * dot(sigma_dash_a, sigma_dash_a) * inv_sigma_a;
+            h_force.data[idx_a].w = energy;
             for (int j = 0; j < 6; j++)
                 h_virial.data[j * virial_pitch + idx_a] += helfrich_virial[j];
             }
 
+        Fb.x = VolDiff * dVol_b.x;
+        Fb.y = VolDiff * dVol_b.y;
+        Fb.z = VolDiff * dVol_b.z;
+
+        if (compute_virial)
+            {
+            helfrich_virial[0] = Scalar(1. / 2.) * h_pos.data[idx_b].x * Fb.x; // xx
+            helfrich_virial[1] = Scalar(1. / 2.) * h_pos.data[idx_b].y * Fb.x; // xy
+            helfrich_virial[2] = Scalar(1. / 2.) * h_pos.data[idx_b].z * Fb.x; // xz
+            helfrich_virial[3] = Scalar(1. / 2.) * h_pos.data[idx_b].y * Fb.y; // yy
+            helfrich_virial[4] = Scalar(1. / 2.) * h_pos.data[idx_b].z * Fb.y; // yz
+            helfrich_virial[5] = Scalar(1. / 2.) * h_pos.data[idx_b].z * Fb.z; // zz
+            }
+
         if (idx_b < m_pdata->getN())
             {
-            h_force.data[idx_b].x -= Fa.x;
-            h_force.data[idx_b].y -= Fa.y;
-            h_force.data[idx_b].z -= Fa.z;
-            h_force.data[idx_b].w = m_K[0] * 0.5 * dot(sigma_dash_b, sigma_dash_b) * inv_sigma_b;
+            h_force.data[idx_b].x += Fb.x;
+            h_force.data[idx_b].y += Fb.y;
+            h_force.data[idx_b].z += Fb.z;
+            h_force.data[idx_b].w = energy;
             for (int j = 0; j < 6; j++)
                 h_virial.data[j * virial_pitch + idx_b] += helfrich_virial[j];
             }
+
+        Fc.x = VolDiff * dVol_c.x;
+        Fc.y = VolDiff * dVol_c.y;
+        Fc.z = VolDiff * dVol_c.z;
+
+        if (compute_virial)
+            {
+            helfrich_virial[0] = Scalar(1. / 2.) * h_pos.data[idx_c].x * Fc.x; // xx
+            helfrich_virial[1] = Scalar(1. / 2.) * h_pos.data[idx_c].y * Fc.x; // xy
+            helfrich_virial[2] = Scalar(1. / 2.) * h_pos.data[idx_c].z * Fc.x; // xz
+            helfrich_virial[3] = Scalar(1. / 2.) * h_pos.data[idx_c].y * Fc.y; // yy
+            helfrich_virial[4] = Scalar(1. / 2.) * h_pos.data[idx_c].z * Fc.y; // yz
+            helfrich_virial[5] = Scalar(1. / 2.) * h_pos.data[idx_c].z * Fc.z; // zz
+            }
+
+        if (idx_c < m_pdata->getN())
+            {
+            h_force.data[idx_c].x -= Fc.x;
+            h_force.data[idx_c].y -= Fc.y;
+            h_force.data[idx_c].z -= Fc.z;
+            h_force.data[idx_c].w = energy;
+            for (int j = 0; j < 6; j++)
+                h_virial.data[j * virial_pitch + idx_c] += helfrich_virial[j];
+            }
         }
 
     if (m_prof)
         m_prof->pop();
     }
 
-void MeshVolumeConservation::computeSigma()
+void MeshVolumeConservation::computeVolume()
     {
-    ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
-
     ArrayHandle<unsigned int> h_rtag(m_pdata->getRTags(), access_location::host, access_mode::read);
+    ArrayHandle<int3> h_image(m_pdata->getImages(), access_location::host, access_mode::read);
 
-    ArrayHandle<typename MeshBond::members_t> h_bonds(
-        m_mesh_data->getMeshBondData()->getMembersArray(),
-        access_location::host,
-        access_mode::read);
     ArrayHandle<typename MeshTriangle::members_t> h_triangles(
         m_mesh_data->getMeshTriangleData()->getMembersArray(),
         access_location::host,
@@ -260,171 +269,39 @@ void MeshVolumeConservation::computeSigma()
     ArrayHandle<Scalar> h_sigma(m_sigma, access_location::host, access_mode::overwrite);
     ArrayHandle<Scalar3> h_sigma_dash(m_sigma_dash, access_location::host, access_mode::overwrite);
 
-    memset((void*)h_sigma.data, 0, sizeof(Scalar) * m_sigma.getNumElements());
-    memset((void*)h_sigma_dash.data, 0, sizeof(Scalar3) * m_sigma_dash.getNumElements());
+    m_volume = 0;
 
     // for each of the angles
-    const unsigned int size = (unsigned int)m_mesh_data->getMeshBondData()->getN();
+    const unsigned int size = (unsigned int)m_mesh_data->getMeshTriangleData()->getN();
     for (unsigned int i = 0; i < size; i++)
         {
         // lookup the tag of each of the particles participating in the bond
-        const typename MeshBond::members_t& bond = h_bonds.data[i];
+        const typename MeshTriangle::members_t& triangle = h_triangles.data[i];
 
         unsigned int btag_a = bond.tag[0];
         assert(btag_a < m_pdata->getMaximumTag() + 1);
         unsigned int btag_b = bond.tag[1];
         assert(btag_b < m_pdata->getMaximumTag() + 1);
+        unsigned int btag_c = bond.tag[2];
+        assert(btag_c < m_pdata->getMaximumTag() + 1);
 
         // transform a and b into indices into the particle data arrays
         // (MEM TRANSFER: 4 integers)
         unsigned int idx_a = h_rtag.data[btag_a];
         unsigned int idx_b = h_rtag.data[btag_b];
-
-        unsigned int tr_idx1 = bond.tag[2];
-        unsigned int tr_idx2 = bond.tag[3];
-
-        if (tr_idx1 == tr_idx2)
-            continue;
-
-        const typename MeshTriangle::members_t& triangle1 = h_triangles.data[tr_idx1];
-        const typename MeshTriangle::members_t& triangle2 = h_triangles.data[tr_idx2];
-
-        unsigned int idx_c = h_rtag.data[triangle1.tag[0]];
-
-        unsigned int iterator = 1;
-        while (idx_a == idx_c || idx_b == idx_c)
-            {
-            idx_c = h_rtag.data[triangle1.tag[iterator]];
-            iterator++;
-            }
-
-        unsigned int idx_d = h_rtag.data[triangle2.tag[0]];
-
-        iterator = 1;
-        while (idx_a == idx_d || idx_b == idx_d)
-            {
-            idx_d = h_rtag.data[triangle2.tag[iterator]];
-            iterator++;
-            }
+        unsigned int idx_c = h_rtag.data[btag_c];
 
         assert(idx_a < m_pdata->getN() + m_pdata->getNGhosts());
         assert(idx_b < m_pdata->getN() + m_pdata->getNGhosts());
         assert(idx_c < m_pdata->getN() + m_pdata->getNGhosts());
-        assert(idx_d < m_pdata->getN() + m_pdata->getNGhosts());
-
-        // calculate d\vec{r}
-        Scalar3 dab;
-        dab.x = h_pos.data[idx_a].x - h_pos.data[idx_b].x;
-        dab.y = h_pos.data[idx_a].y - h_pos.data[idx_b].y;
-        dab.z = h_pos.data[idx_a].z - h_pos.data[idx_b].z;
-
-        Scalar3 dac;
-        dac.x = h_pos.data[idx_a].x - h_pos.data[idx_c].x;
-        dac.y = h_pos.data[idx_a].y - h_pos.data[idx_c].y;
-        dac.z = h_pos.data[idx_a].z - h_pos.data[idx_c].z;
-
-        Scalar3 dad;
-        dad.x = h_pos.data[idx_a].x - h_pos.data[idx_d].x;
-        dad.y = h_pos.data[idx_a].y - h_pos.data[idx_d].y;
-        dad.z = h_pos.data[idx_a].z - h_pos.data[idx_d].z;
-
-        Scalar3 dbc;
-        dbc.x = h_pos.data[idx_b].x - h_pos.data[idx_c].x;
-        dbc.y = h_pos.data[idx_b].y - h_pos.data[idx_c].y;
-        dbc.z = h_pos.data[idx_b].z - h_pos.data[idx_c].z;
-
-        Scalar3 dbd;
-        dbd.x = h_pos.data[idx_b].x - h_pos.data[idx_d].x;
-        dbd.y = h_pos.data[idx_b].y - h_pos.data[idx_d].y;
-        dbd.z = h_pos.data[idx_b].z - h_pos.data[idx_d].z;
-
-        // apply minimum image conventions to all 3 vectors
-        dab = box.minImage(dab);
-        dac = box.minImage(dac);
-        dad = box.minImage(dad);
-        dbc = box.minImage(dbc);
-        dbd = box.minImage(dbd);
-
-        // on paper, the formula turns out to be: F = K*\vec{r} * (r_0/r - 1)
-        // FLOPS: 14 / MEM TRANSFER: 2 Scalars
-
-        // FLOPS: 42 / MEM TRANSFER: 6 Scalars
-        Scalar rsqab = dab.x * dab.x + dab.y * dab.y + dab.z * dab.z;
-        Scalar rab = sqrt(rsqab);
-        Scalar rac = dac.x * dac.x + dac.y * dac.y + dac.z * dac.z;
-        rac = sqrt(rac);
-        Scalar rad = dad.x * dad.x + dad.y * dad.y + dad.z * dad.z;
-        rad = sqrt(rad);
-
-        Scalar rbc = dbc.x * dbc.x + dbc.y * dbc.y + dbc.z * dbc.z;
-        rbc = sqrt(rbc);
-        Scalar rbd = dbd.x * dbd.x + dbd.y * dbd.y + dbd.z * dbd.z;
-        rbd = sqrt(rbd);
-
-        Scalar3 nab, nac, nad, nbc, nbd;
-        nab = dab / rab;
-        nac = dac / rac;
-        nad = dad / rad;
-        nbc = dbc / rbc;
-        nbd = dbd / rbd;
-
-        Scalar c_accb = nac.x * nbc.x + nac.y * nbc.y + nac.z * nbc.z;
-        if (c_accb > 1.0)
-            c_accb = 1.0;
-        if (c_accb < -1.0)
-            c_accb = -1.0;
-
-        Scalar c_addb = nad.x * nbd.x + nad.y * nbd.y + nad.z * nbd.z;
-        if (c_addb > 1.0)
-            c_addb = 1.0;
-        if (c_addb < -1.0)
-            c_addb = -1.0;
-
-        vec3<Scalar> nbac
-            = cross(vec3<Scalar>(nab.x, nab.y, nab.z), vec3<Scalar>(nac.x, nac.y, nac.z));
-
-        Scalar inv_nbac = 1.0 / sqrt(dot(nbac, nbac));
-
-        vec3<Scalar> nbad
-            = cross(vec3<Scalar>(nab.x, nab.y, nab.z), vec3<Scalar>(nad.x, nad.y, nad.z));
-
-        Scalar inv_nbad = 1.0 / sqrt(dot(nbad, nbad));
-
-        if (dot(nbac, nbad) * inv_nbad * inv_nbac > 0.9)
-            {
-            this->m_exec_conf->msg->error() << "helfrich calculations : triangles " << tr_idx1
-                                            << " " << tr_idx2 << " overlap." << std::endl
-                                            << std::endl;
-            throw std::runtime_error("Error in bending energy calculation");
-            }
-
-        Scalar inv_s_accb = sqrt(1.0 - c_accb * c_accb);
-        if (inv_s_accb < SMALL)
-            inv_s_accb = SMALL;
-        inv_s_accb = 1.0 / inv_s_accb;
-
-        Scalar inv_s_addb = sqrt(1.0 - c_addb * c_addb);
-        if (inv_s_addb < SMALL)
-            inv_s_addb = SMALL;
-        inv_s_addb = 1.0 / inv_s_addb;
-
-        Scalar cot_accb = c_accb * inv_s_accb;
-        Scalar cot_addb = c_addb * inv_s_addb;
-
-        Scalar sigma_hat_ab = (cot_accb + cot_addb) / 2;
-
-        Scalar sigma_a = sigma_hat_ab * rsqab * 0.25;
 
-        h_sigma.data[idx_a] += sigma_a;
-        h_sigma.data[idx_b] += sigma_a;
+        vec3<Scalar> pos_a = box.shift(h_pos.data[idx_a], h_image.data[idx_a]);
+        vec3<Scalar> pos_b = box.shift(h_pos.data[idx_b], h_image.data[idx_b]);
+        vec3<Scalar> pos_c = box.shift(h_pos.data[idx_c], h_image.data[idx_c]);
 
-        h_sigma_dash.data[idx_a].x += sigma_hat_ab * dab.x;
-        h_sigma_dash.data[idx_a].y += sigma_hat_ab * dab.y;
-        h_sigma_dash.data[idx_a].z += sigma_hat_ab * dab.z;
+        Scalar vol_tri = dot(cross(pos_c, pos_b), pos_a) / 6.0;
 
-        h_sigma_dash.data[idx_b].x -= sigma_hat_ab * dab.x;
-        h_sigma_dash.data[idx_b].y -= sigma_hat_ab * dab.y;
-        h_sigma_dash.data[idx_b].z -= sigma_hat_ab * dab.z;
+        m_volume += vol_tri;
         }
     }
 

From 6e0a197241a264648956fa61c69bef248f6b8b86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Wed, 26 Jan 2022 19:07:16 -0500
Subject: [PATCH 04/50] setup GPU code for MeshVolumeConstraint

---
 hoomd/md/MeshVolumeConservationGPU.cc | 177 ++++++++++++++++++++++++++
 hoomd/md/MeshVolumeConservationGPU.h  |  73 +++++++++++
 2 files changed, 250 insertions(+)
 create mode 100644 hoomd/md/MeshVolumeConservationGPU.cc
 create mode 100644 hoomd/md/MeshVolumeConservationGPU.h

diff --git a/hoomd/md/MeshVolumeConservationGPU.cc b/hoomd/md/MeshVolumeConservationGPU.cc
new file mode 100644
index 0000000000..f1790bf5c6
--- /dev/null
+++ b/hoomd/md/MeshVolumeConservationGPU.cc
@@ -0,0 +1,177 @@
+// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+#include "MeshVolumeConservationGPU.h"
+
+using namespace std;
+
+/*! \file MeshVolumeConservationGPU.cc
+    \brief Contains code for the MeshVolumeConservationGPU class
+*/
+
+namespace hoomd
+    {
+namespace md
+    {
+/*! \param sysdef System to compute forces on
+    \param meshdef Mesh definition providing the triangle data
+    \post Parameter and flag arrays are allocated and both autotuners are created. */
+MeshVolumeConservationGPU::MeshVolumeConservationGPU(std::shared_ptr<SystemDefinition> sysdef,
+                                                     std::shared_ptr<MeshDefinition> meshdef)
+    : MeshVolumeConservation(sysdef, meshdef)
+    {
+    if (!m_exec_conf->isCUDAEnabled())
+        {
+        m_exec_conf->msg->error()
+            << "Creating a MeshVolumeConservationGPU with no GPU in the execution configuration"
+            << endl;
+        throw std::runtime_error("Error initializing MeshVolumeConservationGPU");
+        }
+
+    // allocate the per-type parameter array (one entry per mesh triangle type)
+    GPUArray<Scalar2> params(this->m_mesh_data->getMeshTriangleData()->getNTypes(), m_exec_conf);
+    m_params.swap(params);
+
+    // allocate flags storage on the GPU
+    GPUArray<unsigned int> flags(1, this->m_exec_conf);
+    m_flags.swap(flags);
+
+    // reset flags
+    ArrayHandle<unsigned int> h_flags(m_flags, access_location::host, access_mode::overwrite);
+    h_flags.data[0] = 0;
+
+    unsigned int warp_size = this->m_exec_conf->dev_prop.warpSize;
+    m_tuner_force.reset(new Autotuner(warp_size,
+                                      1024,
+                                      warp_size,
+                                      5,
+                                      100000,
+                                      "vconstraint_forces",
+                                      this->m_exec_conf));
+    m_tuner_volume.reset(new Autotuner(warp_size,
+                                       1024,
+                                       warp_size,
+                                       5,
+                                       100000,
+                                       "vconstraint_volume",
+                                       this->m_exec_conf));
+    }
+
+void MeshVolumeConservationGPU::setParams(unsigned int type, Scalar K, Scalar V0)
+    {
+    MeshVolumeConservation::setParams(type, K, V0);
+    // NOTE(review): the handle is Scalar-typed but a Scalar2 (K, V0) is written - confirm type
+    ArrayHandle<Scalar> h_params(m_params, access_location::host, access_mode::readwrite);
+    // mirror the (K, V0) pair of this type into the array handed to the force kernel
+    h_params.data[type] = make_scalar2(K, V0);
+    }
+
+/*! Launch the volume kernel, then the force kernel (both receive m_volume)
+    \param timestep Current time step
+ */
+void MeshVolumeConservationGPU::computeForces(uint64_t timestep)
+    {
+    // start the profile
+    if (this->m_prof)
+        this->m_prof->push(this->m_exec_conf, "VolumeConstraint");
+
+    // access the particle positions and image flags on the device
+    ArrayHandle<Scalar4> d_pos(m_pdata->getPositions(), access_location::device, access_mode::read);
+    ArrayHandle<int3> d_image(m_pdata->getImages(), access_location::device, access_mode::read);
+
+    BoxDim box = this->m_pdata->getGlobalBox();
+
+    const GPUArray<typename MeshTriangle::members_t>& gpu_meshtriangle_list
+        = this->m_mesh_data->getMeshTriangleData()->getGPUTable();
+    const Index2D& gpu_table_indexer
+        = this->m_mesh_data->getMeshTriangleData()->getGPUTableIndexer();
+
+    ArrayHandle<typename MeshTriangle::members_t> d_gpu_meshtrianglelist(gpu_meshtriangle_list,
+                                                                         access_location::device,
+                                                                         access_mode::read);
+    ArrayHandle<unsigned int> d_gpu_meshtriangle_pos_list(
+        m_mesh_data->getMeshTriangleData()->getGPUPosTable(),
+        access_location::device,
+        access_mode::read);
+    ArrayHandle<unsigned int> d_gpu_n_meshtriangle(
+        this->m_mesh_data->getMeshTriangleData()->getNGroupsArray(),
+        access_location::device,
+        access_mode::read);
+
+    m_tuner_volume->begin();
+    kernel::gpu_compute_volume_constraint_volume(m_volume,
+                                                 m_pdata->getN(),
+                                                 d_pos.data,
+                                                 d_image.data,
+                                                 box,
+                                                 d_gpu_meshtrianglelist.data,
+                                                 d_gpu_meshtriangle_pos_list.data,
+                                                 gpu_table_indexer,
+                                                 d_gpu_n_meshtriangle.data,
+                                                 m_tuner_volume->getParam());
+
+    if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
+        {
+        CHECK_CUDA_ERROR();
+        }
+
+    m_tuner_volume->end();
+
+    ArrayHandle<Scalar4> d_force(m_force, access_location::device, access_mode::overwrite);
+    ArrayHandle<Scalar> d_virial(m_virial, access_location::device, access_mode::overwrite);
+    ArrayHandle<Scalar> d_params(m_params, access_location::device, access_mode::read);
+
+    // access the flags array that is checked for kernel errors below
+    ArrayHandle<unsigned int> d_flags(m_flags, access_location::device, access_mode::readwrite);
+
+    m_tuner_force->begin();
+    kernel::gpu_compute_volume_constraint_force(d_force.data,
+                                                d_virial.data,
+                                                m_virial.getPitch(),
+                                                m_pdata->getN(),
+                                                d_pos.data,
+                                                box,
+                                                m_volume,
+                                                d_gpu_meshtrianglelist.data,
+                                                d_gpu_meshtriangle_pos_list.data,
+                                                gpu_table_indexer,
+                                                d_gpu_n_meshtriangle.data,
+                                                d_params.data,
+                                                m_mesh_data->getMeshTriangleData()->getNTypes(),
+                                                m_tuner_force->getParam(),
+                                                d_flags.data);
+
+    if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
+        {
+        CHECK_CUDA_ERROR();
+
+        // check the flags for any errors
+        ArrayHandle<unsigned int> h_flags(m_flags, access_location::host, access_mode::read);
+
+        if (h_flags.data[0] & 1)
+            {
+            this->m_exec_conf->msg->error() << "volume constraint: triangle out of bounds ("
+                                            << h_flags.data[0] << ")" << std::endl
+                                            << std::endl;
+            throw std::runtime_error("Error in meshtriangle calculation");
+            }
+        }
+    m_tuner_force->end();
+
+    if (this->m_prof)
+        this->m_prof->pop(this->m_exec_conf);
+    }
+
+namespace detail
+    {
+void export_MeshVolumeConservationGPU(pybind11::module& m)
+    { // bind the class (base MeshVolumeConservation) and its (sysdef, meshdef) constructor
+    pybind11::class_<MeshVolumeConservationGPU,
+                     MeshVolumeConservation,
+                     std::shared_ptr<MeshVolumeConservationGPU>>(m, "MeshVolumeConservationGPU")
+        .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>>());
+    }
+
+    } // end namespace detail
+    } // end namespace md
+    } // end namespace hoomd
diff --git a/hoomd/md/MeshVolumeConservationGPU.h b/hoomd/md/MeshVolumeConservationGPU.h
new file mode 100644
index 0000000000..d7c4831980
--- /dev/null
+++ b/hoomd/md/MeshVolumeConservationGPU.h
@@ -0,0 +1,73 @@
+// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+#include "MeshVolumeConservation.h"
+#include "MeshVolumeConservationGPU.cuh"
+#include "hoomd/Autotuner.h"
+
+#include <memory>
+
+/*! \file MeshVolumeConservationGPU.h
+    \brief Declares a class for computing volume constraint forces on the GPU
+*/
+
+#ifdef __HIPCC__
+#error This header cannot be compiled by nvcc
+#endif
+
+#ifndef __MESHVOLUMECONSERVATION_GPU_H__
+#define __MESHVOLUMECONSERVATION_GPU_H__
+
+namespace hoomd
+    {
+namespace md
+    {
+
+//! Computes volume conservation forces on the mesh on the GPU
+/*! GPU variant of MeshVolumeConservation; adds autotuners, a flags array and a parameter array.
+
+    \ingroup computes
+*/
+class PYBIND11_EXPORT MeshVolumeConservationGPU : public MeshVolumeConservation
+    {
+    public:
+    //! Constructs the compute
+    MeshVolumeConservationGPU(std::shared_ptr<SystemDefinition> sysdef,
+                              std::shared_ptr<MeshDefinition> meshdef);
+
+    //! Set autotuner parameters
+    /*! \param enable Enable/disable autotuning
+        \param period period (approximate) in time steps when retuning occurs
+    */
+    virtual void setAutotunerParams(bool enable, unsigned int period)
+        {
+        MeshVolumeConservation::setAutotunerParams(enable, period);
+        m_tuner_force->setPeriod(period);
+        m_tuner_force->setEnabled(enable);
+        m_tuner_volume->setPeriod(period);
+        m_tuner_volume->setEnabled(enable);
+        }
+
+    //! Set the parameters K and V0 for one type
+    virtual void setParams(unsigned int type, Scalar K, Scalar V0);
+
+    protected:
+    std::unique_ptr<Autotuner> m_tuner_force;  //!< Autotuner for block size of force loop
+    std::unique_ptr<Autotuner> m_tuner_volume; //!< Autotuner for block size of volume loop
+    GPUArray<unsigned int> m_flags;            //!< Flags set during the kernel execution
+    GPUArray<Scalar> m_params;                 //!< GPU parameters; NOTE(review): .cc uses Scalar2
+
+    //! Actually compute the forces
+    virtual void computeForces(uint64_t timestep);
+    };
+
+namespace detail
+    {
+//! Exports the MeshVolumeConservationGPU class to python
+void export_MeshVolumeConservationGPU(pybind11::module& m);
+
+    } // end namespace detail
+    } // end namespace md
+    } // end namespace hoomd
+
+#endif

From ce5dc467f1e1624203d81b186791bda74370bb33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Thu, 27 Jan 2022 13:17:40 -0500
Subject: [PATCH 05/50] rename VolumeConstraint class and link CPU code to
 hoomd

---
 hoomd/md/CMakeLists.txt                       |   4 +-
 ... => VolumeConservationMeshForceCompute.cc} | 107 ++--
 ...h => VolumeConservationMeshForceCompute.h} |  20 +-
 ... VolumeConservationMeshForceComputeGPU.cc} |   5 +-
 .../VolumeConservationMeshForceComputeGPU.cu  | 461 ++++++++++++++++++
 .../VolumeConservationMeshForceComputeGPU.cuh |  56 +++
 ...> VolumeConservationMeshForceComputeGPU.h} |   0
 hoomd/md/module-md.cc                         |   4 +
 8 files changed, 593 insertions(+), 64 deletions(-)
 rename hoomd/md/{MeshVolumeConservation.cc => VolumeConservationMeshForceCompute.cc} (72%)
 rename hoomd/md/{MeshVolumeConservation.h => VolumeConservationMeshForceCompute.h} (78%)
 rename hoomd/md/{MeshVolumeConservationGPU.cc => VolumeConservationMeshForceComputeGPU.cc} (98%)
 create mode 100644 hoomd/md/VolumeConservationMeshForceComputeGPU.cu
 create mode 100644 hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
 rename hoomd/md/{MeshVolumeConservationGPU.h => VolumeConservationMeshForceComputeGPU.h} (100%)

diff --git a/hoomd/md/CMakeLists.txt b/hoomd/md/CMakeLists.txt
index 3c0537421e..ad127d5ddd 100644
--- a/hoomd/md/CMakeLists.txt
+++ b/hoomd/md/CMakeLists.txt
@@ -10,7 +10,7 @@ set(_md_sources module-md.cc
                    CosineSqAngleForceCompute.cc
                    CustomForceCompute.cc
                    EvaluatorWalls.cc
-                       FIREEnergyMinimizer.cc
+                   FIREEnergyMinimizer.cc
                    ForceComposite.cc
                    ForceDistanceConstraint.cc
                    HarmonicAngleForceCompute.cc
@@ -43,6 +43,7 @@ set(_md_sources module-md.cc
                    TwoStepNVE.cc
                    TwoStepNVTMTK.cc
                    WallData.cc
+		   VolumeConservationMeshForceCompute.cc
                    ZeroMomentumUpdater.cc
                    )
 
@@ -190,6 +191,7 @@ set(_md_headers ActiveForceComputeGPU.h
                 TwoStepNVE.h
                 TwoStepNVTMTKGPU.h
                 TwoStepNVTMTK.h
+		VolumeConservationMeshForceCompute.h
                 WallData.h
                 ZeroMomentumUpdater.h
                 )
diff --git a/hoomd/md/MeshVolumeConservation.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
similarity index 72%
rename from hoomd/md/MeshVolumeConservation.cc
rename to hoomd/md/VolumeConservationMeshForceCompute.cc
index 3552314256..417af16be8 100644
--- a/hoomd/md/MeshVolumeConservation.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -1,7 +1,7 @@
 // Copyright (c) 2009-2022 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
-#include "MeshVolumeConservation.h"
+#include "VolumeConservationMeshForceCompute.h"
 
 #include <iostream>
 #include <math.h>
@@ -10,11 +10,8 @@
 
 using namespace std;
 
-// SMALL a relatively small number
-#define SMALL Scalar(0.001)
-
-/*! \file MeshVolumeConservation.cc
-    \brief Contains code for the MeshVolumeConservation class
+/*! \file VolumeConservationMeshForceCompute.cc
+    \brief Contains code for the VolumeConservationMeshForceCompute class
 */
 
 namespace hoomd
@@ -24,11 +21,12 @@ namespace md
 /*! \param sysdef System to compute forces on
     \post Memory is allocated, and forces are zeroed.
 */
-MeshVolumeConservation::MeshVolumeConservation(std::shared_ptr<SystemDefinition> sysdef,
-                                               std::shared_ptr<MeshDefinition> meshdef)
+VolumeConservationMeshForceCompute::VolumeConservationMeshForceCompute(
+    std::shared_ptr<SystemDefinition> sysdef,
+    std::shared_ptr<MeshDefinition> meshdef)
     : ForceCompute(sysdef), m_K(NULL), m_V0(NULL), m_mesh_data(meshdef), m_volume(0)
     {
-    m_exec_conf->msg->notice(5) << "Constructing MeshVolumeConservation" << endl;
+    m_exec_conf->msg->notice(5) << "Constructing VolumeConservationMeshForceCompute" << endl;
 
     // allocate the parameters
     m_K = new Scalar[m_pdata->getNTypes()];
@@ -37,9 +35,9 @@ MeshVolumeConservation::MeshVolumeConservation(std::shared_ptr<SystemDefinition>
     m_V0 = new Scalar[m_pdata->getNTypes()];
     }
 
-MeshVolumeConservation::~MeshVolumeConservation()
+VolumeConservationMeshForceCompute::~VolumeConservationMeshForceCompute()
     {
-    m_exec_conf->msg->notice(5) << "Destroying MeshVolumeConservation" << endl;
+    m_exec_conf->msg->notice(5) << "Destroying VolumeConservationMeshForceCompute" << endl;
 
     delete[] m_K;
     delete[] m_V0;
@@ -52,7 +50,7 @@ MeshVolumeConservation::~MeshVolumeConservation()
 
     Sets parameters for the potential of a particular angle type
 */
-void MeshVolumeConservation::setParams(unsigned int type, Scalar K, Scalar V0)
+void VolumeConservationMeshForceCompute::setParams(unsigned int type, Scalar K, Scalar V0)
     {
     m_K[type] = K;
     m_V0[type] = V0;
@@ -64,20 +62,20 @@ void MeshVolumeConservation::setParams(unsigned int type, Scalar K, Scalar V0)
         m_exec_conf->msg->warning() << "volume: specified V0 <= 0" << endl;
     }
 
-void MeshVolumeConservation::setParamsPython(std::string type, pybind11::dict params)
+void VolumeConservationMeshForceCompute::setParamsPython(std::string type, pybind11::dict params)
     {
     auto typ = m_mesh_data->getMeshBondData()->getTypeByName(type);
     auto _params = vconstraint_params(params);
     setParams(typ, _params.k, _params.V0);
     }
 
-pybind11::dict MeshVolumeConservation::getParams(std::string type)
+pybind11::dict VolumeConservationMeshForceCompute::getParams(std::string type)
     {
     auto typ = m_mesh_data->getMeshBondData()->getTypeByName(type);
     if (typ >= m_mesh_data->getMeshBondData()->getNTypes())
         {
         m_exec_conf->msg->error() << "mesh.helfrich: Invalid mesh type specified" << endl;
-        throw runtime_error("Error setting parameters in MeshVolumeConservation");
+        throw runtime_error("Error setting parameters in VolumeConservationMeshForceCompute");
         }
     pybind11::dict params;
     params["k"] = m_K[typ];
@@ -88,12 +86,12 @@ pybind11::dict MeshVolumeConservation::getParams(std::string type)
 /*! Actually perform the force computation
     \param timestep Current time step
  */
-void MeshVolumeConservation::computeForces(uint64_t timestep)
+void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
     {
     if (m_prof)
         m_prof->push("Harmonic Angle");
 
-    computeVolume(); // precompute sigmas
+    computeVolume(); // precompute volume
 
     assert(m_pdata);
     // access the particle data arrays
@@ -145,26 +143,30 @@ void MeshVolumeConservation::computeForces(uint64_t timestep)
         // lookup the tag of each of the particles participating in the bond
         const typename MeshTriangle::members_t& triangle = h_triangles.data[i];
 
-        unsigned int btag_a = bond.tag[0];
-        assert(btag_a < m_pdata->getMaximumTag() + 1);
-        unsigned int btag_b = bond.tag[1];
-        assert(btag_b < m_pdata->getMaximumTag() + 1);
-        unsigned int btag_c = bond.tag[2];
-        assert(btag_c < m_pdata->getMaximumTag() + 1);
+        unsigned int ttag_a = triangle.tag[0];
+        assert(ttag_a < m_pdata->getMaximumTag() + 1);
+        unsigned int ttag_b = triangle.tag[1];
+        assert(ttag_b < m_pdata->getMaximumTag() + 1);
+        unsigned int ttag_c = triangle.tag[2];
+        assert(ttag_c < m_pdata->getMaximumTag() + 1);
 
         // transform a and b into indices into the particle data arrays
         // (MEM TRANSFER: 4 integers)
-        unsigned int idx_a = h_rtag.data[btag_a];
-        unsigned int idx_b = h_rtag.data[btag_b];
-        unsigned int idx_c = h_rtag.data[btag_c];
+        unsigned int idx_a = h_rtag.data[ttag_a];
+        unsigned int idx_b = h_rtag.data[ttag_b];
+        unsigned int idx_c = h_rtag.data[ttag_c];
 
         assert(idx_a < m_pdata->getN() + m_pdata->getNGhosts());
         assert(idx_b < m_pdata->getN() + m_pdata->getNGhosts());
         assert(idx_c < m_pdata->getN() + m_pdata->getNGhosts());
 
-        vec3<Scalar> pos_a = box.shift(h_pos.data[idx_a], h_image.data[idx_a]);
-        vec3<Scalar> pos_b = box.shift(h_pos.data[idx_b], h_image.data[idx_b]);
-        vec3<Scalar> pos_c = box.shift(h_pos.data[idx_c], h_image.data[idx_c]);
+        vec3<Scalar> pos_a(h_pos.data[idx_a].x, h_pos.data[idx_a].y, h_pos.data[idx_a].z);
+        vec3<Scalar> pos_b(h_pos.data[idx_b].x, h_pos.data[idx_b].y, h_pos.data[idx_b].z);
+        vec3<Scalar> pos_c(h_pos.data[idx_c].x, h_pos.data[idx_c].y, h_pos.data[idx_c].z);
+
+        pos_a = box.shift(pos_a, h_image.data[idx_a]);
+        pos_b = box.shift(pos_b, h_image.data[idx_b]);
+        pos_c = box.shift(pos_c, h_image.data[idx_c]);
 
         vec3<Scalar> dVol_a = cross(pos_c, pos_b);
 
@@ -253,8 +255,9 @@ void MeshVolumeConservation::computeForces(uint64_t timestep)
         m_prof->pop();
     }
 
-void MeshVolumeConservation::computeVolume()
+void VolumeConservationMeshForceCompute::computeVolume()
     {
+    ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
     ArrayHandle<unsigned int> h_rtag(m_pdata->getRTags(), access_location::host, access_mode::read);
     ArrayHandle<int3> h_image(m_pdata->getImages(), access_location::host, access_mode::read);
 
@@ -265,10 +268,6 @@ void MeshVolumeConservation::computeVolume()
 
     // get a local copy of the simulation box too
     const BoxDim& box = m_pdata->getGlobalBox();
-
-    ArrayHandle<Scalar> h_sigma(m_sigma, access_location::host, access_mode::overwrite);
-    ArrayHandle<Scalar3> h_sigma_dash(m_sigma_dash, access_location::host, access_mode::overwrite);
-
     m_volume = 0;
 
     // for each of the angles
@@ -278,26 +277,30 @@ void MeshVolumeConservation::computeVolume()
         // lookup the tag of each of the particles participating in the bond
         const typename MeshTriangle::members_t& triangle = h_triangles.data[i];
 
-        unsigned int btag_a = bond.tag[0];
-        assert(btag_a < m_pdata->getMaximumTag() + 1);
-        unsigned int btag_b = bond.tag[1];
-        assert(btag_b < m_pdata->getMaximumTag() + 1);
-        unsigned int btag_c = bond.tag[2];
-        assert(btag_c < m_pdata->getMaximumTag() + 1);
+        unsigned int ttag_a = triangle.tag[0];
+        assert(ttag_a < m_pdata->getMaximumTag() + 1);
+        unsigned int ttag_b = triangle.tag[1];
+        assert(ttag_b < m_pdata->getMaximumTag() + 1);
+        unsigned int ttag_c = triangle.tag[2];
+        assert(ttag_c < m_pdata->getMaximumTag() + 1);
 
         // transform a and b into indices into the particle data arrays
         // (MEM TRANSFER: 4 integers)
-        unsigned int idx_a = h_rtag.data[btag_a];
-        unsigned int idx_b = h_rtag.data[btag_b];
-        unsigned int idx_c = h_rtag.data[btag_c];
+        unsigned int idx_a = h_rtag.data[ttag_a];
+        unsigned int idx_b = h_rtag.data[ttag_b];
+        unsigned int idx_c = h_rtag.data[ttag_c];
 
         assert(idx_a < m_pdata->getN() + m_pdata->getNGhosts());
         assert(idx_b < m_pdata->getN() + m_pdata->getNGhosts());
         assert(idx_c < m_pdata->getN() + m_pdata->getNGhosts());
 
-        vec3<Scalar> pos_a = box.shift(h_pos.data[idx_a], h_image.data[idx_a]);
-        vec3<Scalar> pos_b = box.shift(h_pos.data[idx_b], h_image.data[idx_b]);
-        vec3<Scalar> pos_c = box.shift(h_pos.data[idx_c], h_image.data[idx_c]);
+        vec3<Scalar> pos_a(h_pos.data[idx_a].x, h_pos.data[idx_a].y, h_pos.data[idx_a].z);
+        vec3<Scalar> pos_b(h_pos.data[idx_b].x, h_pos.data[idx_b].y, h_pos.data[idx_b].z);
+        vec3<Scalar> pos_c(h_pos.data[idx_c].x, h_pos.data[idx_c].y, h_pos.data[idx_c].z);
+
+        pos_a = box.shift(pos_a, h_image.data[idx_a]);
+        pos_b = box.shift(pos_b, h_image.data[idx_b]);
+        pos_c = box.shift(pos_c, h_image.data[idx_c]);
 
         Scalar vol_tri = dot(cross(pos_c, pos_b), pos_a) / 6.0;
 
@@ -307,14 +310,16 @@ void MeshVolumeConservation::computeVolume()
 
 namespace detail
     {
-void export_MeshVolumeConservation(pybind11::module& m)
+void export_VolumeConservationMeshForceCompute(pybind11::module& m)
     {
-    pybind11::class_<MeshVolumeConservation, ForceCompute, std::shared_ptr<MeshVolumeConservation>>(
+    pybind11::class_<VolumeConservationMeshForceCompute,
+                     ForceCompute,
+                     std::shared_ptr<VolumeConservationMeshForceCompute>>(
         m,
-        "MeshVolumeConservation")
+        "VolumeConservationMeshForceCompute")
         .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>>())
-        .def("setParams", &MeshVolumeConservation::setParamsPython)
-        .def("getParams", &MeshVolumeConservation::getParams);
+        .def("setParams", &VolumeConservationMeshForceCompute::setParamsPython)
+        .def("getParams", &VolumeConservationMeshForceCompute::getParams);
     }
 
     } // end namespace detail
diff --git a/hoomd/md/MeshVolumeConservation.h b/hoomd/md/VolumeConservationMeshForceCompute.h
similarity index 78%
rename from hoomd/md/MeshVolumeConservation.h
rename to hoomd/md/VolumeConservationMeshForceCompute.h
index d6522ae9db..77974aa167 100644
--- a/hoomd/md/MeshVolumeConservation.h
+++ b/hoomd/md/VolumeConservationMeshForceCompute.h
@@ -8,7 +8,7 @@
 
 #include <vector>
 
-/*! \file MeshVolumeConservation.h
+/*! \file VolumeConservationMeshForceCompute.h
     \brief Declares a class for computing volume constraint forces
 */
 
@@ -18,8 +18,8 @@
 
 #include <pybind11/pybind11.h>
 
-#ifndef __MESHVOLUMECONSERVATION_H__
-#define __MESHVOLUMECONSERVATION_H__
+#ifndef __VOLUMECONSERVATIONMESHFORCECOMPUTE_H__
+#define __VOLUMECONSERVATIONMESHFORCECOMPUTE_H__
 
 namespace hoomd
     {
@@ -34,7 +34,7 @@ struct vconstraint_params
     vconstraint_params() : k(0), V0(0) { }
 
     vconstraint_params(pybind11::dict params)
-        : k(params["k"].cast<Scalar>(), V0(params["V0"].cast<Scalar>())
+        : k(params["k"].cast<Scalar>()), V0(params["V0"].cast<Scalar>())
         {
         }
 
@@ -58,15 +58,15 @@ struct vconstraint_params
 
     \ingroup computes
 */
-class PYBIND11_EXPORT MeshVolumeConservation : public ForceCompute
+class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
     {
     public:
     //! Constructs the compute
-    MeshVolumeConservation(std::shared_ptr<SystemDefinition> sysdef,
-                           std::shared_ptr<MeshDefinition> meshdef);
+    VolumeConservationMeshForceCompute(std::shared_ptr<SystemDefinition> sysdef,
+                                       std::shared_ptr<MeshDefinition> meshdef);
 
     //! Destructor
-    virtual ~MeshVolumeConservation();
+    virtual ~VolumeConservationMeshForceCompute();
 
     //! Set the parameters
     virtual void setParams(unsigned int type, Scalar K, Scalar V0);
@@ -106,8 +106,8 @@ class PYBIND11_EXPORT MeshVolumeConservation : public ForceCompute
 
 namespace detail
     {
-//! Exports the MeshVolumeConservation class to python
-void export_MeshVolumeConservation(pybind11::module& m);
+//! Exports the VolumeConservationMeshForceCompute class to python
+void export_VolumeConservationMeshForceCompute(pybind11::module& m);
 
     } // end namespace detail
     } // end namespace md
diff --git a/hoomd/md/MeshVolumeConservationGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
similarity index 98%
rename from hoomd/md/MeshVolumeConservationGPU.cc
rename to hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index f1790bf5c6..08941b3c9d 100644
--- a/hoomd/md/MeshVolumeConservationGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -98,7 +98,7 @@ void MeshVolumeConservationGPU::computeForces(uint64_t timestep)
         access_location::device,
         access_mode::read);
 
-    m_tuner_sigma->begin();
+    m_tuner_volume->begin();
     kernel::gpu_compute_volume_constraint_volume(m_volume,
                                                  m_pdata->getN(),
                                                  d_pos.data,
@@ -115,7 +115,7 @@ void MeshVolumeConservationGPU::computeForces(uint64_t timestep)
         CHECK_CUDA_ERROR();
         }
 
-    m_tuner_sigma->end();
+    m_tuner_volume->end();
 
     ArrayHandle<Scalar4> d_force(m_force, access_location::device, access_mode::overwrite);
     ArrayHandle<Scalar> d_virial(m_virial, access_location::device, access_mode::overwrite);
@@ -130,6 +130,7 @@ void MeshVolumeConservationGPU::computeForces(uint64_t timestep)
                                                 m_virial.getPitch(),
                                                 m_pdata->getN(),
                                                 d_pos.data,
+                                                d_image.data,
                                                 box,
                                                 m_volume,
                                                 d_gpu_meshtrianglelist.data,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
new file mode 100644
index 0000000000..c5091418b7
--- /dev/null
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -0,0 +1,461 @@
+// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+#include "hip/hip_runtime.h"
+// Copyright (c) 2009-2021 The Regents of the University of Michigan
+// This file is part of the HOOMD-blue project, released under the BSD 3-Clause License.
+
+#include "MeshVolumeConservationGPU.cuh"
+#include "hoomd/TextureTools.h"
+
+#include <assert.h>
+
+// SMALL a relatively small number
+#define SMALL Scalar(0.001)
+
+/*! \file VolumeConservationMeshForceComputeGPU.cu
+    \brief Defines GPU kernel code for calculating the volume_constraint forces. Used by
+   MeshVolumeConservationComputeGPU.
+*/
+
+namespace hoomd
+    {
+namespace md
+    {
+namespace kernel
+    {
+//! Kernel for calculating volume_constraint sigmas on the GPU
+/*! \param d_sigma Device memory to write per particle sigma
+    \param d_sigma_dash Device memory to write per particle sigma_dash
+    \param N number of particles
+    \param d_pos device array of particle positions
+    \param d_rtag device array of particle reverse tags
+    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
+    \param blist List of mesh bonds stored on the GPU
+    \param d_triangles device array of mesh triangles
+    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
+*/
+__global__ void gpu_compute_volume_constraint_volume_kernel(Scalar volume,
+                                                            const unsigned int N,
+                                                            const Scalar4* d_pos,
+                                                            const unsigned int* d_rtag,
+                                                            const BoxDim& box,
+                                                            const group_storage<6>* tlist,
+                                                            const unsigned int* tpos_list,
+                                                            const Index2D tlist_idx,
+                                                            const unsigned int* n_triangles_list)
+    {
+    // start by identifying which particle we are to handle
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (idx >= N)
+        return;
+
+    // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
+    int n_bonds = n_bonds_list[idx];
+
+    // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
+    Scalar4 postype = __ldg(d_pos + idx);
+    Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);
+
+    // initialize the force to 0
+    Scalar3 sigma_dash = make_scalar3(Scalar(0.0), Scalar(0.0), Scalar(0.0));
+
+    Scalar sigma = 0.0;
+
+    // loop over all angles
+    for (int bond_idx = 0; bond_idx < n_bonds; bond_idx++)
+        {
+        group_storage<4> cur_bond = blist[blist_idx(idx, bond_idx)];
+
+        int cur_bond_idx = cur_bond.idx[0];
+        int cur_tr1_idx = cur_bond.idx[1];
+        int cur_tr2_idx = cur_bond.idx[2];
+
+        if (cur_tr1_idx == cur_tr2_idx)
+            continue;
+
+        const group_storage<6>& triangle1 = d_triangles[cur_tr1_idx];
+
+        unsigned int cur_idx_c = d_rtag[triangle1.tag[0]];
+
+        unsigned int iterator = 1;
+        while (idx == cur_idx_c || cur_bond_idx == cur_idx_c)
+            {
+            cur_idx_c = d_rtag[triangle1.tag[iterator]];
+            iterator++;
+            }
+
+        const group_storage<6>& triangle2 = d_triangles[cur_tr2_idx];
+
+        unsigned int cur_idx_d = d_rtag[triangle2.tag[0]];
+
+        iterator = 1;
+        while (idx == cur_idx_d || cur_bond_idx == cur_idx_d)
+            {
+            cur_idx_d = d_rtag[triangle2.tag[iterator]];
+            iterator++;
+            }
+
+        // get the b-particle's position (MEM TRANSFER: 16 bytes)
+        Scalar4 bb_postype = d_pos[cur_bond_idx];
+        Scalar3 bb_pos = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
+        // get the c-particle's position (MEM TRANSFER: 16 bytes)
+        Scalar4 cc_postype = d_pos[cur_idx_c];
+        Scalar3 cc_pos = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
+        // get the d-particle's position (MEM TRANSFER: 16 bytes)
+        Scalar4 dd_postype = d_pos[cur_idx_d];
+        Scalar3 dd_pos = make_scalar3(dd_postype.x, dd_postype.y, dd_postype.z);
+
+        Scalar3 dab = pos - bb_pos;
+        Scalar3 dac = pos - cc_pos;
+        Scalar3 dad = pos - dd_pos;
+        Scalar3 dbc = bb_pos - cc_pos;
+        Scalar3 dbd = bb_pos - dd_pos;
+
+        dab = box.minImage(dab);
+        dac = box.minImage(dac);
+        dad = box.minImage(dad);
+        dbc = box.minImage(dbc);
+        dbd = box.minImage(dbd);
+
+        // on paper, the formula turns out to be: F = K*\vec{r} * (r_0/r - 1)
+        // FLOPS: 14 / MEM TRANSFER: 2 Scalars
+
+        // FLOPS: 42 / MEM TRANSFER: 6 Scalars
+        Scalar rsqab = dab.x * dab.x + dab.y * dab.y + dab.z * dab.z;
+        Scalar rac = dac.x * dac.x + dac.y * dac.y + dac.z * dac.z;
+        rac = sqrt(rac);
+        Scalar rad = dad.x * dad.x + dad.y * dad.y + dad.z * dad.z;
+        rad = sqrt(rad);
+
+        Scalar rbc = dbc.x * dbc.x + dbc.y * dbc.y + dbc.z * dbc.z;
+        rbc = sqrt(rbc);
+        Scalar rbd = dbd.x * dbd.x + dbd.y * dbd.y + dbd.z * dbd.z;
+        rbd = sqrt(rbd);
+
+        Scalar3 nab, nac, nad, nbc, nbd;
+        nab = dab / rab;
+        nac = dac / rac;
+        nad = dad / rad;
+        nbc = dbc / rbc;
+        nbd = dbd / rbd;
+
+        Scalar c_accb = nac.x * nbc.x + nac.y * nbc.y + nac.z * nbc.z;
+
+        if (c_accb > 1.0)
+            c_accb = 1.0;
+        if (c_accb < -1.0)
+            c_accb = -1.0;
+
+        Scalar c_addb = nad.x * nbd.x + nad.y * nbd.y + nad.z * nbd.z;
+
+        if (c_addb > 1.0)
+            c_addb = 1.0;
+        if (c_addb < -1.0)
+            c_addb = -1.0;
+
+        vec3<Scalar> nbac
+            = cross(vec3<Scalar>(nab.x, nab.y, nab.z), vec3<Scalar>(nac.x, nac.y, nac.z));
+
+        Scalar inv_nbac = 1.0 / sqrt(dot(nbac, nbac));
+
+        vec3<Scalar> nbad
+            = cross(vec3<Scalar>(nab.x, nab.y, nab.z), vec3<Scalar>(nad.x, nad.y, nad.z));
+
+        Scalar inv_nbad = 1.0 / sqrt(dot(nbad, nbad));
+
+        if (dot(nbac, nbad) * inv_nbad * inv_nbac > 0.9)
+            {
+            this->m_exec_conf->msg->error() << "volume_constraint calculations : triangles "
+                                            << tr_idx1 << " " << tr_idx2 << " overlap." << std::endl
+                                            << std::endl;
+            throw std::runtime_error("Error in bending energy calculation");
+            }
+
+        Scalar inv_s_accb = sqrt(1.0 - c_accb * c_accb);
+        if (inv_s_accb < SMALL)
+            inv_s_accb = SMALL;
+        inv_s_accb = 1.0 / inv_s_accb;
+
+        Scalar inv_s_addb = sqrt(1.0 - c_addb * c_addb);
+        if (inv_s_addb < SMALL)
+            inv_s_addb = SMALL;
+        inv_s_addb = 1.0 / inv_s_addb;
+
+        Scalar cot_accb = c_accb * inv_s_accb;
+        Scalar cot_addb = c_addb * inv_s_addb;
+
+        Scalar sigma_hat_ab = (cot_accb + cot_addb) / 2;
+
+        Scalar sigma_a = sigma_hat_ab * rsqab * 0.25;
+
+        Scalar3 sigma_dash_a = sigma_hat_ab * dab;
+
+        sigma += sigma_a;
+        sigma_dash += sigma_dash_a;
+        }
+
+    // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
+    d_sigma[idx] = sigma;
+    d_sigma_dash[idx] = sigma_dash;
+    }
+
+/*! \param d_sigma Device memory to write per particle sigma
+    \param d_sigma_dash Device memory to write per particle sigma_dash
+    \param N number of particles
+    \param d_pos device array of particle positions
+    \param d_rtag device array of particle reverse tags
+    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
+    \param blist List of mesh bonds stored on the GPU
+    \param d_triangles device array of mesh triangles
+    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
+    \param block_size Block size to use when performing calculations
+    \param compute_capability Device compute capability (200, 300, 350, ...)
+
+    \returns Any error code resulting from the kernel launch
+    \note Always returns hipSuccess in release builds to avoid the hipDeviceSynchronize()
+*/
+hipError_t gpu_compute_volume_constraint_volume(Scalar volume,
+                                                const unsigned int N,
+                                                const Scalar4* d_pos,
+                                                const int3* d_image,
+                                                const BoxDim& box,
+                                                const group_storage<6>* tlist,
+                                                const unsigned int* tpos_list,
+                                                const Index2D tlist_idx,
+                                                const unsigned int* n_triangles_list,
+                                                int block_size)
+    {
+    unsigned int max_block_size;
+    hipFuncAttributes attr;
+    hipFuncGetAttributes(&attr, (const void*)gpu_compute_volume_constraint_volume_kernel);
+    max_block_size = attr.maxThreadsPerBlock;
+
+    unsigned int run_block_size = min(block_size, max_block_size);
+
+    // setup the grid to run the kernel
+    dim3 grid(N / run_block_size + 1, 1, 1);
+    dim3 threads(run_block_size, 1, 1);
+
+    // run the kernel
+    hipLaunchKernelGGL((gpu_compute_volume_constraint_volume_kernel),
+                       dim3(grid),
+                       dim3(threads),
+                       0,
+                       0,
+                       volume,
+                       N,
+                       d_pos,
+                       d_image,
+                       box,
+                       tlist,
+                       tpos_list,
+                       tlist_idx,
+                       n_triangles_list);
+
+    return hipSuccess;
+    }
+
+//! Kernel for calculating volume_constraint forces on the GPU
+/*! \param d_force Device memory to write computed forces
+    \param d_virial Device memory to write computed virials
+    \param virial_pitch
+    \param N number of particles
+    \param d_pos device array of particle positions
+    \param d_rtag device array of particle reverse tags
+    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
+    \param d_sigma Device memory to write per particle sigma
+    \param d_sigma_dash Device memory to write per particle sigma_dash
+    \param blist List of mesh bonds stored on the GPU
+    \param d_triangles device array of mesh triangles
+    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
+    \param d_params K params packed as Scalar variables
+    \param n_bond_type number of mesh bond types
+    \param d_flags Flag allocated on the device for use in checking for bonds that cannot be
+*/
+__global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
+                                                           Scalar* d_virial,
+                                                           const size_t virial_pitch,
+                                                           const unsigned int N,
+                                                           const Scalar4* d_pos,
+                                                           const int3* d_image,
+                                                           const BoxDim& box,
+                                                           const Scalar volume,
+                                                           const group_storage<6>* tlist,
+                                                           const unsigned int* tpos_list,
+                                                           const Index2D tlist_idx,
+                                                           const unsigned int* n_triangles_list,
+                                                           Scalar* d_params,
+                                                           const unsigned int n_triangle_type,
+                                                           unsigned int* d_flags);
+    {
+    // start by identifying which particle we are to handle
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (idx >= N)
+        return;
+
+    // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
+    int n_bonds = n_bonds_list[idx];
+
+    // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
+    Scalar4 postype = __ldg(d_pos + idx);
+    Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);
+
+    int3 image_a = __ldg(d_image + idx);
+
+    vec3<Scalar> pos_a = box.shift(pos, image_a);
+
+    Scalar4 force = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
+
+    // initialize the virial to 0
+    Scalar virial[6];
+    for (int i = 0; i < 6; i++)
+        virial[i] = Scalar(0.0);
+
+    // loop over all triangles
+    for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
+        {
+        group_storage<6> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
+
+        int cur_triangle_b = cur_triangle.idx[0];
+        int cur_triangle_c = cur_triangle.idx[1];
+        int cur_triangle_type = cur_triangle.idx[5];
+
+        // get the angle parameters (MEM TRANSFER: 8 bytes)
+        Scalar2 params = __ldg(d_params + cur_triangle_type);
+        Scalar K = params.x;
+        Scalar V0 = params.y;
+
+        Scalar VolDiff = volume - V0;
+
+        Scalar energy = K * VolDiff * VolDiff / (2 * V0 * N);
+
+        VolDiff = -K / V0 * VolDiff / 6.0;
+
+        int cur_triangle_abc = tpos_list[tlist_idx(idx, triangle_idx)];
+
+        // get the b-particle's position (MEM TRANSFER: 16 bytes)
+        Scalar4 bb_postype = d_pos[cur_triangle_b];
+        Scalar3 bb_pos = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
+        int3 image_b = d_image[cur_triangle_b] vec3<Scalar> pos_b = box.shift(bb_pos, image_b);
+
+        // get the c-particle's position (MEM TRANSFER: 16 bytes)
+        Scalar4 cc_postype = d_pos[cur_triangle_c];
+        Scalar3 cc_pos = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
+        int3 image_c = d_image[cur_triangle_c] vec3<Scalar> pos_c = box.shift(cc_pos, image_c);
+
+        vec3<Scalar> dVol;
+        if (cur_triangle_abc == 1)
+            {
+            dVol = cross(pos_b, pos_c);
+            }
+        else
+            {
+            dVol = cross(pos_c, pos_b);
+            }
+
+        Scalar3 Fa;
+
+        Fa.x = VolDiff * dVol.x;
+        Fa.y = VolDiff * dVol.y;
+        Fa.z = VolDiff * dVol.z;
+
+        force.x += Fa.x;
+        force.y += Fa.y;
+        force.z += Fa.z;
+        force.w = energy;
+
+        virial[0] += Scalar(1. / 2.) * pos.x * Fa.x; // xx
+        virial[1] += Scalar(1. / 2.) * pos.y * Fa.x; // xy
+        virial[2] += Scalar(1. / 2.) * pos.z * Fa.x; // xz
+        virial[3] += Scalar(1. / 2.) * pos.y * Fa.y; // yy
+        virial[4] += Scalar(1. / 2.) * pos.z * Fa.y; // yz
+        virial[5] += Scalar(1. / 2.) * pos.z * Fa.z; // zz
+        }
+
+    // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
+    d_force[idx] = force;
+
+    for (unsigned int i = 0; i < 6; i++)
+        d_virial[i * virial_pitch + idx] = virial[i];
+    }
+
+/*! \param d_force Device memory to write computed forces
+    \param d_virial Device memory to write computed virials
+    \param N number of particles
+    \param d_pos device array of particle positions
+    \param d_rtag device array of particle reverse tags
+    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
+    \param d_sigma Device memory to write per particle sigma
+    \param d_sigma_dash Device memory to write per particle sigma_dash
+    \param blist List of mesh bonds stored on the GPU
+    \param d_triangles device array of mesh triangles
+    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
+    \param d_params K params packed as Scalar variables
+    \param n_bond_type number of mesh bond types
+    \param block_size Block size to use when performing calculations
+    \param d_flags Flag allocated on the device for use in checking for bonds that cannot be
+    \param compute_capability Device compute capability (200, 300, 350, ...)
+
+    \returns Any error code resulting from the kernel launch
+    \note Always returns hipSuccess in release builds to avoid the hipDeviceSynchronize()
+*/
+hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
+                                               Scalar* d_virial,
+                                               const size_t virial_pitch,
+                                               const unsigned int N,
+                                               const Scalar4* d_pos,
+                                               const int3* d_image,
+                                               const unsigned int* d_rtag,
+                                               const BoxDim& box,
+                                               const Scalar volume,
+                                               const group_storage<6>* tlist,
+                                               const unsigned int* tpos_list,
+                                               const Index2D tlist_idx,
+                                               const unsigned int* n_triangles_list,
+                                               Scalar* d_params,
+                                               const unsigned int n_triangle_type,
+                                               int block_size,
+                                               unsigned int* d_flags);
+    {
+    unsigned int max_block_size;
+    hipFuncAttributes attr;
+    hipFuncGetAttributes(&attr, (const void*)gpu_compute_volume_constraint_force_kernel);
+    max_block_size = attr.maxThreadsPerBlock;
+
+    unsigned int run_block_size = min(block_size, max_block_size);
+
+    // setup the grid to run the kernel
+    dim3 grid(N / run_block_size + 1, 1, 1);
+    dim3 threads(run_block_size, 1, 1);
+
+    // run the kernel
+    hipLaunchKernelGGL((gpu_compute_volume_constraint_force_kernel),
+                       dim3(grid),
+                       dim3(threads),
+                       0,
+                       0,
+                       d_force,
+                       d_virial,
+                       virial_pitch,
+                       N,
+                       d_pos,
+                       d_image,
+                       box,
+                       volume,
+                       tlist,
+                       tpos_list,
+                       tlist_idx,
+                       n_triangles_list,
+                       d_params,
+                       n_triangle_type,
+                       d_flags);
+
+    return hipSuccess;
+    }
+
+    } // end namespace kernel
+    } // end namespace md
+    } // end namespace hoomd
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
new file mode 100644
index 0000000000..7ee3a64f16
--- /dev/null
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -0,0 +1,56 @@
+// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+#include "hoomd/HOOMDMath.h"
+#include "hoomd/Index1D.h"
+#include "hoomd/MeshGroupData.cuh"
+#include "hoomd/ParticleData.cuh"
+
+/*! \file VolumeConservationMeshForceComputeGPU.cuh
+    \brief Declares GPU kernel code for calculating the volume constraint forces. Used by
+   MeshVolumeConservationGPU.
+*/
+
+#ifndef __MESHVOLUMECONSERVATION_CUH__
+#define __MESHVOLUMECONSERVATION_CUH__
+
+namespace hoomd
+    {
+namespace md
+    {
+namespace kernel
+    {
+//! Kernel driver that computes the volume for MeshVolumeConservationGPU
+hipError_t gpu_compute_volume_constraint_volume(Scalar volume,
+                                                const unsigned int N,
+                                                const Scalar4* d_pos,
+                                                const int3* d_image,
+                                                const BoxDim& box,
+                                                const group_storage<6>* tlist,
+                                                const unsigned int* tpos_list,
+                                                const Index2D tlist_idx,
+                                                const unsigned int* n_triangles_list,
+                                                int block_size);
+
+//! Kernel driver that computes the forces for MeshVolumeConservationGPU
+hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
+                                               Scalar* d_virial,
+                                               const size_t virial_pitch,
+                                               const unsigned int N,
+                                               const Scalar4* d_pos,
+                                               const int3* d_image,
+                                               const BoxDim& box,
+                                               const Scalar volume,
+                                               const group_storage<6>* tlist,
+                                               const unsigned int* tpos_list,
+                                               const Index2D tlist_idx,
+                                               const unsigned int* n_triangles_list,
+                                               Scalar* d_params,
+                                               const unsigned int n_triangle_type,
+                                               int block_size,
+                                               unsigned int* d_flags);
+    } // end namespace kernel
+    } // end namespace md
+    } // end namespace hoomd
+
+#endif
diff --git a/hoomd/md/MeshVolumeConservationGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
similarity index 100%
rename from hoomd/md/MeshVolumeConservationGPU.h
rename to hoomd/md/VolumeConservationMeshForceComputeGPU.h
diff --git a/hoomd/md/module-md.cc b/hoomd/md/module-md.cc
index 3be191f535..bfb4884bc1 100644
--- a/hoomd/md/module-md.cc
+++ b/hoomd/md/module-md.cc
@@ -60,6 +60,7 @@
 #include "TwoStepRATTLEBD.h"
 #include "TwoStepRATTLELangevin.h"
 #include "TwoStepRATTLENVE.h"
+#include "VolumeConservationMeshForceCompute.h"
 #include "WallData.h"
 #include "ZeroMomentumUpdater.h"
 
@@ -436,6 +437,9 @@ PYBIND11_MODULE(_md, m)
     export_TwoStepRATTLENVE<ManifoldPrimitive>(m, "TwoStepRATTLENVEPrimitive");
     export_TwoStepRATTLENVE<ManifoldSphere>(m, "TwoStepRATTLENVESphere");
 
+    // mesh
+    export_VolumeConservationMeshForceCompute(m);
+
 #ifdef ENABLE_HIP
     export_TwoStepNVEGPU(m);
     export_TwoStepNVTMTKGPU(m);

From 163141d305e4f24507630daa62fd83a46fe5f69e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Thu, 27 Jan 2022 13:58:15 -0500
Subject: [PATCH 06/50] writing API

---
 .../md/VolumeConservationMeshForceCompute.cc  |  6 +--
 hoomd/md/mesh/CMakeLists.txt                  |  3 +-
 hoomd/md/mesh/__init__.py                     |  2 +-
 hoomd/md/mesh/conservation.py                 | 44 +++++++++++++++++++
 4 files changed, 50 insertions(+), 5 deletions(-)
 create mode 100644 hoomd/md/mesh/conservation.py

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index 417af16be8..c21674d10b 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -242,9 +242,9 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
 
         if (idx_c < m_pdata->getN())
             {
-            h_force.data[idx_c].x -= Fc.x;
-            h_force.data[idx_c].y -= Fc.y;
-            h_force.data[idx_c].z -= Fc.z;
+            h_force.data[idx_c].x += Fc.x;
+            h_force.data[idx_c].y += Fc.y;
+            h_force.data[idx_c].z += Fc.z;
             h_force.data[idx_c].w = energy;
             for (int j = 0; j < 6; j++)
                 h_virial.data[j * virial_pitch + idx_c] += helfrich_virial[j];
diff --git a/hoomd/md/mesh/CMakeLists.txt b/hoomd/md/mesh/CMakeLists.txt
index 52ba831539..c33f2f5f79 100644
--- a/hoomd/md/mesh/CMakeLists.txt
+++ b/hoomd/md/mesh/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(files __init__.py
-          potential.py
           bond.py
+          conservation.py
+          potential.py
    )
 
 install(FILES ${files}
diff --git a/hoomd/md/mesh/__init__.py b/hoomd/md/mesh/__init__.py
index 2f89d04e24..80fc5a233c 100644
--- a/hoomd/md/mesh/__init__.py
+++ b/hoomd/md/mesh/__init__.py
@@ -4,4 +4,4 @@
 """Mesh potentials for molecular dynamics."""
 
 from .potential import MeshPotential
-from . import bond
+from . import bond, conservation
diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
new file mode 100644
index 0000000000..beeffe80c0
--- /dev/null
+++ b/hoomd/md/mesh/conservation.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2009-2022 The Regents of the University of Michigan.
+# Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+"""Mesh conservation potentials."""
+
+from hoomd.md.mesh.potential import MeshPotential
+from hoomd.data.typeparam import TypeParameter
+from hoomd.data.parameterdicts import TypeParameterDict
+
+
+class Volume(MeshPotential):
+    r"""Volume conservation potential.
+
+    :py:class:`Volume` specifies a volume constraint on the whole mesh
+    surface.
+
+    Args:
+        mesh (:py:mod:`hoomd.mesh.Mesh`): Mesh the constraint is applied to.
+
+    Attributes:
+        params (TypeParameter[dict]):
+            The parameters of the volume constraint for the defined mesh.
+            As the mesh can only have one type a type name does not have
+            to be stated. The dictionary has the following keys:
+
+            * ``k`` (`float`, **required**) - potential constant
+              :math:`[\mathrm{energy} \cdot \mathrm{length}^{-3}]`
+
+            * ``V0`` (`float`, **required**) - target volume
+              :math:`[\mathrm{length}^{3}]`
+
+    Examples::
+
+        volume = mesh.conservation.Volume(mesh)
+        volume.params["mesh"] = dict(k=10.0, V0=100)
+    """
+    _cpp_class_name = "VolumeConservationMeshForceCompute"
+
+    def __init__(self, mesh):
+        params = TypeParameter("params", "types",
+                               TypeParameterDict(k=float, V0=float, len_keys=1))
+        self._add_typeparam(params)
+
+        super().__init__(mesh)

From b254c30654aeb190e7afadb66f5005f269b58a31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Thu, 27 Jan 2022 14:46:41 -0500
Subject: [PATCH 07/50] add test for volume

---
 hoomd/md/pytest/test_meshbond.py | 197 +++++++++++++++----------------
 1 file changed, 93 insertions(+), 104 deletions(-)

diff --git a/hoomd/md/pytest/test_meshbond.py b/hoomd/md/pytest/test_meshbond.py
index 584736ab67..e84435e32b 100644
--- a/hoomd/md/pytest/test_meshbond.py
+++ b/hoomd/md/pytest/test_meshbond.py
@@ -31,36 +31,56 @@
 _Tether_arg_list = [(hoomd.md.mesh.bond.Tether, dict(zip(_Tether_args, val)))
                     for val in zip(*_Tether_args.values())]
 
-
-def get_mesh_bond_and_args():
-    return _harmonic_arg_list + _FENE_arg_list + _Tether_arg_list
-
-
-def get_mesh_bond_args_forces_and_energies():
-    harmonic_forces = [[[-28.395, 16.393861, 0], [0, -32.787722, 0],
-                        [28.395, 16.393861, 0]],
-                       [[-27.4125, 15.826614, 0], [0, -31.653229, 0],
-                        [27.4125, 15.826614, 0]],
-                       [[-24.93, 14.393342, 0], [0, -28.786684, 0],
-                        [24.93, 14.393342, 0]]]
-    harmonic_energies = [17.9172, 20.0385, 20.7168]
-    FENE_forces = [[[-165.834803, 95.744768, 0], [0, -191.489537, 0],
-                    [165.834803, 95.744768, 0]],
-                   [[-9.719869, 5.611769, 0], [0., -11.223537, 0],
-                    [9.719869, 5.611769, 0]],
-                   [[33.483261, -19.331569, 0], [0, 38.663139, 0],
-                    [-33.483261, -19.331569, 0]]]
-    FENE_energies = [82.0225, 48.6153, 33.4625]
-    Tether_forces = [[[0, 0, 0], [0, 0, 0], [0, 0, 0]],
-                     [[-0.036666, 0.021169, 0], [0, -0.042339, 0],
-                      [0.036666, 0.021169, 0]],
-                     [[-5.358389, 3.093667, 0], [0, -6.187334, 0],
-                      [5.358389, 3.093667, 0]]]
-    Tether_energies = [0, 0.000463152, 0.1472802]
+_volume_args = {'k': [20.0, 50.0, 100.0], 'V0': [0.107227, 1, 0.01]}
+_volume_arg_list = [(hoomd.md.mesh.conservation.Volume,
+                     dict(zip(_volume_args, val)))
+                    for val in zip(*_volume_args.values())]
+
+
+def get_mesh_potential_and_args():
+    return (_harmonic_arg_list + _FENE_arg_list + _Tether_arg_list
+            + _volume_arg_list)
+
+
+def get_mesh_potential_args_forces_and_energies():
+    harmonic_forces = [[[37.86, 0., -26.771063], [-37.86, 0., -26.771063],
+                        [0., 37.86, 26.771063], [0., -37.86, 26.771063]],
+                       [[36.55, 0., -25.844753], [-36.55, 0., -25.844753],
+                        [0., 36.55, 25.844753], [0., -36.55, 25.844753]],
+                       [[33.24, 0., -23.504229], [-33.24, 0., -23.504229],
+                        [0., 33.24, 23.504229], [0., -33.24, 23.504229]]]
+    harmonic_energies = [35.83449, 40.077075, 41.43366]
+    FENE_forces = [[[221.113071, 0.,
+                     -156.350552], [-221.113071, 0., -156.350552],
+                    [0., 221.113071, 156.350552], [0., -221.113071,
+                                                   156.350552]],
+                   [[12.959825, 0., -9.16398], [-12.959825, 0., -9.16398],
+                    [0., 12.959825, 9.16398], [0., -12.959825, 9.16398]],
+                   [[-44.644347, 0., 31.568321], [44.644347, 0., 31.568321],
+                    [0., -44.644347, -31.568321], [0., 44.644347, -31.568321]]]
+    FENE_energies = [163.374213, 97.189301, 67.058202]
+    Tether_forces = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
+                     [[0.048888, 0., -0.034569], [-0.048888, 0., -0.034569],
+                      [0., 0.048888, 0.034569], [0., -0.048888, 0.034569]],
+                     [[7.144518, 0., -5.051937], [-7.144518, 0., -5.051937],
+                      [0., 7.144518, 5.051937], [0., -7.144518, 5.051937]]]
+    Tether_energies = [0, 0.000926, 0.294561]
+
+    volume_forces = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
+                     [[4.93960528, 0,
+                       -3.49282839], [-4.93960528, 0, -3.49282839],
+                      [0, 4.93960528, 3.49282839], [0, -4.93960528,
+                                                    3.49282839]],
+                     [[-107.5893328, 0, 76.0771468],
+                      [107.5893328, 0, 76.0771468],
+                      [0, -107.5893328, -76.0771468],
+                      [0, 107.5893328, -76.0771468]]]
+    volume_energies = [0, 19.92608051621174, 47.2656702899458]
 
     harmonic_args_and_vals = []
     FENE_args_and_vals = []
     Tether_args_and_vals = []
+    volume_args_and_vals = []
     for i in range(3):
         harmonic_args_and_vals.append(
             (*_harmonic_arg_list[i], harmonic_forces[i], harmonic_energies[i]))
@@ -68,77 +88,71 @@ def get_mesh_bond_args_forces_and_energies():
             (*_FENE_arg_list[i], FENE_forces[i], FENE_energies[i]))
         Tether_args_and_vals.append(
             (*_Tether_arg_list[i], Tether_forces[i], Tether_energies[i]))
-    return harmonic_args_and_vals + FENE_args_and_vals + Tether_args_and_vals
+        volume_args_and_vals.append(
+            (*_volume_arg_list[i], volume_forces[i], volume_energies[i]))
+    return (harmonic_args_and_vals + FENE_args_and_vals + Tether_args_and_vals
+            + volume_args_and_vals)
 
 
 @pytest.fixture(scope='session')
-def triplet_snapshot_factory(device):
-
-    def make_snapshot(d=1.0,
-                      theta_deg=60,
-                      particle_types=['A'],
-                      dimensions=3,
-                      L=20):
-        theta_rad = theta_deg * (np.pi / 180)
+def tetrahedron_snapshot_factory(device):
+
+    def make_snapshot(d=1.0, particle_types=['A'], L=20):
         s = hoomd.Snapshot(device.communicator)
-        N = 3
+        N = 4
         if s.communicator.rank == 0:
             box = [L, L, L, 0, 0, 0]
-            if dimensions == 2:
-                box[2] = 0
             s.configuration.box = box
             s.particles.N = N
 
-            base_positions = np.array(
-                [[-d * np.sin(theta_rad / 2), d * np.cos(theta_rad / 2), 0.0],
-                 [0.0, 0.0, 0.0],
-                 [d * np.sin(theta_rad / 2), d * np.cos(theta_rad / 2), 0.0]])
+            base_positions = np.array([[1.0, 0.0, -1.0 / np.sqrt(2.0)],
+                                       [-1.0, 0.0, -1.0 / np.sqrt(2.0)],
+                                       [0.0, 1.0, 1.0 / np.sqrt(2.0)],
+                                       [0.0, -1.0, 1.0 / np.sqrt(2.0)]])
             # move particles slightly in direction of MPI decomposition which
             # varies by simulation dimension
-            nudge_dimension = 2 if dimensions == 3 else 1
-            base_positions[:, nudge_dimension] += 0.1
-            s.particles.position[:] = base_positions
+            s.particles.position[:] = 0.5 * d * base_positions
             s.particles.types = particle_types
         return s
 
     return make_snapshot
 
 
-@pytest.mark.parametrize("mesh_bond_cls, potential_kwargs",
-                         get_mesh_bond_and_args())
-def test_before_attaching(mesh_bond_cls, potential_kwargs):
+@pytest.mark.parametrize("mesh_potential_cls, potential_kwargs",
+                         get_mesh_potential_and_args())
+def test_before_attaching(mesh_potential_cls, potential_kwargs):
     mesh = hoomd.mesh.Mesh()
-    mesh_bond_potential = mesh_bond_cls(mesh)
-    mesh_bond_potential.params["mesh"] = potential_kwargs
+    mesh_potential = mesh_potential_cls(mesh)
+    mesh_potential.params["mesh"] = potential_kwargs
 
-    assert mesh is mesh_bond_potential.mesh
+    assert mesh is mesh_potential.mesh
     for key in potential_kwargs:
-        np.testing.assert_allclose(mesh_bond_potential.params["mesh"][key],
+        np.testing.assert_allclose(mesh_potential.params["mesh"][key],
                                    potential_kwargs[key],
                                    rtol=1e-6)
 
     mesh1 = hoomd.mesh.Mesh()
-    mesh_bond_potential.mesh = mesh1
-    assert mesh1 is mesh_bond_potential.mesh
+    mesh_potential.mesh = mesh1
+    assert mesh1 is mesh_potential.mesh
 
 
-@pytest.mark.parametrize("mesh_bond_cls, potential_kwargs",
-                         get_mesh_bond_and_args())
-def test_after_attaching(triplet_snapshot_factory, simulation_factory,
-                         mesh_bond_cls, potential_kwargs):
-    snap = triplet_snapshot_factory(d=0.969, L=5)
+@pytest.mark.parametrize("mesh_potential_cls, potential_kwargs",
+                         get_mesh_potential_and_args())
+def test_after_attaching(tetrahedron_snapshot_factory, simulation_factory,
+                         mesh_potential_cls, potential_kwargs):
+    snap = tetrahedron_snapshot_factory(d=0.969, L=5)
     sim = simulation_factory(snap)
 
     mesh = hoomd.mesh.Mesh(name=["triags"])
-    mesh.size = 1
-    mesh.triangles = [[0, 1, 2]]
+    mesh.size = 4
+    mesh.triangles = [[2, 1, 0], [0, 1, 3], [2, 0, 3], [1, 2, 3]]
 
-    mesh_bond_potential = mesh_bond_cls(mesh)
-    mesh_bond_potential.params["triags"] = potential_kwargs
+    mesh_potential = mesh_potential_cls(mesh)
+    mesh_potential.params["triags"] = potential_kwargs
 
     integrator = hoomd.md.Integrator(dt=0.005)
 
-    integrator.forces.append(mesh_bond_potential)
+    integrator.forces.append(mesh_potential)
 
     langevin = hoomd.md.methods.Langevin(kT=1,
                                          filter=hoomd.filter.All(),
@@ -148,32 +162,33 @@ def test_after_attaching(triplet_snapshot_factory, simulation_factory,
 
     sim.run(0)
     for key in potential_kwargs:
-        np.testing.assert_allclose(mesh_bond_potential.params["triags"][key],
+        np.testing.assert_allclose(mesh_potential.params["triags"][key],
                                    potential_kwargs[key],
                                    rtol=1e-6)
 
     mesh1 = hoomd.mesh.Mesh()
     with pytest.raises(RuntimeError):
-        mesh_bond_potential.mesh = mesh1
+        mesh_potential.mesh = mesh1
 
 
-@pytest.mark.parametrize("mesh_bond_cls, potential_kwargs, force, energy",
-                         get_mesh_bond_args_forces_and_energies())
-def test_forces_and_energies(triplet_snapshot_factory, simulation_factory,
-                             mesh_bond_cls, potential_kwargs, force, energy):
-    snap = triplet_snapshot_factory(d=0.969, L=5)
+@pytest.mark.parametrize("mesh_potential_cls, potential_kwargs, force, energy",
+                         get_mesh_potential_args_forces_and_energies())
+def test_forces_and_energies(tetrahedron_snapshot_factory, simulation_factory,
+                             mesh_potential_cls, potential_kwargs, force,
+                             energy):
+    snap = tetrahedron_snapshot_factory(d=0.969, L=5)
     sim = simulation_factory(snap)
 
     mesh = hoomd.mesh.Mesh()
     mesh.size = 1
-    mesh.triangles = [[0, 1, 2]]
+    mesh.triangles = [[2, 1, 0], [0, 1, 3], [2, 0, 3], [1, 2, 3]]
 
-    mesh_bond_potential = mesh_bond_cls(mesh)
-    mesh_bond_potential.params["mesh"] = potential_kwargs
+    mesh_potential = mesh_potential_cls(mesh)
+    mesh_potential.params["mesh"] = potential_kwargs
 
     integrator = hoomd.md.Integrator(dt=0.005)
 
-    integrator.forces.append(mesh_bond_potential)
+    integrator.forces.append(mesh_potential)
 
     langevin = hoomd.md.methods.Langevin(kT=1,
                                          filter=hoomd.filter.All(),
@@ -193,35 +208,9 @@ def test_forces_and_energies(triplet_snapshot_factory, simulation_factory,
         np.testing.assert_allclose(sim_forces, force, rtol=1e-2, atol=1e-5)
 
 
-@pytest.fixture(scope='session')
-def mesh_snapshot_factory(device):
-
-    def make_snapshot(d=1.0, phi_deg=45, particle_types=['A'], L=20):
-        phi_rad = phi_deg * (np.pi / 180)
-        # the central particles are along the x-axis, so phi is determined from
-        # the angle in the yz plane.
-
-        s = hoomd.Snapshot(device.communicator)
-        N = 4
-        if s.communicator.rank == 0:
-            box = [L, L, L, 0, 0, 0]
-            s.configuration.box = box
-            s.particles.N = N
-            s.particles.types = particle_types
-            # shift particle positions slightly in z so MPI tests pass
-            s.particles.position[:] = [
-                [0.0, d * np.cos(phi_rad / 2), d * np.sin(phi_rad / 2) + 0.1],
-                [0.0, 0.0, 0.1], [d, 0.0, 0.1],
-                [d, d * np.cos(phi_rad / 2), -d * np.sin(phi_rad / 2) + 0.1]
-            ]
-
-        return s
-
-    return make_snapshot
-
-
-def test_auto_detach_simulation(simulation_factory, mesh_snapshot_factory):
-    sim = simulation_factory(mesh_snapshot_factory(d=0.969, L=5))
+def test_auto_detach_simulation(simulation_factory,
+                                tetrahedron_snapshot_factory):
+    sim = simulation_factory(tetrahedron_snapshot_factory(d=0.969, L=5))
     mesh = hoomd.mesh.Mesh()
     mesh.triangles = [[0, 1, 2], [0, 2, 3]]
 

From 1a555f859d333f0e18c7cbcd40eccf192c7fc318 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Fri, 28 Jan 2022 11:14:43 -0500
Subject: [PATCH 08/50] update VolumeConstraintGPU code and write partial sum
 code

---
 .../VolumeConservationMeshForceComputeGPU.cc  | 168 +++++++----
 .../VolumeConservationMeshForceComputeGPU.cu  | 283 ++++++++----------
 .../VolumeConservationMeshForceComputeGPU.cuh |   6 +-
 .../VolumeConservationMeshForceComputeGPU.h   |  44 +--
 4 files changed, 269 insertions(+), 232 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 08941b3c9d..f838cd3c32 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -1,12 +1,12 @@
 // Copyright (c) 2009-2022 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
-#include "MeshVolumeConservationGPU.h"
+#include "VolumeConservationMeshForceComputeGPU.h"
 
 using namespace std;
 
-/*! \file MeshVolumeConservationGPU.cc
-    \brief Contains code for the MeshVolumeConservationGPU class
+/*! \file VolumeConservationMeshForceComputeGPU.cc
+    \brief Contains code for the VolumeConservationMeshForceComputeGPU class
 */
 
 namespace hoomd
@@ -16,16 +16,17 @@ namespace md
 /*! \param sysdef System to compute forces on
     \post Memory is allocated, and forces are zeroed.
 */
-MeshVolumeConservationGPU::MeshVolumeConservationGPU(std::shared_ptr<SystemDefinition> sysdef,
-                                                     std::shared_ptr<MeshDefinition> meshdef)
-    : MeshVolumeConservation(sysdef, meshdef)
+VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
+    std::shared_ptr<SystemDefinition> sysdef,
+    std::shared_ptr<MeshDefinition> meshdef)
+    : VolumeConservationMeshForceCompute(sysdef, meshdef), m_block_size(256)
     {
     if (!m_exec_conf->isCUDAEnabled())
         {
-        m_exec_conf->msg->error()
-            << "Creating a MeshVolumeConservationGPU with no GPU in the execution configuration"
-            << endl;
-        throw std::runtime_error("Error initializing MeshVolumeConservationGPU");
+        m_exec_conf->msg->error() << "Creating a VolumeConservationMeshForceComputeGPU with no GPU "
+                                     "in the execution configuration"
+                                  << endl;
+        throw std::runtime_error("Error initializing VolumeConservationMeshForceComputeGPU");
         }
 
     // allocate and zero device memory
@@ -36,30 +37,32 @@ MeshVolumeConservationGPU::MeshVolumeConservationGPU(std::shared_ptr<SystemDefin
     GPUArray<unsigned int> flags(1, this->m_exec_conf);
     m_flags.swap(flags);
 
+    GPUArray<Scalar> sum(1, m_exec_conf);
+    m_sum.swap(sum);
+
+    unsigned int group_size = m_pdata->getN();
+
+    m_num_blocks = group_size / m_block_size + 1;
+    GPUArray<Scalar> partial_sum(m_num_blocks, m_exec_conf);
+    m_partial_sum.swap(partial_sum);
+
     // reset flags
     ArrayHandle<unsigned int> h_flags(m_flags, access_location::host, access_mode::overwrite);
     h_flags.data[0] = 0;
 
     unsigned int warp_size = this->m_exec_conf->dev_prop.warpSize;
-    m_tuner_force.reset(new Autotuner(warp_size,
-                                      1024,
-                                      warp_size,
-                                      5,
-                                      100000,
-                                      "vconstraint_forces",
-                                      this->m_exec_conf));
-    m_tuner_volume.reset(new Autotuner(warp_size,
-                                       1024,
-                                       warp_size,
-                                       5,
-                                       100000,
-                                       "vconstraint_volume",
-                                       this->m_exec_conf));
+    m_tuner.reset(new Autotuner(warp_size,
+                                1024,
+                                warp_size,
+                                5,
+                                100000,
+                                "vconstraint_forces",
+                                this->m_exec_conf));
     }
 
-void MeshVolumeConservationGPU::setParams(unsigned int type, Scalar K, Scalar V0)
+void VolumeConservationMeshForceComputeGPU::setParams(unsigned int type, Scalar K, Scalar V0)
     {
-    MeshVolumeConservation::setParams(type, K, V0);
+    VolumeConservationMeshForceCompute::setParams(type, K, V0);
 
     ArrayHandle<Scalar> h_params(m_params, access_location::host, access_mode::readwrite);
     // update the local copy of the memory
@@ -69,12 +72,14 @@ void MeshVolumeConservationGPU::setParams(unsigned int type, Scalar K, Scalar V0
 /*! Actually perform the force computation
     \param timestep Current time step
  */
-void MeshVolumeConservationGPU::computeForces(uint64_t timestep)
+void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
     {
     // start the profile
     if (this->m_prof)
         this->m_prof->push(this->m_exec_conf, "VolumeConstraint");
 
+    computeVolume();
+
     // access the particle data arrays
     ArrayHandle<Scalar4> d_pos(m_pdata->getPositions(), access_location::device, access_mode::read);
     ArrayHandle<int3> d_image(m_pdata->getImages(), access_location::device, access_mode::read);
@@ -98,25 +103,6 @@ void MeshVolumeConservationGPU::computeForces(uint64_t timestep)
         access_location::device,
         access_mode::read);
 
-    m_tuner_volume->begin();
-    kernel::gpu_compute_volume_constraint_volume(m_volume,
-                                                 m_pdata->getN(),
-                                                 d_pos.data,
-                                                 d_image.data,
-                                                 box,
-                                                 d_gpu_meshtrianglelist.data,
-                                                 d_gpu_meshtriangle_pos_list,
-                                                 gpu_table_indexer,
-                                                 d_gpu_n_meshtriangle.data,
-                                                 m_tuner_sigma->getParam());
-
-    if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
-        {
-        CHECK_CUDA_ERROR();
-        }
-
-    m_tuner_volume->end();
-
     ArrayHandle<Scalar4> d_force(m_force, access_location::device, access_mode::overwrite);
     ArrayHandle<Scalar> d_virial(m_virial, access_location::device, access_mode::overwrite);
     ArrayHandle<Scalar> d_params(m_params, access_location::device, access_mode::read);
@@ -124,7 +110,7 @@ void MeshVolumeConservationGPU::computeForces(uint64_t timestep)
     // access the flags array for overwriting
     ArrayHandle<unsigned int> d_flags(m_flags, access_location::device, access_mode::readwrite);
 
-    m_tuner_force->begin();
+    m_tuner->begin();
     kernel::gpu_compute_volume_constraint_force(d_force.data,
                                                 d_virial.data,
                                                 m_virial.getPitch(),
@@ -139,7 +125,7 @@ void MeshVolumeConservationGPU::computeForces(uint64_t timestep)
                                                 d_gpu_n_meshtriangle.data,
                                                 d_params.data,
                                                 m_mesh_data->getMeshTriangleData()->getNTypes(),
-                                                m_tuner_force->getParam(),
+                                                m_tuner->getParam(),
                                                 d_flags.data);
 
     if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
@@ -157,7 +143,81 @@ void MeshVolumeConservationGPU::computeForces(uint64_t timestep)
             throw std::runtime_error("Error in meshtriangle calculation");
             }
         }
-    m_tuner_force->end();
+    m_tuner->end();
+
+    if (this->m_prof)
+        this->m_prof->pop(this->m_exec_conf);
+    }
+
+/*! Compute the mesh volume on the GPU and store the result in m_volume.
+    With domain decomposition the per-rank results are summed over all ranks.
+ */
+void VolumeConservationMeshForceComputeGPU::computeVolume()
+    {
+    // start the profile
+    if (this->m_prof)
+        this->m_prof->push(this->m_exec_conf, "VolumeCalculation");
+
+    // access the particle data arrays
+    ArrayHandle<Scalar4> d_pos(m_pdata->getPositions(), access_location::device, access_mode::read);
+    ArrayHandle<int3> d_image(m_pdata->getImages(), access_location::device, access_mode::read);
+
+    BoxDim box = this->m_pdata->getGlobalBox();
+
+    m_num_blocks = m_pdata->getN() / m_block_size + 1;
+
+    const GPUArray<typename MeshTriangle::members_t>& gpu_meshtriangle_list
+        = this->m_mesh_data->getMeshTriangleData()->getGPUTable();
+    const Index2D& gpu_table_indexer
+        = this->m_mesh_data->getMeshTriangleData()->getGPUTableIndexer();
+
+    ArrayHandle<typename MeshTriangle::members_t> d_gpu_meshtrianglelist(gpu_meshtriangle_list,
+                                                                         access_location::device,
+                                                                         access_mode::read);
+    ArrayHandle<unsigned int> d_gpu_meshtriangle_pos_list(
+        m_mesh_data->getMeshTriangleData()->getGPUPosTable(),
+        access_location::device,
+        access_mode::read);
+    ArrayHandle<unsigned int> d_gpu_n_meshtriangle(
+        this->m_mesh_data->getMeshTriangleData()->getNGroupsArray(),
+        access_location::device,
+        access_mode::read);
+
+    ArrayHandle<Scalar> d_partial_sumVol(m_partial_sum,
+                                         access_location::device,
+                                         access_mode::overwrite);
+    ArrayHandle<Scalar> d_sumVol(m_sum, access_location::device, access_mode::overwrite);
+
+    kernel::gpu_compute_volume_constraint_volume(d_sumVol.data,
+                                                 d_partial_sumVol.data, m_pdata->getN(),
+                                                 d_pos.data,
+                                                 d_image.data,
+                                                 box,
+                                                 d_gpu_meshtrianglelist.data,
+                                                 d_gpu_meshtriangle_pos_list.data,
+                                                 gpu_table_indexer,
+                                                 d_gpu_n_meshtriangle.data,
+                                                 m_block_size,
+                                                 m_num_blocks);
+
+    if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
+        {
+        CHECK_CUDA_ERROR();
+        }
+
+    ArrayHandle<Scalar> h_sumVol(m_sum, access_location::host, access_mode::read);
+#ifdef ENABLE_MPI
+    if (m_sysdef->isDomainDecomposed())
+        {
+        MPI_Allreduce(MPI_IN_PLACE,
+                      &h_sumVol.data[0],
+                      1,
+                      MPI_HOOMD_SCALAR,
+                      MPI_SUM,
+                      m_exec_conf->getMPICommunicator());
+        }
+#endif
+    m_volume = h_sumVol.data[0];
 
     if (this->m_prof)
         this->m_prof->pop(this->m_exec_conf);
@@ -165,11 +225,13 @@ void MeshVolumeConservationGPU::computeForces(uint64_t timestep)
 
 namespace detail
     {
-void export_MeshVolumeConservationGPU(pybind11::module& m)
+void export_VolumeConservationMeshForceComputeGPU(pybind11::module& m)
     {
-    pybind11::class_<MeshVolumeConservationGPU,
-                     MeshVolumeConservation,
-                     std::shared_ptr<MeshVolumeConservationGPU>>(m, "MeshVolumeConservationGPU")
+    pybind11::class_<VolumeConservationMeshForceComputeGPU,
+                     VolumeConservationMeshForceCompute,
+                     std::shared_ptr<VolumeConservationMeshForceComputeGPU>>(
+        m,
+        "VolumeConservationMeshForceComputeGPU")
         .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>>());
     }
 
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index c5091418b7..2febf45ccc 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -35,7 +35,7 @@ namespace kernel
     \param d_triangles device array of mesh triangles
     \param n_bonds_list List of numbers of mesh bonds stored on the GPU
 */
-__global__ void gpu_compute_volume_constraint_volume_kernel(Scalar volume,
+__global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_volume_partial_sum,
                                                             const unsigned int N,
                                                             const Scalar4* d_pos,
                                                             const unsigned int* d_rtag,
@@ -46,159 +46,116 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar volume,
                                                             const unsigned int* n_triangles_list)
     {
     // start by identifying which particle we are to handle
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if (idx >= N)
-        return;
-
-    // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
-    int n_bonds = n_bonds_list[idx];
-
-    // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
-    Scalar4 postype = __ldg(d_pos + idx);
-    Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);
+    HIP_DYNAMIC_SHARED(char, s_data)
+    Scalar* s_gammas = (Scalar*)s_data;
 
-    // initialize the force to 0
-    Scalar3 sigma_dash = make_scalar3(Scalar(0.0), Scalar(0.0), Scalar(0.0));
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
-    Scalar sigma = 0.0;
+    Scalar volume_transfer = 0;
 
-    // loop over all angles
-    for (int bond_idx = 0; bond_idx < n_bonds; bond_idx++)
+    if (idx < N)
         {
-        group_storage<4> cur_bond = blist[blist_idx(idx, bond_idx)];
-
-        int cur_bond_idx = cur_bond.idx[0];
-        int cur_tr1_idx = cur_bond.idx[1];
-        int cur_tr2_idx = cur_bond.idx[2];
-
-        if (cur_tr1_idx == cur_tr2_idx)
-            continue;
-
-        const group_storage<6>& triangle1 = d_triangles[cur_tr1_idx];
-
-        unsigned int cur_idx_c = d_rtag[triangle1.tag[0]];
-
-        unsigned int iterator = 1;
-        while (idx == cur_idx_c || cur_bond_idx == cur_idx_c)
-            {
-            cur_idx_c = d_rtag[triangle1.tag[iterator]];
-            iterator++;
-            }
+        int n_triangles = n_triangles_list[idx];
+        Scalar4 postype = d_pos[idx];
+        Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);
 
-        const group_storage<6>& triangle2 = d_triangles[cur_tr2_idx];
+        int3 image_a = d_image[dx];
 
-        unsigned int cur_idx_d = d_rtag[triangle2.tag[0]];
+        vec3<Scalar> pos_a = box.shift(pos, image_a);
 
-        iterator = 1;
-        while (idx == cur_idx_d || cur_bond_idx == cur_idx_d)
+        for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
             {
-            cur_idx_d = d_rtag[triangle2.tag[iterator]];
-            iterator++;
+            group_storage<6> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
+
+            int cur_triangle_b = cur_triangle.idx[0];
+            int cur_triangle_c = cur_triangle.idx[1];
+
+            int cur_triangle_abc = tpos_list[tlist_idx(idx, triangle_idx)];
+
+            // get the b-particle's position (MEM TRANSFER: 16 bytes)
+            Scalar4 bb_postype = d_pos[cur_triangle_b];
+            Scalar3 bb_pos = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
+            int3 image_b = d_image[cur_triangle_b] vec3<Scalar> pos_b = box.shift(bb_pos, image_b);
+
+            // get the c-particle's position (MEM TRANSFER: 16 bytes)
+            Scalar4 cc_postype = d_pos[cur_triangle_c];
+            Scalar3 cc_pos = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
+            int3 image_c = d_image[cur_triangle_c] vec3<Scalar> pos_c = box.shift(cc_pos, image_c);
+
+            Scalar Vol;
+            if (cur_triangle_abc == 1)
+                {
+                Vol = dot(cross(pos_b, pos_c), pos_a);
+                }
+            else
+                {
+                Vol = dot(cross(pos_c, pos_b), pos_a);
+                }
+            volume_transfer += Vol / 6;
             }
+        }
 
-        // get the b-particle's position (MEM TRANSFER: 16 bytes)
-        Scalar4 bb_postype = d_pos[cur_bond_idx];
-        Scalar3 bb_pos = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
-        // get the c-particle's position (MEM TRANSFER: 16 bytes)
-        Scalar4 cc_postype = d_pos[cur_idx_c];
-        Scalar3 cc_pos = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
-        // get the c-particle's position (MEM TRANSFER: 16 bytes)
-        Scalar4 dd_postype = d_pos[cur_idx_d];
-        Scalar3 dd_pos = make_scalar3(dd_postype.x, dd_postype.y, dd_postype.z);
-
-        Scalar3 dab = pos - bb_pos;
-        Scalar3 dac = pos - cc_pos;
-        Scalar3 dad = pos - dd_pos;
-        Scalar3 dbc = bb_pos - cc_pos;
-        Scalar3 dbd = bb_pos - dd_pos;
-
-        dab = box.minImage(dab);
-        dac = box.minImage(dac);
-        dad = box.minImage(dad);
-        dbc = box.minImage(dbc);
-        dbd = box.minImage(dbd);
-
-        // on paper, the formula turns out to be: F = K*\vec{r} * (r_0/r - 1)
-        // FLOPS: 14 / MEM TRANSFER: 2 Scalars
-
-        // FLOPS: 42 / MEM TRANSFER: 6 Scalars
-        Scalar rsqab = dab.x * dab.x + dab.y * dab.y + dab.z * dab.z;
-        Scalar rac = dac.x * dac.x + dac.y * dac.y + dac.z * dac.z;
-        rac = sqrt(rac);
-        Scalar rad = dad.x * dad.x + dad.y * dad.y + dad.z * dad.z;
-        rad = sqrt(rad);
-
-        Scalar rbc = dbc.x * dbc.x + dbc.y * dbc.y + dbc.z * dbc.z;
-        rbc = sqrt(rbc);
-        Scalar rbd = dbd.x * dbd.x + dbd.y * dbd.y + dbd.z * dbd.z;
-        rbd = sqrt(rbd);
-
-        Scalar3 nab, nac, nad, nbc, nbd;
-        nab = dab / rab;
-        nac = dac / rac;
-        nad = dad / rad;
-        nbc = dbc / rbc;
-        nbd = dbd / rbd;
-
-        Scalar c_accb = nac.x * nbc.x + nac.y * nbc.y + nac.z * nbc.z;
-
-        if (c_accb > 1.0)
-            c_accb = 1.0;
-        if (c_accb < -1.0)
-            c_accb = -1.0;
-
-        Scalar c_addb = nad.x * nbd.x + nad.y * nbd.y + nad.z * nbd.z;
+    Scalar* volume_sdata = (Scalar*)&s_data[0];
 
-        if (c_addb > 1.0)
-            c_addb = 1.0;
-        if (c_addb < -1.0)
-            c_addb = -1.0;
+    __syncthreads();
+    volume_sdata[threadIdx.x] = volume_transfer;
+    __syncthreads();
 
-        vec3<Scalar> nbac
-            = cross(vec3<Scalar>(nab.x, nab.y, nab.z), vec3<Scalar>(nac.x, nac.y, nac.z));
+    // reduce the sum in parallel
+    int offs = blockDim.x >> 1;
+    while (offs > 0)
+        {
+        if (threadIdx.x < offs)
+            volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+        offs >>= 1;
+        __syncthreads();
+        }
 
-        Scalar inv_nbac = 1.0 / sqrt(dot(nbac, nbac));
+    // write out our partial sum
+    if (threadIdx.x == 0)
+        {
+        d_partial_sum_volume[blockIdx.x] = volume_sdata[0];
+        }
+    }
 
-        vec3<Scalar> nbad
-            = cross(vec3<Scalar>(nab.x, nab.y, nab.z), vec3<Scalar>(nad.x, nad.y, nad.z));
+//! Kernel function for reducing a partial sum to a full sum (one value)
+/*! \param d_sum Output location that receives the total sum (a single value)
+    \param d_partial_sum Array holding the partial sums to be added up
+    \param num_blocks Number of entries in d_partial_sum
+*/
+__global__ void
+gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum, Scalar* d_partial_sum, unsigned int num_blocks)
+    {
+    Scalar sum = Scalar(0.0);
+    HIP_DYNAMIC_SHARED(char, s_data)
+    Scalar* volume_sdata = (Scalar*)&s_data[0];
 
-        Scalar inv_nbad = 1.0 / sqrt(dot(nbad, nbad));
+    // sum up the values in the partial sum via a sliding window
+    for (int start = 0; start < num_blocks; start += blockDim.x)
+        {
+        __syncthreads();
+        if (start + threadIdx.x < num_blocks)
+            volume_sdata[threadIdx.x] = d_partial_sum[start + threadIdx.x];
+        else
+            volume_sdata[threadIdx.x] = Scalar(0.0);
+        __syncthreads();
 
-        if (dot(nbac, nbad) * inv_nbad * inv_nbac > 0.9)
+        // reduce the sum in parallel
+        int offs = blockDim.x >> 1;
+        while (offs > 0)
             {
-            this->m_exec_conf->msg->error() << "volume_constraint calculations : triangles "
-                                            << tr_idx1 << " " << tr_idx2 << " overlap." << std::endl
-                                            << std::endl;
-            throw std::runtime_error("Error in bending energy calculation");
+            if (threadIdx.x < offs)
+                volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+            offs >>= 1;
+            __syncthreads();
             }
 
-        Scalar inv_s_accb = sqrt(1.0 - c_accb * c_accb);
-        if (inv_s_accb < SMALL)
-            inv_s_accb = SMALL;
-        inv_s_accb = 1.0 / inv_s_accb;
-
-        Scalar inv_s_addb = sqrt(1.0 - c_addb * c_addb);
-        if (inv_s_addb < SMALL)
-            inv_s_addb = SMALL;
-        inv_s_addb = 1.0 / inv_s_addb;
-
-        Scalar cot_accb = c_accb * inv_s_accb;
-        Scalar cot_addb = c_addb * inv_s_addb;
-
-        Scalar sigma_hat_ab = (cot_accb + cot_addb) / 2;
-
-        Scalar sigma_a = sigma_hat_ab * rsqab * 0.25;
-
-        Scalar3 sigma_dash_a = sigma_hat_ab * dab;
-
-        sigma += sigma_a;
-        sigma_dash += sigma_dash_a;
+        // every thread adds the reduced total of this window to its running sum
+        sum += volume_sdata[0];
         }
 
-    // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
-    d_sigma[idx] = sigma;
-    d_sigma_dash[idx] = sigma_dash;
+    if (threadIdx.x == 0)
+        *d_sum = sum;
     }
 
 /*! \param d_sigma Device memory to write per paricle sigma
@@ -216,7 +173,8 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar volume,
     \returns Any error code resulting from the kernel launch
     \note Always returns hipSuccess in release builds to avoid the hipDeviceSynchronize()
 */
-hipError_t gpu_compute_volume_constraint_volume(Scalar volume,
+hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
+                                                Scalar* d_sum_partial_volume,
                                                 const unsigned int N,
                                                 const Scalar4* d_pos,
                                                 const int3* d_image,
@@ -225,34 +183,39 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar volume,
                                                 const unsigned int* tpos_list,
                                                 const Index2D tlist_idx,
                                                 const unsigned int* n_triangles_list,
-                                                int block_size)
+                                                unsigned int block_size,
+                                                unsigned int num_blocks)
     {
-    unsigned int max_block_size;
-    hipFuncAttributes attr;
-    hipFuncGetAttributes(&attr, (const void*)gpu_compute_volume_constraint_volume_kernel);
-    max_block_size = attr.maxThreadsPerBlock;
-
-    unsigned int run_block_size = min(block_size, max_block_size);
-
-    // setup the grid to run the kernel
-    dim3 grid(N / run_block_size + 1, 1, 1);
-    dim3 threads(run_block_size, 1, 1);
+    dim3 grid(num_blocks, 1, 1);
+    dim3 grid1(1, 1, 1);
+    dim3 threads(block_size, 1, 1);
+    dim3 threads1(256, 1, 1);
 
     // run the kernel
-    hipLaunchKernelGGL((gpu_compute_volume_constraint_volume_kernel),
-                       dim3(grid),
-                       dim3(threads),
-                       0,
+    hipLaunchKernelGGL(
+        (gpu_compute_volume_constraint_volume_kernel),
+        grid,
+        threads,
+        max((unsigned int)(sizeof(Scalar)), (unsigned int)(block_size * sizeof(Scalar))),
+        0,
+        d_sum_partial_volume,
+        N,
+        d_pos,
+        d_image,
+        box,
+        tlist,
+        tpos_list,
+        tlist_idx,
+        n_triangles_list);
+
+    hipLaunchKernelGGL((gpu_volume_reduce_partial_sum_kernel),
+                       dim3(grid1),
+                       dim3(threads1),
+                       block_size * sizeof(Scalar),
                        0,
-                       volume,
-                       N,
-                       d_pos,
-                       d_image,
-                       box,
-                       tlist,
-                       tpos_list,
-                       tlist_idx,
-                       n_triangles_list);
+                       d_sum_volume[0],
+                       d_partial_sum_volume,
+                       num_blocks);
 
     return hipSuccess;
     }
@@ -297,7 +260,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
         return;
 
     // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
-    int n_bonds = n_bonds_list[idx];
+    int n_triangles = n_triangles_list[idx];
 
     // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
     Scalar4 postype = __ldg(d_pos + idx);
@@ -314,7 +277,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
     for (int i = 0; i < 6; i++)
         virial[i] = Scalar(0.0);
 
-    // loop over all angles
+    // loop over all triangles
     for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
         {
         group_storage<6> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index 7ee3a64f16..5b4399e75e 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -21,7 +21,8 @@ namespace md
 namespace kernel
     {
 //! Kernel driver that computes the volume for MeshVolumeConservationGPU
-hipError_t gpu_compute_volume_constraint_volume(Scalar volume,
+hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
+                                                Scalar* d_sum_partial_volume,
                                                 const unsigned int N,
                                                 const Scalar4* d_pos,
                                                 const int3* d_image,
@@ -30,7 +31,8 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar volume,
                                                 const unsigned int* tpos_list,
                                                 const Index2D tlist_idx,
                                                 const unsigned int* n_triangles_list,
-                                                int block_size);
+                                                unsigned int block_size,
+                                                unsigned int num_blocks);
 
 //! Kernel driver that computes the forces for MeshVolumeConservationGPU
 hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index d7c4831980..833306e622 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -1,13 +1,13 @@
 // Copyright (c) 2009-2022 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
-#include "MeshVolumeConservation.h"
-#include "MeshVolumeConservationGPU.cuh"
+#include "VolumeConservationMeshForceCompute.h"
+#include "VolumeConservationMeshForceComputeGPU.cuh"
 #include "hoomd/Autotuner.h"
 
 #include <memory>
 
-/*! \file MeshVolumeConservationGPU.h
+/*! \file VolumeConservationMeshForceComputeGPU.h
     \brief Declares a class for computing volume constraint forces on the GPU
 */
 
@@ -28,12 +28,13 @@ namespace md
 
     \ingroup computes
 */
-class PYBIND11_EXPORT MeshVolumeConservationGPU : public MeshVolumeConservation
+class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
+    : public VolumeConservationMeshForceCompute
     {
     public:
     //! Constructs the compute
-    MeshVolumeConservationGPU(std::shared_ptr<SystemDefinition> sysdef,
-                              std::shared_ptr<MeshDefinition> meshdef);
+    VolumeConservationMeshForceComputeGPU(std::shared_ptr<SystemDefinition> sysdef,
+                                          std::shared_ptr<MeshDefinition> meshdef);
 
     //! Set autotuner parameters
     /*! \param enable Enable/disable autotuning
@@ -41,30 +42,39 @@ class PYBIND11_EXPORT MeshVolumeConservationGPU : public MeshVolumeConservation
     */
     virtual void setAutotunerParams(bool enable, unsigned int period)
         {
-        MeshVolumeConservation::setAutotunerParams(enable, period);
-        m_tuner_force->setPeriod(period);
-        m_tuner_force->setEnabled(enable);
-        m_tuner_volume->setPeriod(period);
-        m_tuner_volume->setEnabled(enable);
+        VolumeConservationMeshForceCompute::setAutotunerParams(enable, period);
+        m_tuner->setPeriod(period);
+        m_tuner->setEnabled(enable);
         }
 
     //! Set the parameters
     virtual void setParams(unsigned int type, Scalar K, Scalar V0);
 
     protected:
-    std::unique_ptr<Autotuner> m_tuner_force;  //!< Autotuner for block size of force loop
-    std::unique_ptr<Autotuner> m_tuner_volume; //!< Autotuner for block size of volume loop
-    GPUArray<unsigned int> m_flags;            //!< Flags set during the kernel execution
-    GPUArray<Scalar> m_params;                 //!< Parameters stored on the GPU
+    unsigned int m_block_size; //!< block size for partial sum memory
+
+    std::unique_ptr<Autotuner> m_tuner; //!< Autotuner for block size of force loop
+    GPUArray<unsigned int> m_flags;     //!< Flags set during the kernel execution
+    GPUArray<Scalar> m_params;          //!< Parameters stored on the GPU
+
+    GPUVector<Scalar> m_partial_sum; //!< memory space for partial sum over volume
+    GPUArray<Scalar> m_sum;          //!< memory space for sum over volume
 
     //! Actually compute the forces
     virtual void computeForces(uint64_t timestep);
+
+    //! compute volumes
+    virtual void computeVolume();
+
+    private:
+    //! allocate the memory needed to store partial sums
+    void resizePartialSumArrays();
     };
 
 namespace detail
     {
-//! Exports the MeshVolumeConservationGPU class to python
-void export_MeshVolumeConservationGPU(pybind11::module& m);
+//! Exports the VolumeConservationMeshForceComputeGPU class to python
+void export_VolumeConservationMeshForceComputeGPU(pybind11::module& m);
 
     } // end namespace detail
     } // end namespace md

From 5cdeee37681d8d9f97a31addfc6cb94749668ea9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Fri, 28 Jan 2022 11:19:06 -0500
Subject: [PATCH 09/50] link Volume constraint to hoomd

---
 hoomd/md/CMakeLists.txt                          | 3 +++
 hoomd/md/VolumeConservationMeshForceCompute.h    | 2 +-
 hoomd/md/VolumeConservationMeshForceComputeGPU.h | 4 ++--
 hoomd/md/module-md.cc                            | 3 +++
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/hoomd/md/CMakeLists.txt b/hoomd/md/CMakeLists.txt
index ad127d5ddd..1a3a8b34aa 100644
--- a/hoomd/md/CMakeLists.txt
+++ b/hoomd/md/CMakeLists.txt
@@ -192,6 +192,7 @@ set(_md_headers ActiveForceComputeGPU.h
                 TwoStepNVTMTKGPU.h
                 TwoStepNVTMTK.h
 		VolumeConservationMeshForceCompute.h
+		VolumeConservationMeshForceComputeGPU.h
                 WallData.h
                 ZeroMomentumUpdater.h
                 )
@@ -223,6 +224,7 @@ list(APPEND _md_sources ActiveForceComputeGPU.cc
                            TwoStepNPTMTKGPU.cc
                            TwoStepNVEGPU.cc
                            TwoStepNVTMTKGPU.cc
+			   VolumeConservationMeshForceComputeGPU.cc
                            MuellerPlatheFlowGPU.cc
                            CosineSqAngleForceComputeGPU.cc
                            )
@@ -284,6 +286,7 @@ set(_md_cu_sources ActiveForceComputeGPU.cu
                       TwoStepNVEGPU.cu
                       TwoStepRATTLENVEGPU.cu
                       TwoStepNVTMTKGPU.cu
+		      VolumeConservationMeshForceComputeGPU.cu
                       MuellerPlatheFlowGPU.cu
                       CosineSqAngleForceGPU.cu
                       all_kernels_diamond_manifold.cu
diff --git a/hoomd/md/VolumeConservationMeshForceCompute.h b/hoomd/md/VolumeConservationMeshForceCompute.h
index 77974aa167..080a3a25a7 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.h
+++ b/hoomd/md/VolumeConservationMeshForceCompute.h
@@ -100,7 +100,7 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
     //! Actually compute the forces
     virtual void computeForces(uint64_t timestep);
 
-    //! compute normals
+    //! compute volumes
     virtual void computeVolume();
     };
 
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index 833306e622..5636762a5c 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -15,8 +15,8 @@
 #error This header cannot be compiled by nvcc
 #endif
 
-#ifndef __MESHVOLUMECONSERVATION_GPU_H__
-#define __MESHVOLUMECONSERVATION_GPU_H__
+#ifndef __VOLUMECONSERVATIONMESHFORCECOMPUTE_GPU_H__
+#define __VOLUMECONSERVATIONMESHFORCECOMPUTE_GPU_H__
 
 namespace hoomd
     {
diff --git a/hoomd/md/module-md.cc b/hoomd/md/module-md.cc
index bfb4884bc1..6d95a96cb3 100644
--- a/hoomd/md/module-md.cc
+++ b/hoomd/md/module-md.cc
@@ -102,6 +102,7 @@
 #include "TwoStepRATTLEBDGPU.h"
 #include "TwoStepRATTLELangevinGPU.h"
 #include "TwoStepRATTLENVEGPU.h"
+#include "VolumeConservationMeshForceComputeGPU.h"
 #endif
 
 #include <pybind11/pybind11.h>
@@ -473,6 +474,8 @@ PYBIND11_MODULE(_md, m)
     export_TwoStepRATTLENVEGPU<ManifoldXYPlane>(m, "TwoStepRATTLENVEPlaneGPU");
     export_TwoStepRATTLENVEGPU<ManifoldPrimitive>(m, "TwoStepRATTLENVEPrimitiveGPU");
     export_TwoStepRATTLENVEGPU<ManifoldSphere>(m, "TwoStepRATTLENVESphereGPU");
+
+    export_VolumeConservationMeshForceComputeGPU(m);
 #endif
 
     // manifolds

From 88df923f7993cd269cbd869f121c8e0c4f6e97a5 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@mays.engin.umich.edu>
Date: Fri, 28 Jan 2022 12:02:31 -0500
Subject: [PATCH 10/50] compiling GPU code (VolumeConstraint)

---
 .../VolumeConservationMeshForceComputeGPU.cc  | 13 ++---
 .../VolumeConservationMeshForceComputeGPU.cu  | 48 ++++++++++---------
 .../VolumeConservationMeshForceComputeGPU.cuh |  2 +-
 .../VolumeConservationMeshForceComputeGPU.h   |  5 +-
 4 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index f838cd3c32..5d711d0b3a 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -30,7 +30,7 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
         }
 
     // allocate and zero device memory
-    GPUArray<Scalar2> params(this->m_angle_data->getNTypes(), m_exec_conf);
+    GPUArray<Scalar2> params(this->m_mesh_data->getMeshTriangleData()->getNTypes(), m_exec_conf);
     m_params.swap(params);
 
     // allocate flags storage on the GPU
@@ -64,7 +64,7 @@ void VolumeConservationMeshForceComputeGPU::setParams(unsigned int type, Scalar
     {
     VolumeConservationMeshForceCompute::setParams(type, K, V0);
 
-    ArrayHandle<Scalar> h_params(m_params, access_location::host, access_mode::readwrite);
+    ArrayHandle<Scalar2> h_params(m_params, access_location::host, access_mode::readwrite);
     // update the local copy of the memory
     h_params.data[type] = make_scalar2(K, V0);
     }
@@ -105,7 +105,7 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
 
     ArrayHandle<Scalar4> d_force(m_force, access_location::device, access_mode::overwrite);
     ArrayHandle<Scalar> d_virial(m_virial, access_location::device, access_mode::overwrite);
-    ArrayHandle<Scalar> d_params(m_params, access_location::device, access_mode::read);
+    ArrayHandle<Scalar2> d_params(m_params, access_location::device, access_mode::read);
 
     // access the flags array for overwriting
     ArrayHandle<unsigned int> d_flags(m_flags, access_location::device, access_mode::readwrite);
@@ -188,13 +188,14 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
                                          access_mode::overwrite);
     ArrayHandle<Scalar> d_sumVol(m_sum, access_location::device, access_mode::overwrite);
 
-    kernel::gpu_compute_volume_constraint_volume(d_sumVol,
-                                                 d_partial_sumVol m_pdata->getN(),
+    kernel::gpu_compute_volume_constraint_volume(d_sumVol.data,
+                                                 d_partial_sumVol.data, 
+						 m_pdata->getN(),
                                                  d_pos.data,
                                                  d_image.data,
                                                  box,
                                                  d_gpu_meshtrianglelist.data,
-                                                 d_gpu_meshtriangle_pos_list,
+                                                 d_gpu_meshtriangle_pos_list.data,
                                                  gpu_table_indexer,
                                                  d_gpu_n_meshtriangle.data,
                                                  m_block_size,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 2febf45ccc..77cf9c465d 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -5,7 +5,7 @@
 // Copyright (c) 2009-2021 The Regents of the University of Michigan
 // This file is part of the HOOMD-blue project, released under the BSD 3-Clause License.
 
-#include "MeshVolumeConservationGPU.cuh"
+#include "VolumeConservationMeshForceComputeGPU.cuh"
 #include "hoomd/TextureTools.h"
 
 #include <assert.h>
@@ -35,10 +35,10 @@ namespace kernel
     \param d_triangles device array of mesh triangles
     \param n_bonds_list List of numbers of mesh bonds stored on the GPU
 */
-__global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_volume_partial_sum,
+__global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_sum_volume,
                                                             const unsigned int N,
                                                             const Scalar4* d_pos,
-                                                            const unsigned int* d_rtag,
+                                                            const int3* d_image,
                                                             const BoxDim& box,
                                                             const group_storage<6>* tlist,
                                                             const unsigned int* tpos_list,
@@ -47,7 +47,6 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_volume_par
     {
     // start by identifying which particle we are to handle
     HIP_DYNAMIC_SHARED(char, s_data)
-    Scalar* s_gammas = (Scalar*)s_data;
 
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
@@ -57,9 +56,9 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_volume_par
         {
         int n_triangles = n_triangles_list[idx];
         Scalar4 postype = d_pos[idx];
-        Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);
+        vec3<Scalar> pos(postype.x, postype.y, postype.z);
 
-        int3 image_a = d_image[dx];
+        int3 image_a = d_image[idx];
 
         vec3<Scalar> pos_a = box.shift(pos, image_a);
 
@@ -74,13 +73,15 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_volume_par
 
             // get the b-particle's position (MEM TRANSFER: 16 bytes)
             Scalar4 bb_postype = d_pos[cur_triangle_b];
-            Scalar3 bb_pos = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
-            int3 image_b = d_image[cur_triangle_b] vec3<Scalar> pos_b = box.shift(bb_pos, image_b);
+            vec3<Scalar> bb_pos(bb_postype.x, bb_postype.y, bb_postype.z);
+            int3 image_b = d_image[cur_triangle_b]; 
+	    vec3<Scalar> pos_b = box.shift(bb_pos, image_b);
 
             // get the c-particle's position (MEM TRANSFER: 16 bytes)
             Scalar4 cc_postype = d_pos[cur_triangle_c];
-            Scalar3 cc_pos = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
-            int3 image_c = d_image[cur_triangle_c] vec3<Scalar> pos_c = box.shift(cc_pos, image_c);
+            vec3<Scalar> cc_pos(cc_postype.x, cc_postype.y, cc_postype.z);
+            int3 image_c = d_image[cur_triangle_c]; 
+	    vec3<Scalar> pos_c = box.shift(cc_pos, image_c);
 
             Scalar Vol;
             if (cur_triangle_abc == 1)
@@ -213,8 +214,8 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                        dim3(threads1),
                        block_size * sizeof(Scalar),
                        0,
-                       d_sum_volume[0],
-                       d_partial_sum_volume,
+                       &d_sum_volume[0],
+                       d_sum_partial_volume,
                        num_blocks);
 
     return hipSuccess;
@@ -249,9 +250,9 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            const unsigned int* tpos_list,
                                                            const Index2D tlist_idx,
                                                            const unsigned int* n_triangles_list,
-                                                           Scalar* d_params,
+                                                           Scalar2* d_params,
                                                            const unsigned int n_triangle_type,
-                                                           unsigned int* d_flags);
+                                                           unsigned int* d_flags)
     {
     // start by identifying which particle we are to handle
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
@@ -264,9 +265,9 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
 
     // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
     Scalar4 postype = __ldg(d_pos + idx);
-    Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);
+    vec3<Scalar> pos(postype.x, postype.y, postype.z);
 
-    int3 image_a = __ldg(d_image + idx);
+    int3 image_a = d_image[idx];
 
     vec3<Scalar> pos_a = box.shift(pos, image_a);
 
@@ -301,13 +302,15 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
 
         // get the b-particle's position (MEM TRANSFER: 16 bytes)
         Scalar4 bb_postype = d_pos[cur_triangle_b];
-        Scalar3 bb_pos = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
-        int3 image_b = d_image[cur_triangle_b] vec3<Scalar> pos_b = box.shift(bb_pos, image_b);
+        vec3<Scalar> bb_pos(bb_postype.x, bb_postype.y, bb_postype.z);
+        int3 image_b = d_image[cur_triangle_b]; 
+	vec3<Scalar> pos_b = box.shift(bb_pos, image_b);
 
         // get the c-particle's position (MEM TRANSFER: 16 bytes)
         Scalar4 cc_postype = d_pos[cur_triangle_c];
-        Scalar3 cc_pos = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
-        int3 image_c = d_image[cur_triangle_c] vec3<Scalar> pos_c = box.shift(cc_pos, image_c);
+        vec3<Scalar> cc_pos(cc_postype.x, cc_postype.y, cc_postype.z);
+        int3 image_c = d_image[cur_triangle_c]; 
+	vec3<Scalar> pos_c = box.shift(cc_pos, image_c);
 
         vec3<Scalar> dVol;
         if (cur_triangle_abc == 1)
@@ -371,17 +374,16 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const unsigned int N,
                                                const Scalar4* d_pos,
                                                const int3* d_image,
-                                               const unsigned int* d_rtag,
                                                const BoxDim& box,
                                                const Scalar volume,
                                                const group_storage<6>* tlist,
                                                const unsigned int* tpos_list,
                                                const Index2D tlist_idx,
                                                const unsigned int* n_triangles_list,
-                                               Scalar* d_params,
+                                               Scalar2* d_params,
                                                const unsigned int n_triangle_type,
                                                int block_size,
-                                               unsigned int* d_flags);
+                                               unsigned int* d_flags)
     {
     unsigned int max_block_size;
     hipFuncAttributes attr;
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index 5b4399e75e..27560ea955 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -47,7 +47,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const unsigned int* tpos_list,
                                                const Index2D tlist_idx,
                                                const unsigned int* n_triangles_list,
-                                               Scalar* d_params,
+                                               Scalar2* d_params,
                                                const unsigned int n_triangle_type,
                                                int block_size,
                                                unsigned int* d_flags);
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index 5636762a5c..66b0693294 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -52,12 +52,13 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
 
     protected:
     unsigned int m_block_size; //!< block size for partial sum memory
+    unsigned int m_num_blocks;       //!< number of memory blocks reserved for partial sum memory
 
     std::unique_ptr<Autotuner> m_tuner; //!< Autotuner for block size of force loop
     GPUArray<unsigned int> m_flags;     //!< Flags set during the kernel execution
-    GPUArray<Scalar> m_params;          //!< Parameters stored on the GPU
+    GPUArray<Scalar2> m_params;          //!< Parameters stored on the GPU
 
-    GPUVector<Scalar> m_partial_sum; //!< memory space for partial sum over volume
+    GPUArray<Scalar> m_partial_sum; //!< memory space for partial sum over volume
     GPUArray<Scalar> m_sum;          //!< memory space for sum over volume
 
     //! Actually compute the forces

From 230484bbbf514346e4f17bac21927a8abce775c2 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@mays.engin.umich.edu>
Date: Fri, 28 Jan 2022 17:35:12 -0500
Subject: [PATCH 11/50] working GPU version (VolumeConstraint)

---
 .../VolumeConservationMeshForceComputeGPU.cc  | 18 ++--
 .../VolumeConservationMeshForceComputeGPU.cu  | 99 ++++++++++---------
 .../VolumeConservationMeshForceComputeGPU.cuh |  5 +-
 .../VolumeConservationMeshForceComputeGPU.h   |  4 -
 4 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 5d711d0b3a..9b43cbfee0 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -19,7 +19,7 @@ namespace md
 VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     std::shared_ptr<SystemDefinition> sysdef,
     std::shared_ptr<MeshDefinition> meshdef)
-    : VolumeConservationMeshForceCompute(sysdef, meshdef), m_block_size(256)
+    : VolumeConservationMeshForceCompute(sysdef, meshdef)
     {
     if (!m_exec_conf->isCUDAEnabled())
         {
@@ -37,19 +37,20 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     GPUArray<unsigned int> flags(1, this->m_exec_conf);
     m_flags.swap(flags);
 
+    // reset flags
+    ArrayHandle<unsigned int> h_flags(m_flags, access_location::host, access_mode::overwrite);
+    h_flags.data[0] = 0;
+
+
     GPUArray<Scalar> sum(1, m_exec_conf);
     m_sum.swap(sum);
 
+    m_block_size=256;
     unsigned int group_size = m_pdata->getN();
-
     m_num_blocks = group_size / m_block_size + 1;
     GPUArray<Scalar> partial_sum(m_num_blocks, m_exec_conf);
     m_partial_sum.swap(partial_sum);
 
-    // reset flags
-    ArrayHandle<unsigned int> h_flags(m_flags, access_location::host, access_mode::overwrite);
-    h_flags.data[0] = 0;
-
     unsigned int warp_size = this->m_exec_conf->dev_prop.warpSize;
     m_tuner.reset(new Autotuner(warp_size,
                                 1024,
@@ -74,12 +75,13 @@ void VolumeConservationMeshForceComputeGPU::setParams(unsigned int type, Scalar
  */
 void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
     {
+
+    computeVolume();
+
     // start the profile
     if (this->m_prof)
         this->m_prof->push(this->m_exec_conf, "VolumeConstraint");
 
-    computeVolume();
-
     // access the particle data arrays
     ArrayHandle<Scalar4> d_pos(m_pdata->getPositions(), access_location::device, access_mode::read);
     ArrayHandle<int3> d_image(m_pdata->getImages(), access_location::device, access_mode::read);
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 77cf9c465d..7404f7a105 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -1,17 +1,15 @@
+#include "hip/hip_runtime.h"
 // Copyright (c) 2009-2022 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
-#include "hip/hip_runtime.h"
-// Copyright (c) 2009-2021 The Regents of the University of Michigan
-// This file is part of the HOOMD-blue project, released under the BSD 3-Clause License.
-
 #include "VolumeConservationMeshForceComputeGPU.cuh"
 #include "hoomd/TextureTools.h"
+#include "hoomd/VectorMath.h"
 
 #include <assert.h>
 
-// SMALL a relatively small number
-#define SMALL Scalar(0.001)
+#include <stdio.h>
+
 
 /*! \file MeshVolumeConservationGPU.cu
     \brief Defines GPU kernel code for calculating the volume_constraint forces. Used by
@@ -39,14 +37,14 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
                                                             const unsigned int N,
                                                             const Scalar4* d_pos,
                                                             const int3* d_image,
-                                                            const BoxDim& box,
+                                                            BoxDim box,
                                                             const group_storage<6>* tlist,
                                                             const unsigned int* tpos_list,
                                                             const Index2D tlist_idx,
                                                             const unsigned int* n_triangles_list)
     {
-    // start by identifying which particle we are to handle
     HIP_DYNAMIC_SHARED(char, s_data)
+    Scalar* volume_sdata = (Scalar*)&s_data[0];
 
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
@@ -55,12 +53,12 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
     if (idx < N)
         {
         int n_triangles = n_triangles_list[idx];
-        Scalar4 postype = d_pos[idx];
-        vec3<Scalar> pos(postype.x, postype.y, postype.z);
-
-        int3 image_a = d_image[idx];
+        Scalar4 postype = __ldg(d_pos + idx);
+        Scalar3 pos_a = make_scalar3(postype.x, postype.y, postype.z);
+        int3 image_a = d_image[idx]; 
+        pos_a = box.shift(pos_a, image_a);
 
-        vec3<Scalar> pos_a = box.shift(pos, image_a);
+        volume_transfer = 0;
 
         for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
             {
@@ -73,32 +71,34 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
 
             // get the b-particle's position (MEM TRANSFER: 16 bytes)
             Scalar4 bb_postype = d_pos[cur_triangle_b];
-            vec3<Scalar> bb_pos(bb_postype.x, bb_postype.y, bb_postype.z);
+            Scalar3 pos_b = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
             int3 image_b = d_image[cur_triangle_b]; 
-	    vec3<Scalar> pos_b = box.shift(bb_pos, image_b);
+            pos_b = box.shift(pos_b, image_b);
 
             // get the c-particle's position (MEM TRANSFER: 16 bytes)
             Scalar4 cc_postype = d_pos[cur_triangle_c];
-            vec3<Scalar> cc_pos(cc_postype.x, cc_postype.y, cc_postype.z);
+            Scalar3 pos_c = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
             int3 image_c = d_image[cur_triangle_c]; 
-	    vec3<Scalar> pos_c = box.shift(cc_pos, image_c);
+            pos_c = box.shift(pos_c, image_c);
 
-            Scalar Vol;
+            vec3<Scalar> dVol(0,0,0);
             if (cur_triangle_abc == 1)
                 {
-                Vol = dot(cross(pos_b, pos_c), pos_a);
+                dVol.x = pos_b.y * pos_c.z - pos_b.z * pos_c.y;
+                dVol.y = pos_b.z * pos_c.x - pos_b.x * pos_c.z;
+                dVol.z = pos_b.x * pos_c.y - pos_b.y * pos_c.x;
                 }
             else
                 {
-                Vol = dot(cross(pos_c, pos_b), pos_a);
+                dVol.x = pos_c.y * pos_b.z - pos_c.z * pos_b.y;
+                dVol.y = pos_c.z * pos_b.x - pos_c.x * pos_b.z;
+                dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
                 }
-            volume_transfer += Vol / 6;
+            Scalar Vol = dVol.x * pos_a.x + dVol.y * pos_a.y + dVol.z * pos_a.z;
+            volume_transfer += Vol / 18.0;
             }
         }
 
-    Scalar* volume_sdata = (Scalar*)&s_data[0];
-
-    __syncthreads();
     volume_sdata[threadIdx.x] = volume_transfer;
     __syncthreads();
 
@@ -190,14 +190,13 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
     dim3 grid(num_blocks, 1, 1);
     dim3 grid1(1, 1, 1);
     dim3 threads(block_size, 1, 1);
-    dim3 threads1(256, 1, 1);
 
     // run the kernel
     hipLaunchKernelGGL(
         (gpu_compute_volume_constraint_volume_kernel),
-        grid,
-        threads,
-        max((unsigned int)(sizeof(Scalar)), (unsigned int)(block_size * sizeof(Scalar))),
+        dim3(grid),
+        dim3(threads),
+        block_size * sizeof(Scalar),
         0,
         d_sum_partial_volume,
         N,
@@ -211,7 +210,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
 
     hipLaunchKernelGGL((gpu_volume_reduce_partial_sum_kernel),
                        dim3(grid1),
-                       dim3(threads1),
+                       dim3(threads),
                        block_size * sizeof(Scalar),
                        0,
                        &d_sum_volume[0],
@@ -244,7 +243,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            const unsigned int N,
                                                            const Scalar4* d_pos,
                                                            const int3* d_image,
-                                                           const BoxDim& box,
+                                                           BoxDim box,
                                                            const Scalar volume,
                                                            const group_storage<6>* tlist,
                                                            const unsigned int* tpos_list,
@@ -265,13 +264,11 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
 
     // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
     Scalar4 postype = __ldg(d_pos + idx);
-    vec3<Scalar> pos(postype.x, postype.y, postype.z);
-
-    int3 image_a = d_image[idx];
-
-    vec3<Scalar> pos_a = box.shift(pos, image_a);
-
-    Scalar4 force = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
+    Scalar3 pos_a = make_scalar3(postype.x, postype.y, postype.z);
+    int3 image_a = d_image[idx]; 
+    pos_a = box.shift(pos_a, image_a);
+    
+    Scalar4 force = make_scalar4(Scalar(0.0),Scalar(0.0),Scalar(0.0),Scalar(0.0));
 
     // initialize the virial to 0
     Scalar virial[6];
@@ -302,24 +299,28 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
 
         // get the b-particle's position (MEM TRANSFER: 16 bytes)
         Scalar4 bb_postype = d_pos[cur_triangle_b];
-        vec3<Scalar> bb_pos(bb_postype.x, bb_postype.y, bb_postype.z);
+        Scalar3 pos_b = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
         int3 image_b = d_image[cur_triangle_b]; 
-	vec3<Scalar> pos_b = box.shift(bb_pos, image_b);
+        pos_b = box.shift(pos_b, image_b);
 
         // get the c-particle's position (MEM TRANSFER: 16 bytes)
         Scalar4 cc_postype = d_pos[cur_triangle_c];
-        vec3<Scalar> cc_pos(cc_postype.x, cc_postype.y, cc_postype.z);
+        Scalar3 pos_c = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
         int3 image_c = d_image[cur_triangle_c]; 
-	vec3<Scalar> pos_c = box.shift(cc_pos, image_c);
+        pos_c = box.shift(pos_c, image_c);
 
         vec3<Scalar> dVol;
         if (cur_triangle_abc == 1)
             {
-            dVol = cross(pos_b, pos_c);
+            dVol.x = pos_b.y * pos_c.z - pos_b.z * pos_c.y;
+            dVol.y = pos_b.z * pos_c.x - pos_b.x * pos_c.z;
+            dVol.z = pos_b.x * pos_c.y - pos_b.y * pos_c.x;
             }
         else
             {
-            dVol = cross(pos_c, pos_b);
+            dVol.x = pos_c.y * pos_b.z - pos_c.z * pos_b.y;
+            dVol.y = pos_c.z * pos_b.x - pos_c.x * pos_b.z;
+            dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
             }
 
         Scalar3 Fa;
@@ -333,12 +334,12 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
         force.z += Fa.z;
         force.w = energy;
 
-        virial[0] += Scalar(1. / 2.) * pos.x * Fa.x; // xx
-        virial[1] += Scalar(1. / 2.) * pos.y * Fa.x; // xy
-        virial[2] += Scalar(1. / 2.) * pos.z * Fa.x; // xz
-        virial[3] += Scalar(1. / 2.) * pos.y * Fa.y; // yy
-        virial[4] += Scalar(1. / 2.) * pos.z * Fa.y; // yz
-        virial[5] += Scalar(1. / 2.) * pos.z * Fa.z; // zz
+        virial[0] += Scalar(1. / 2.) * pos_a.x * Fa.x; // xx
+        virial[1] += Scalar(1. / 2.) * pos_a.y * Fa.x; // xy
+        virial[2] += Scalar(1. / 2.) * pos_a.z * Fa.x; // xz
+        virial[3] += Scalar(1. / 2.) * pos_a.y * Fa.y; // yy
+        virial[4] += Scalar(1. / 2.) * pos_a.z * Fa.y; // yz
+        virial[5] += Scalar(1. / 2.) * pos_a.z * Fa.z; // zz
         }
 
     // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index 27560ea955..4f06fa6033 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -5,14 +5,15 @@
 #include "hoomd/Index1D.h"
 #include "hoomd/MeshGroupData.cuh"
 #include "hoomd/ParticleData.cuh"
+#include <hip/hip_runtime.h>
 
 /*! \file MeshVolumeConservationGPU.cuh
     \brief Declares GPU kernel code for calculating the volume cnstraint forces. Used by
    MeshVolumeConservationGPU.
 */
 
-#ifndef __MESHVOLUMECONSERVATION_CUH__
-#define __MESHVOLUMECONSERVATION_CUH__
+#ifndef __VOLUMECONSERVATIONMESHFORCECOMPUTE_CUH__
+#define __VOLUMECONSERVATIONMESHFORCECOMPUTE_CUH__
 
 namespace hoomd
     {
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index 66b0693294..d21f577b0a 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -66,10 +66,6 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
 
     //! compute volumes
     virtual void computeVolume();
-
-    private:
-    //! allocate the memory needed to store partial sums
-    void resizePartialSumArrays();
     };
 
 namespace detail

From ea8550afc51fe87941823ecd678483bf779e5bec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Sun, 30 Jan 2022 10:58:18 -0500
Subject: [PATCH 12/50] add get Volume

---
 .../md/VolumeConservationMeshForceCompute.cc  |  3 +-
 hoomd/md/VolumeConservationMeshForceCompute.h |  8 ++++-
 hoomd/md/mesh/conservation.py                 |  6 ++++
 hoomd/md/pytest/test_meshbond.py              | 29 +++++++++++++++++++
 4 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index c21674d10b..278dbbdd0b 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -319,7 +319,8 @@ void export_VolumeConservationMeshForceCompute(pybind11::module& m)
         "VolumeConservationMeshForceCompute")
         .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>>())
         .def("setParams", &VolumeConservationMeshForceCompute::setParamsPython)
-        .def("getParams", &VolumeConservationMeshForceCompute::getParams);
+        .def("getParams", &VolumeConservationMeshForceCompute::getParams)
+        .def("getVolume", &VolumeConservationMeshForceCompute::getVolume);
     }
 
     } // end namespace detail
diff --git a/hoomd/md/VolumeConservationMeshForceCompute.h b/hoomd/md/VolumeConservationMeshForceCompute.h
index 080a3a25a7..f99672ee08 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.h
+++ b/hoomd/md/VolumeConservationMeshForceCompute.h
@@ -76,6 +76,11 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
     /// Get the parameters for a type
     pybind11::dict getParams(std::string type);
 
+    Scalar getVolume() // accessor for the m_volume member
+        {
+        return m_volume; // stored value only; nothing is recomputed here
+        };
+
 #ifdef ENABLE_MPI
     //! Get ghost particle fields requested by this pair potential
     /*! \param timestep Current time step
@@ -96,7 +101,8 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
 
     std::shared_ptr<MeshDefinition> m_mesh_data; //!< Mesh data to use in computing helfich energy
 
-    Scalar m_volume; //! sum of the distances weighted by the bending angle over all neighbors
+    Scalar m_volume; //!< volume of the mesh triangulation, as returned by getVolume()
+
     //! Actually compute the forces
     virtual void computeForces(uint64_t timestep);
 
diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index beeffe80c0..58afd52e4e 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -6,6 +6,7 @@
 from hoomd.md.mesh.potential import MeshPotential
 from hoomd.data.typeparam import TypeParameter
 from hoomd.data.parameterdicts import TypeParameterDict
+from hoomd.logging import log
 
 
 class Volume(MeshPotential):
@@ -42,3 +43,8 @@ def __init__(self, mesh):
         self._add_typeparam(params)
 
         super().__init__(mesh)
+
+    @log(requires_run=True)  # not accessible until the simulation has run
+    def volume(self):
+        """Volume of the mesh triangulation, read from the C++ object."""
+        return self._cpp_obj.getVolume()
diff --git a/hoomd/md/pytest/test_meshbond.py b/hoomd/md/pytest/test_meshbond.py
index e84435e32b..2048c4565d 100644
--- a/hoomd/md/pytest/test_meshbond.py
+++ b/hoomd/md/pytest/test_meshbond.py
@@ -4,6 +4,7 @@
 import copy as cp
 import hoomd
 import pytest
+import math
 import numpy as np
 
 _harmonic_args = {'k': [30.0, 25.0, 20.0], 'r0': [1.6, 1.7, 1.8]}
@@ -208,6 +209,34 @@ def test_forces_and_energies(tetrahedron_snapshot_factory, simulation_factory,
         np.testing.assert_allclose(sim_forces, force, rtol=1e-2, atol=1e-5)
 
 
+def test_volume(simulation_factory, tetrahedron_snapshot_factory):
+    snap = tetrahedron_snapshot_factory(d=0.969, L=5)
+    sim = simulation_factory(snap)
+
+    mesh = hoomd.mesh.Mesh(name=["tetrahedron"])
+    mesh.triangles = [[2, 1, 0], [0, 1, 3], [2, 0, 3], [1, 2, 3]]  # 4 faces
+
+    mesh_potential = hoomd.md.mesh.conservation.Volume(mesh)  # under test
+    mesh_potential.params["tetrahedron"] = dict(k=1, V0=1)
+
+    integrator = hoomd.md.Integrator(dt=0.005)
+
+    integrator.forces.append(mesh_potential)
+
+    langevin = hoomd.md.methods.Langevin(kT=1,
+                                         filter=hoomd.filter.All(),
+                                         alpha=0.1)
+    integrator.methods.append(langevin)
+    sim.operations.integrator = integrator
+
+    sim.run(0)  # zero steps; `volume` is a requires_run=True loggable
+
+    assert math.isclose(mesh_potential.volume,
+                        0.107227,  # 0.969**3 / (6 * sqrt(2)), cf. d above
+                        rel_tol=1e-2,
+                        abs_tol=1e-5)
+
+
 def test_auto_detach_simulation(simulation_factory,
                                 tetrahedron_snapshot_factory):
     sim = simulation_factory(tetrahedron_snapshot_factory(d=0.969, L=5))

From 17a4948ce7089f155a9c9516bfd85b4a24272c5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Fri, 30 Dec 2022 21:26:18 -0500
Subject: [PATCH 13/50] update VolumeCompute

---
 hoomd/md/VolumeConservationMeshForceCompute.cc   |  8 ++++----
 .../md/VolumeConservationMeshForceComputeGPU.cc  | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index 25fae053e0..1464494454 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -101,7 +101,7 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
     ArrayHandle<Scalar> h_virial(m_virial, access_location::host, access_mode::overwrite);
     size_t virial_pitch = m_virial.getPitch();
 
-    ArrayHandle<typename MeshTriangle::members_t> h_triangles(
+    ArrayHandle<typename Angle::members_t> h_triangles(
         m_mesh_data->getMeshTriangleData()->getMembersArray(),
         access_location::host,
         access_mode::read);
@@ -138,7 +138,7 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
     for (unsigned int i = 0; i < size; i++)
         {
         // lookup the tag of each of the particles participating in the bond
-        const typename MeshTriangle::members_t& triangle = h_triangles.data[i];
+        const typename Angle::members_t& triangle = h_triangles.data[i];
 
         unsigned int ttag_a = triangle.tag[0];
         assert(ttag_a < m_pdata->getMaximumTag() + 1);
@@ -255,7 +255,7 @@ void VolumeConservationMeshForceCompute::computeVolume()
     ArrayHandle<unsigned int> h_rtag(m_pdata->getRTags(), access_location::host, access_mode::read);
     ArrayHandle<int3> h_image(m_pdata->getImages(), access_location::host, access_mode::read);
 
-    ArrayHandle<typename MeshTriangle::members_t> h_triangles(
+    ArrayHandle<typename Angle::members_t> h_triangles(
         m_mesh_data->getMeshTriangleData()->getMembersArray(),
         access_location::host,
         access_mode::read);
@@ -269,7 +269,7 @@ void VolumeConservationMeshForceCompute::computeVolume()
     for (unsigned int i = 0; i < size; i++)
         {
         // lookup the tag of each of the particles participating in the bond
-        const typename MeshTriangle::members_t& triangle = h_triangles.data[i];
+        const typename Angle::members_t& triangle = h_triangles.data[i];
 
         unsigned int ttag_a = triangle.tag[0];
         assert(ttag_a < m_pdata->getMaximumTag() + 1);
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index d3a89dde61..60a3df31e5 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -82,14 +82,14 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
 
     BoxDim box = this->m_pdata->getGlobalBox();
 
-    const GPUArray<typename MeshTriangle::members_t>& gpu_meshtriangle_list
+    const GPUArray<typename Angle::members_t>& gpu_meshtriangle_list
         = this->m_mesh_data->getMeshTriangleData()->getGPUTable();
     const Index2D& gpu_table_indexer
         = this->m_mesh_data->getMeshTriangleData()->getGPUTableIndexer();
 
-    ArrayHandle<typename MeshTriangle::members_t> d_gpu_meshtrianglelist(gpu_meshtriangle_list,
-                                                                         access_location::device,
-                                                                         access_mode::read);
+    ArrayHandle<typename Angle::members_t> d_gpu_meshtrianglelist(gpu_meshtriangle_list,
+                                                                  access_location::device,
+                                                                  access_mode::read);
     ArrayHandle<unsigned int> d_gpu_meshtriangle_pos_list(
         m_mesh_data->getMeshTriangleData()->getGPUPosTable(),
         access_location::device,
@@ -155,14 +155,14 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
 
     m_num_blocks = m_pdata->getN() / m_block_size + 1;
 
-    const GPUArray<typename MeshTriangle::members_t>& gpu_meshtriangle_list
+    const GPUArray<typename Angle::members_t>& gpu_meshtriangle_list
         = this->m_mesh_data->getMeshTriangleData()->getGPUTable();
     const Index2D& gpu_table_indexer
         = this->m_mesh_data->getMeshTriangleData()->getGPUTableIndexer();
 
-    ArrayHandle<typename MeshTriangle::members_t> d_gpu_meshtrianglelist(gpu_meshtriangle_list,
-                                                                         access_location::device,
-                                                                         access_mode::read);
+    ArrayHandle<typename Angle::members_t> d_gpu_meshtrianglelist(gpu_meshtriangle_list,
+                                                                  access_location::device,
+                                                                  access_mode::read);
     ArrayHandle<unsigned int> d_gpu_meshtriangle_pos_list(
         m_mesh_data->getMeshTriangleData()->getGPUPosTable(),
         access_location::device,

From 2d0c0c6a488eecc916c903ab5e363324d525eefc Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Sat, 31 Dec 2022 10:48:54 -0500
Subject: [PATCH 14/50] fix Autotuner

---
 hoomd/md/VolumeConservationMeshForceComputeGPU.cc  | 14 +++++---------
 hoomd/md/VolumeConservationMeshForceComputeGPU.cu  | 12 ++++++------
 hoomd/md/VolumeConservationMeshForceComputeGPU.cuh |  6 +++---
 hoomd/md/VolumeConservationMeshForceComputeGPU.h   | 13 +------------
 4 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 60a3df31e5..978e26eb21 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -50,14 +50,10 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     GPUArray<Scalar> partial_sum(m_num_blocks, m_exec_conf);
     m_partial_sum.swap(partial_sum);
 
-    unsigned int warp_size = this->m_exec_conf->dev_prop.warpSize;
-    m_tuner.reset(new Autotuner(warp_size,
-                                1024,
-                                warp_size,
-                                5,
-                                100000,
-                                "vconstraint_forces",
-                                this->m_exec_conf));
+    m_tuner.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)},
+                                   m_exec_conf,
+                                   "vconstraint_forces"));
+    m_autotuners.push_back(m_tuner);
     }
 
 void VolumeConservationMeshForceComputeGPU::setParams(unsigned int type, Scalar K, Scalar V0)
@@ -121,7 +117,7 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
                                                 d_gpu_n_meshtriangle.data,
                                                 d_params.data,
                                                 m_mesh_data->getMeshTriangleData()->getNTypes(),
-                                                m_tuner->getParam(),
+                                                m_tuner->getParam()[0],
                                                 d_flags.data);
 
     if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 7404f7a105..3e25405e09 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -38,7 +38,7 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
                                                             const Scalar4* d_pos,
                                                             const int3* d_image,
                                                             BoxDim box,
-                                                            const group_storage<6>* tlist,
+                                                            const group_storage<3>* tlist,
                                                             const unsigned int* tpos_list,
                                                             const Index2D tlist_idx,
                                                             const unsigned int* n_triangles_list)
@@ -62,7 +62,7 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
 
         for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
             {
-            group_storage<6> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
+            group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
 
             int cur_triangle_b = cur_triangle.idx[0];
             int cur_triangle_c = cur_triangle.idx[1];
@@ -180,7 +180,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 const Scalar4* d_pos,
                                                 const int3* d_image,
                                                 const BoxDim& box,
-                                                const group_storage<6>* tlist,
+                                                const group_storage<3>* tlist,
                                                 const unsigned int* tpos_list,
                                                 const Index2D tlist_idx,
                                                 const unsigned int* n_triangles_list,
@@ -245,7 +245,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            const int3* d_image,
                                                            BoxDim box,
                                                            const Scalar volume,
-                                                           const group_storage<6>* tlist,
+                                                           const group_storage<3>* tlist,
                                                            const unsigned int* tpos_list,
                                                            const Index2D tlist_idx,
                                                            const unsigned int* n_triangles_list,
@@ -278,7 +278,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
     // loop over all triangles
     for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
         {
-        group_storage<6> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
+        group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
 
         int cur_triangle_b = cur_triangle.idx[0];
         int cur_triangle_c = cur_triangle.idx[1];
@@ -377,7 +377,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const int3* d_image,
                                                const BoxDim& box,
                                                const Scalar volume,
-                                               const group_storage<6>* tlist,
+                                               const group_storage<3>* tlist,
                                                const unsigned int* tpos_list,
                                                const Index2D tlist_idx,
                                                const unsigned int* n_triangles_list,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index 4f06fa6033..eedae31643 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -3,7 +3,7 @@
 
 #include "hoomd/HOOMDMath.h"
 #include "hoomd/Index1D.h"
-#include "hoomd/MeshGroupData.cuh"
+#include "hoomd/BondedGroupData.cuh"
 #include "hoomd/ParticleData.cuh"
 #include <hip/hip_runtime.h>
 
@@ -28,7 +28,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 const Scalar4* d_pos,
                                                 const int3* d_image,
                                                 const BoxDim& box,
-                                                const group_storage<6>* tlist,
+                                                const group_storage<3>* tlist,
                                                 const unsigned int* tpos_list,
                                                 const Index2D tlist_idx,
                                                 const unsigned int* n_triangles_list,
@@ -44,7 +44,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const int3* d_image,
                                                const BoxDim& box,
                                                const Scalar volume,
-                                               const group_storage<6>* tlist,
+                                               const group_storage<3>* tlist,
                                                const unsigned int* tpos_list,
                                                const Index2D tlist_idx,
                                                const unsigned int* n_triangles_list,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index d21f577b0a..8e6a8b62d8 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -36,17 +36,6 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
     VolumeConservationMeshForceComputeGPU(std::shared_ptr<SystemDefinition> sysdef,
                                           std::shared_ptr<MeshDefinition> meshdef);
 
-    //! Set autotuner parameters
-    /*! \param enable Enable/disable autotuning
-        \param period period (approximate) in time steps when returning occurs
-    */
-    virtual void setAutotunerParams(bool enable, unsigned int period)
-        {
-        VolumeConservationMeshForceCompute::setAutotunerParams(enable, period);
-        m_tuner->setPeriod(period);
-        m_tuner->setEnabled(enable);
-        }
-
     //! Set the parameters
     virtual void setParams(unsigned int type, Scalar K, Scalar V0);
 
@@ -54,7 +43,7 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
     unsigned int m_block_size; //!< block size for partial sum memory
     unsigned int m_num_blocks;       //!< number of memory blocks reserved for partial sum memory
 
-    std::unique_ptr<Autotuner> m_tuner; //!< Autotuner for block size of force loop
+    std::shared_ptr<Autotuner<1>> m_tuner; //!< Autotuner for block size
     GPUArray<unsigned int> m_flags;     //!< Flags set during the kernel execution
     GPUArray<Scalar2> m_params;          //!< Parameters stored on the GPU
 

From fadc08e8b2a51de774c4d77a3c8e93d989c93837 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Sat, 31 Dec 2022 10:50:49 -0500
Subject: [PATCH 15/50] add equation to Docs

---
 hoomd/md/mesh/conservation.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index 58afd52e4e..b659f3f017 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -12,28 +12,32 @@
 class Volume(MeshPotential):
     r"""Volume conservation potential.
 
-    :py:class:`Volume` specifies a volume constrainton the whole mesh
-    surface.
+    :py:class:`Volume` specifies a volume constraint on the whole mesh
+    surface:
+
+    .. math::
+
+        U(r) = k \frac{( V(r) - V_0 )^2}{2 \cdot V_0}
 
     Args:
         mesh (:py:mod:`hoomd.mesh.Mesh`): Mesh data structure constraint.
 
     Attributes:
         parameter (TypeParameter[dict]):
-            The parameter of the harmonic bonds for the defined mesh.
-            As the mesh can only have one type a type name does not have
-            to be stated. The dictionary has the following keys:
+            The parameter of the volume constraint for the defined mesh.
+            A type name does not have to be stated as the mesh can only
+            have one type. The dictionary has the following keys:
 
             * ``k`` (`float`, **required**) - potential constant
-              :math:`[\mathrm{energy} \cdot \mathrm{length}^{-2}]`
+              :math:`[\mathrm{energy} \cdot \mathrm{length}^{-3}]`
 
-            * ``V0`` (`float`, **required**) - rest length
-              :math:`[\mathrm{length}]`
+            * ``V0`` (`float`, **required**) - target volume
+              :math:`[\mathrm{length}^{3}]`
 
     Examples::
 
         volume = mesh.conservation.Volume(mesh)
-        volume.params["mesh"] = dict(k=10.0, r0=100)
+        volume.params["mesh"] = dict(k=10.0, V0=100)
     """
     _cpp_class_name = "VolumeConservationMeshForceCompute"
 

From badbdaa7fdba628328c5e0afc93ca984ee1e881f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Sat, 31 Dec 2022 11:08:52 -0500
Subject: [PATCH 16/50] create sphinx files for doc

---
 sphinx-doc/module-md-mesh-conservation.rst | 22 ++++++++++++++++++++++
 sphinx-doc/module-md-mesh.rst              |  1 +
 2 files changed, 23 insertions(+)
 create mode 100644 sphinx-doc/module-md-mesh-conservation.rst

diff --git a/sphinx-doc/module-md-mesh-conservation.rst b/sphinx-doc/module-md-mesh-conservation.rst
new file mode 100644
index 0000000000..2f2b9bee13
--- /dev/null
+++ b/sphinx-doc/module-md-mesh-conservation.rst
@@ -0,0 +1,22 @@
+.. Copyright (c) 2009-2022 The Regents of the University of Michigan.
+.. Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+md.mesh.conservation
+--------------------
+
+.. rubric:: Overview
+
+.. py:currentmodule:: hoomd.md.mesh.conservation
+
+.. autosummary::
+    :nosignatures:
+
+    Volume
+
+.. rubric:: Details
+
+.. automodule:: hoomd.md.mesh.conservation
+    :synopsis: Constraints applied to a mesh data structure.
+    :members: Volume
+    :no-inherited-members:
+    :show-inheritance:
diff --git a/sphinx-doc/module-md-mesh.rst b/sphinx-doc/module-md-mesh.rst
index fda620c865..0128faad58 100644
--- a/sphinx-doc/module-md-mesh.rst
+++ b/sphinx-doc/module-md-mesh.rst
@@ -25,3 +25,4 @@ md.mesh
    :maxdepth: 3
 
    module-md-mesh-bond
+   module-md-mesh-conservation

From 593680067310cd92b80f87ac57ad8ea1c9642cba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Mon, 9 Jan 2023 17:21:33 -0500
Subject: [PATCH 17/50] allow multi mesh types

---
 .../md/VolumeConservationMeshForceCompute.cc  | 31 +++++++----
 hoomd/md/VolumeConservationMeshForceCompute.h | 13 +++--
 .../VolumeConservationMeshForceComputeGPU.cc  |  6 ++-
 .../VolumeConservationMeshForceComputeGPU.cu  | 54 +++++++++----------
 4 files changed, 61 insertions(+), 43 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index 1464494454..1d37989647 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -28,11 +28,15 @@ VolumeConservationMeshForceCompute::VolumeConservationMeshForceCompute(
     {
     m_exec_conf->msg->notice(5) << "Constructing VolumeConservationMeshForceCompute" << endl;
 
+    unsigned int n_types = m_mesh_data->getMeshTriangleData()->getNTypes();
+
     // allocate the parameters
-    m_K = new Scalar[m_pdata->getNTypes()];
+    m_K = new Scalar[n_types];
 
     // allocate the parameters
-    m_V0 = new Scalar[m_pdata->getNTypes()];
+    m_V0 = new Scalar[n_types];
+
+    m_volume = new Scalar[n_types];
     }
 
 VolumeConservationMeshForceCompute::~VolumeConservationMeshForceCompute()
@@ -127,12 +131,6 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
     for (unsigned int i = 0; i < 6; i++)
         helfrich_virial[i] = Scalar(0.0);
 
-    Scalar VolDiff = m_volume - m_V0[0];
-
-    Scalar energy = m_K[0] * VolDiff * VolDiff / (2 * m_V0[0] * m_pdata->getN());
-
-    VolDiff = -m_K[0] / m_V0[0] * VolDiff / 6.0;
-
     // for each of the angles
     const unsigned int size = (unsigned int)m_mesh_data->getMeshTriangleData()->getN();
     for (unsigned int i = 0; i < size; i++)
@@ -173,6 +171,15 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
 
         Scalar3 Fa, Fb, Fc;
 
+        unsigned int triangle_type = m_mesh_data->getMeshTriangleData()->getTypeByIndex(i);
+
+        Scalar VolDiff = m_volume[triangle_type] - m_V0[triangle_type];
+
+        Scalar energy
+            = m_K[triangle_type] * VolDiff * VolDiff / (2 * m_V0[triangle_type] * m_pdata->getN());
+
+        VolDiff = -m_K[triangle_type] / m_V0[triangle_type] * VolDiff / 6.0;
+
         Fa.x = VolDiff * dVol_a.x;
         Fa.y = VolDiff * dVol_a.y;
         Fa.z = VolDiff * dVol_a.z;
@@ -262,7 +269,9 @@ void VolumeConservationMeshForceCompute::computeVolume()
 
     // get a local copy of the simulation box too
     const BoxDim& box = m_pdata->getGlobalBox();
-    m_volume = 0;
+
+    for (unsigned int i = 0; i < m_mesh_data->getMeshTriangleData()->getNTypes(); i++)
+        m_volume[i] = 0;
 
     // for each of the angles
     const unsigned int size = (unsigned int)m_mesh_data->getMeshTriangleData()->getN();
@@ -298,7 +307,9 @@ void VolumeConservationMeshForceCompute::computeVolume()
 
         Scalar vol_tri = dot(cross(pos_c, pos_b), pos_a) / 6.0;
 
-        m_volume += vol_tri;
+        unsigned int triangle_type = m_mesh_data->getMeshTriangleData()->getTypeByIndex(i);
+
+        m_volume[triangle_type] += vol_tri;
         }
     }
 
diff --git a/hoomd/md/VolumeConservationMeshForceCompute.h b/hoomd/md/VolumeConservationMeshForceCompute.h
index f99672ee08..d0100eb458 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.h
+++ b/hoomd/md/VolumeConservationMeshForceCompute.h
@@ -76,10 +76,15 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
     /// Get the parameters for a type
     pybind11::dict getParams(std::string type);
 
-    Scalar getVolume()
+    // Scalar getVolume()
+    //     {
+    //     return m_volume[0];
+    //     };
+
+    pybind11::array_t<Scalar> getVolume()
         {
-        return m_volume;
-        };
+        return pybind11::array(m_mesh_data->getMeshTriangleData()->getNTypes(), m_volume);
+        }
 
 #ifdef ENABLE_MPI
     //! Get ghost particle fields requested by this pair potential
@@ -101,7 +106,7 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
 
     std::shared_ptr<MeshDefinition> m_mesh_data; //!< Mesh data to use in computing helfich energy
 
-    Scalar m_volume; //! sum of the triangle areas within the mesh
+    Scalar* m_volume; //!< mesh volume per triangle type (sum of signed tetrahedron volumes)
 
     //! Actually compute the forces
     virtual void computeForces(uint64_t timestep);
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 978e26eb21..288b6aac4b 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -41,12 +41,14 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     ArrayHandle<unsigned int> h_flags(m_flags, access_location::host, access_mode::overwrite);
     h_flags.data[0] = 0;
 
-    GPUArray<Scalar> sum(1, m_exec_conf);
+    GPUArray<Scalar> sum(this->m_mesh_data->getMeshTriangleData()->getNTypes(), m_exec_conf);
     m_sum.swap(sum);
 
     m_block_size = 256;
     unsigned int group_size = m_pdata->getN();
-    m_num_blocks = group_size / m_block_size + 1;
+    m_num_blocks = group_size / m_block_size;
+    m_num_blocks *= this->m_mesh_data->getMeshTriangleData()->getNTypes();
+    m_num_blocks += 1;
     GPUArray<Scalar> partial_sum(m_num_blocks, m_exec_conf);
     m_partial_sum.swap(partial_sum);
 
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 3e25405e09..4268ac2113 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -1,3 +1,6 @@
+// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
 #include "hip/hip_runtime.h"
 // Copyright (c) 2009-2022 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
@@ -10,7 +13,6 @@
 
 #include <stdio.h>
 
-
 /*! \file MeshVolumeConservationGPU.cu
     \brief Defines GPU kernel code for calculating the volume_constraint forces. Used by
    MeshVolumeConservationComputeGPU.
@@ -55,7 +57,7 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
         int n_triangles = n_triangles_list[idx];
         Scalar4 postype = __ldg(d_pos + idx);
         Scalar3 pos_a = make_scalar3(postype.x, postype.y, postype.z);
-        int3 image_a = d_image[idx]; 
+        int3 image_a = d_image[idx];
         pos_a = box.shift(pos_a, image_a);
 
         volume_transfer = 0;
@@ -72,16 +74,16 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
             // get the b-particle's position (MEM TRANSFER: 16 bytes)
             Scalar4 bb_postype = d_pos[cur_triangle_b];
             Scalar3 pos_b = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
-            int3 image_b = d_image[cur_triangle_b]; 
+            int3 image_b = d_image[cur_triangle_b];
             pos_b = box.shift(pos_b, image_b);
 
             // get the c-particle's position (MEM TRANSFER: 16 bytes)
             Scalar4 cc_postype = d_pos[cur_triangle_c];
             Scalar3 pos_c = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
-            int3 image_c = d_image[cur_triangle_c]; 
+            int3 image_c = d_image[cur_triangle_c];
             pos_c = box.shift(pos_c, image_c);
 
-            vec3<Scalar> dVol(0,0,0);
+            vec3<Scalar> dVol(0, 0, 0);
             if (cur_triangle_abc == 1)
                 {
                 dVol.x = pos_b.y * pos_c.z - pos_b.z * pos_c.y;
@@ -148,7 +150,6 @@ gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum, Scalar* d_partial_sum, unsig
             if (threadIdx.x < offs)
                 volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
             offs >>= 1;
-            __syncthreads();
             }
 
         // everybody sums up sum2K
@@ -192,21 +193,20 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
     dim3 threads(block_size, 1, 1);
 
     // run the kernel
-    hipLaunchKernelGGL(
-        (gpu_compute_volume_constraint_volume_kernel),
-        dim3(grid),
-        dim3(threads),
-        block_size * sizeof(Scalar),
-        0,
-        d_sum_partial_volume,
-        N,
-        d_pos,
-        d_image,
-        box,
-        tlist,
-        tpos_list,
-        tlist_idx,
-        n_triangles_list);
+    hipLaunchKernelGGL((gpu_compute_volume_constraint_volume_kernel),
+                       dim3(grid),
+                       dim3(threads),
+                       block_size * sizeof(Scalar),
+                       0,
+                       d_sum_partial_volume,
+                       N,
+                       d_pos,
+                       d_image,
+                       box,
+                       tlist,
+                       tpos_list,
+                       tlist_idx,
+                       n_triangles_list);
 
     hipLaunchKernelGGL((gpu_volume_reduce_partial_sum_kernel),
                        dim3(grid1),
@@ -265,10 +265,10 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
     // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
     Scalar4 postype = __ldg(d_pos + idx);
     Scalar3 pos_a = make_scalar3(postype.x, postype.y, postype.z);
-    int3 image_a = d_image[idx]; 
+    int3 image_a = d_image[idx];
     pos_a = box.shift(pos_a, image_a);
-    
-    Scalar4 force = make_scalar4(Scalar(0.0),Scalar(0.0),Scalar(0.0),Scalar(0.0));
+
+    Scalar4 force = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
 
     // initialize the virial to 0
     Scalar virial[6];
@@ -282,7 +282,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
 
         int cur_triangle_b = cur_triangle.idx[0];
         int cur_triangle_c = cur_triangle.idx[1];
-        int cur_triangle_type = cur_triangle.idx[5];
+        int cur_triangle_type = cur_triangle.idx[2];
 
         // get the angle parameters (MEM TRANSFER: 8 bytes)
         Scalar2 params = __ldg(d_params + cur_triangle_type);
@@ -300,13 +300,13 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
         // get the b-particle's position (MEM TRANSFER: 16 bytes)
         Scalar4 bb_postype = d_pos[cur_triangle_b];
         Scalar3 pos_b = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
-        int3 image_b = d_image[cur_triangle_b]; 
+        int3 image_b = d_image[cur_triangle_b];
         pos_b = box.shift(pos_b, image_b);
 
         // get the c-particle's position (MEM TRANSFER: 16 bytes)
         Scalar4 cc_postype = d_pos[cur_triangle_c];
         Scalar3 pos_c = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
-        int3 image_c = d_image[cur_triangle_c]; 
+        int3 image_c = d_image[cur_triangle_c];
         pos_c = box.shift(pos_c, image_c);
 
         vec3<Scalar> dVol;

From 26c95e38a34041756189ca4cdf816a5983d508f5 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Mon, 9 Jan 2023 19:32:43 -0500
Subject: [PATCH 18/50] fix Volume GPU code

---
 hoomd/md/VolumeConservationMeshForceCompute.h |  7 +-
 .../VolumeConservationMeshForceComputeGPU.cc  | 13 ++-
 .../VolumeConservationMeshForceComputeGPU.cu  | 81 +++++++++++--------
 .../VolumeConservationMeshForceComputeGPU.cuh |  3 +-
 .../VolumeConservationMeshForceComputeGPU.h   |  9 +++
 5 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.h b/hoomd/md/VolumeConservationMeshForceCompute.h
index d0100eb458..320070c788 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.h
+++ b/hoomd/md/VolumeConservationMeshForceCompute.h
@@ -76,12 +76,7 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
     /// Get the parameters for a type
     pybind11::dict getParams(std::string type);
 
-    // Scalar getVolume()
-    //     {
-    //     return m_volume[0];
-    //     };
-
-    pybind11::array_t<Scalar> getVolume()
+    virtual pybind11::array_t<Scalar> getVolume()
         {
         return pybind11::array(m_mesh_data->getMeshTriangleData()->getNTypes(), m_volume);
         }
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 288b6aac4b..a546e616d9 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -33,6 +33,10 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     GPUArray<Scalar2> params(this->m_mesh_data->getMeshTriangleData()->getNTypes(), m_exec_conf);
     m_params.swap(params);
 
+    // allocate and zero device memory
+    GPUArray<Scalar> volume_GPU(this->m_mesh_data->getMeshTriangleData()->getNTypes(), m_exec_conf);
+    m_volume_GPU.swap(volume_GPU);
+
     // allocate flags storage on the GPU
     GPUArray<unsigned int> flags(1, this->m_exec_conf);
     m_flags.swap(flags);
@@ -101,6 +105,8 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
     ArrayHandle<Scalar> d_virial(m_virial, access_location::device, access_mode::overwrite);
     ArrayHandle<Scalar2> d_params(m_params, access_location::device, access_mode::read);
 
+    ArrayHandle<Scalar> d_volume(m_volume_GPU, access_location::device, access_mode::read);
+
     // access the flags array for overwriting
     ArrayHandle<unsigned int> d_flags(m_flags, access_location::device, access_mode::readwrite);
 
@@ -112,7 +118,7 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
                                                 d_pos.data,
                                                 d_image.data,
                                                 box,
-                                                m_volume,
+                                                d_volume.data,
                                                 d_gpu_meshtrianglelist.data,
                                                 d_gpu_meshtriangle_pos_list.data,
                                                 gpu_table_indexer,
@@ -178,6 +184,7 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
     kernel::gpu_compute_volume_constraint_volume(d_sumVol.data,
                                                  d_partial_sumVol.data,
                                                  m_pdata->getN(),
+                                                 m_mesh_data->getMeshTriangleData()->getNTypes(),
                                                  d_pos.data,
                                                  d_image.data,
                                                  box,
@@ -194,6 +201,7 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
         }
 
     ArrayHandle<Scalar> h_sumVol(m_sum, access_location::host, access_mode::read);
+    ArrayHandle<Scalar> h_volume(m_volume_GPU, access_location::host, access_mode::overwrite);
 #ifdef ENABLE_MPI
     if (m_sysdef->isDomainDecomposed())
         {
@@ -205,7 +213,8 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
                       m_exec_conf->getMPICommunicator());
         }
 #endif
-    m_volume = h_sumVol.data[0];
+    for (unsigned int i = 0; i <  m_mesh_data->getMeshTriangleData()->getNTypes(); i++)
+    	h_volume.data[i] = h_sumVol.data[i];
     }
 
 namespace detail
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 4268ac2113..3cbbbfd645 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -37,6 +37,7 @@ namespace kernel
 */
 __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_sum_volume,
                                                             const unsigned int N,
+                                                            const unsigned int tN,
                                                             const Scalar4* d_pos,
                                                             const int3* d_image,
                                                             BoxDim box,
@@ -50,7 +51,9 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
 
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
-    Scalar volume_transfer = 0;
+    Scalar volume = 0;
+
+    unsigned int cur_triangle_type = 0;
 
     if (idx < N)
         {
@@ -60,14 +63,13 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
         int3 image_a = d_image[idx];
         pos_a = box.shift(pos_a, image_a);
 
-        volume_transfer = 0;
-
         for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
             {
             group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
 
             int cur_triangle_b = cur_triangle.idx[0];
             int cur_triangle_c = cur_triangle.idx[1];
+            cur_triangle_type = cur_triangle.idx[2];
 
             int cur_triangle_abc = tpos_list[tlist_idx(idx, triangle_idx)];
 
@@ -97,11 +99,12 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
                 dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
                 }
             Scalar Vol = dVol.x * pos_a.x + dVol.y * pos_a.y + dVol.z * pos_a.z;
-            volume_transfer += Vol / 18.0;
+            volume += Vol / 18.0;
             }
         }
+    
+    volume_sdata[threadIdx.x*tN + cur_triangle_type] = volume;
 
-    volume_sdata[threadIdx.x] = volume_transfer;
     __syncthreads();
 
     // reduce the sum in parallel
@@ -109,7 +112,10 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
     while (offs > 0)
         {
         if (threadIdx.x < offs)
-            volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+	    {
+            for ( int i_types = 0; i_types < tN; i_types++)
+                 volume_sdata[threadIdx.x*tN + i_types] += volume_sdata[(threadIdx.x + offs)*tN + i_types];
+	    }
         offs >>= 1;
         __syncthreads();
         }
@@ -117,7 +123,8 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
     // write out our partial sum
     if (threadIdx.x == 0)
         {
-        d_partial_sum_volume[blockIdx.x] = volume_sdata[0];
+        for ( int i_types = 0; i_types < tN; i_types++)
+            d_partial_sum_volume[blockIdx.x*tN+i_types] = volume_sdata[i_types];
         }
     }
 
@@ -127,38 +134,45 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
     \param num_blocks Number of blocks to execute
+    \param tN Number of triangle types; d_partial_sum is indexed as [block * tN + type]
 */
 __global__ void
-gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum, Scalar* d_partial_sum, unsigned int num_blocks)
+gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum, Scalar* d_partial_sum, unsigned int tN, unsigned int num_blocks)
     {
-    Scalar sum = Scalar(0.0);
     HIP_DYNAMIC_SHARED(char, s_data)
     Scalar* volume_sdata = (Scalar*)&s_data[0];
 
     // sum up the values in the partial sum via a sliding window
-    for (int start = 0; start < num_blocks; start += blockDim.x)
-        {
-        __syncthreads();
-        if (start + threadIdx.x < num_blocks)
-            volume_sdata[threadIdx.x] = d_partial_sum[start + threadIdx.x];
-        else
-            volume_sdata[threadIdx.x] = Scalar(0.0);
-        __syncthreads();
-
-        // reduce the sum in parallel
-        int offs = blockDim.x >> 1;
-        while (offs > 0)
+    // each triangle type is reduced on its own and written to d_sum[type]
+    for (unsigned int i_types = 0; i_types < tN; i_types++)
+        {
+        Scalar sum = Scalar(0.0);
+        for (int start = 0; start < num_blocks; start += blockDim.x)
             {
-            if (threadIdx.x < offs)
-                volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
-            offs >>= 1;
+            __syncthreads();
+            if (start + threadIdx.x < num_blocks)
+                volume_sdata[threadIdx.x] = d_partial_sum[(start + threadIdx.x) * tN + i_types];
+            else
+                volume_sdata[threadIdx.x] = Scalar(0.0);
+            __syncthreads();
+
+            // reduce the sum in parallel
+            int offs = blockDim.x >> 1;
+            while (offs > 0)
+                {
+                if (threadIdx.x < offs)
+                    volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+                offs >>= 1;
+                // barrier: the next step (and the read of volume_sdata[0] below)
+                // consumes values written by other threads in this step
+                __syncthreads();
+                }
+
+            // every thread accumulates the total of this window
+            sum += volume_sdata[0];
             }
 
-        // everybody sums up sum2K
-        sum += volume_sdata[0];
+        if (threadIdx.x == 0)
+            d_sum[i_types] = sum;
         }
-
-    if (threadIdx.x == 0)
-        *d_sum = sum;
-    }
+    }
 
 /*! \param d_sigma Device memory to write per paricle sigma
     \param d_sigma_dash Device memory to write per particle sigma_dash
@@ -178,6 +188,7 @@ gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum, Scalar* d_partial_sum, unsig
 hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 Scalar* d_sum_partial_volume,
                                                 const unsigned int N,
+                                                const unsigned int tN,
                                                 const Scalar4* d_pos,
                                                 const int3* d_image,
                                                 const BoxDim& box,
@@ -200,6 +211,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                        0,
                        d_sum_partial_volume,
                        N,
+                       tN,
                        d_pos,
                        d_image,
                        box,
@@ -213,8 +225,9 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                        dim3(threads),
                        block_size * sizeof(Scalar),
                        0,
-                       &d_sum_volume[0],
+                       d_sum_volume,
                        d_sum_partial_volume,
+		       tN,
                        num_blocks);
 
     return hipSuccess;
@@ -244,7 +257,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            const Scalar4* d_pos,
                                                            const int3* d_image,
                                                            BoxDim box,
-                                                           const Scalar volume,
+                                                           const Scalar* volume,
                                                            const group_storage<3>* tlist,
                                                            const unsigned int* tpos_list,
                                                            const Index2D tlist_idx,
@@ -289,7 +302,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
         Scalar K = params.x;
         Scalar V0 = params.y;
 
-        Scalar VolDiff = volume - V0;
+        Scalar VolDiff = volume[0] - V0;
 
         Scalar energy = K * VolDiff * VolDiff / (2 * V0 * N);
 
@@ -376,7 +389,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const Scalar4* d_pos,
                                                const int3* d_image,
                                                const BoxDim& box,
-                                               const Scalar volume,
+                                               const Scalar* volume,
                                                const group_storage<3>* tlist,
                                                const unsigned int* tpos_list,
                                                const Index2D tlist_idx,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index eedae31643..7f39f14b4d 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -25,6 +25,7 @@ namespace kernel
 hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 Scalar* d_sum_partial_volume,
                                                 const unsigned int N,
+                                                const unsigned int tN,
                                                 const Scalar4* d_pos,
                                                 const int3* d_image,
                                                 const BoxDim& box,
@@ -43,7 +44,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const Scalar4* d_pos,
                                                const int3* d_image,
                                                const BoxDim& box,
-                                               const Scalar volume,
+                                               const Scalar* volume,
                                                const group_storage<3>* tlist,
                                                const unsigned int* tpos_list,
                                                const Index2D tlist_idx,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index 8e6a8b62d8..fd1b2a83aa 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -39,6 +39,12 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
     //! Set the parameters
     virtual void setParams(unsigned int type, Scalar K, Scalar V0);
 
+    virtual pybind11::array_t<Scalar> getVolume()
+        {
+        ArrayHandle<Scalar> h_volume(m_volume_GPU, access_location::host, access_mode::read);
+        return pybind11::array(m_mesh_data->getMeshTriangleData()->getNTypes(), h_volume.data);
+        }
+
     protected:
     unsigned int m_block_size; //!< block size for partial sum memory
     unsigned int m_num_blocks;       //!< number of memory blocks reserved for partial sum memory
@@ -50,11 +56,14 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
     GPUArray<Scalar> m_partial_sum; //!< memory space for partial sum over volume
     GPUArray<Scalar> m_sum;          //!< memory space for sum over volume
 
+    GPUArray<Scalar> m_volume_GPU;          //!< memory space for sum over volume
+
     //! Actually compute the forces
     virtual void computeForces(uint64_t timestep);
 
     //! compute volumes
     virtual void computeVolume();
+
     };
 
 namespace detail

From 1aeabc376431a4f1c3284d46621816e741c80bfa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Mon, 9 Jan 2023 21:43:48 -0500
Subject: [PATCH 19/50] minor improvements

---
 .../VolumeConservationMeshForceComputeGPU.cu  | 37 ++++++++++---------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 3cbbbfd645..fc5bed5a08 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -102,8 +102,8 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
             volume += Vol / 18.0;
             }
         }
-    
-    volume_sdata[threadIdx.x*tN + cur_triangle_type] = volume;
+
+    volume_sdata[threadIdx.x * tN + cur_triangle_type] = volume;
 
     __syncthreads();
 
@@ -112,10 +112,11 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
     while (offs > 0)
         {
         if (threadIdx.x < offs)
-	    {
-            for ( int i_types = 0; i_types < tN; i_types++)
-                 volume_sdata[threadIdx.x*tN + i_types] += volume_sdata[(threadIdx.x + offs)*tN + i_types];
-	    }
+            {
+            for (int i_types = 0; i_types < tN; i_types++)
+                volume_sdata[threadIdx.x * tN + i_types]
+                    += volume_sdata[(threadIdx.x + offs) * tN + i_types];
+            }
         offs >>= 1;
         __syncthreads();
         }
@@ -123,8 +124,8 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
     // write out our partial sum
     if (threadIdx.x == 0)
         {
-        for ( int i_types = 0; i_types < tN; i_types++)
-            d_partial_sum_volume[blockIdx.x*tN+i_types] = volume_sdata[i_types];
+        for (int i_types = 0; i_types < tN; i_types++)
+            d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[i_types];
         }
     }
 
@@ -133,21 +134,23 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
     \param d_partial_sum Array containing the partial sum
     \param num_blocks Number of blocks to execute
 */
-__global__ void
-gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum, Scalar* d_partial_sum, unsigned int tN, unsigned int num_blocks)
+__global__ void gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum,
+                                                     Scalar* d_partial_sum,
+                                                     unsigned int tN,
+                                                     unsigned int num_blocks)
     {
     HIP_DYNAMIC_SHARED(char, s_data)
     Scalar* volume_sdata = (Scalar*)&s_data[0];
 
     // sum up the values in the partial sum via a sliding window
-    for ( int i_types = 0; i_types < tN; i_types++)
-	{
+    for (int i_types = 0; i_types < tN; i_types++)
+        {
         Scalar sum = Scalar(0.0);
         for (int start = 0; start < num_blocks; start += blockDim.x)
             {
             __syncthreads();
             if (start + threadIdx.x < num_blocks)
-                volume_sdata[threadIdx.x] = d_partial_sum[(start + threadIdx.x)*tN+i_types];
+                volume_sdata[threadIdx.x] = d_partial_sum[(start + threadIdx.x) * tN + i_types];
             else
                 volume_sdata[threadIdx.x] = Scalar(0.0);
             __syncthreads();
@@ -157,7 +160,7 @@ gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum, Scalar* d_partial_sum, unsig
             while (offs > 0)
                 {
                 if (threadIdx.x < offs)
-                   volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+                    volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
                 offs >>= 1;
                 }
 
@@ -168,7 +171,7 @@ gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum, Scalar* d_partial_sum, unsig
         if (threadIdx.x == 0)
             d_sum[i_types] = sum;
         }
-   }
+    }
 
 /*! \param d_sigma Device memory to write per paricle sigma
     \param d_sigma_dash Device memory to write per particle sigma_dash
@@ -227,7 +230,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                        0,
                        d_sum_volume,
                        d_sum_partial_volume,
-		       tN,
+                       tN,
                        num_blocks);
 
     return hipSuccess;
@@ -302,7 +305,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
         Scalar K = params.x;
         Scalar V0 = params.y;
 
-        Scalar VolDiff = volume[0] - V0;
+        Scalar VolDiff = volume[cur_triangle_type] - V0;
 
         Scalar energy = K * VolDiff * VolDiff / (2 * V0 * N);
 

From e2cc0123c9a30caba9b1bcf8435821964a78ef6b Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Thu, 12 Jan 2023 18:06:27 -0500
Subject: [PATCH 20/50] update GPU Volume code

---
 .../md/VolumeConservationMeshForceCompute.cc  |  2 +
 .../VolumeConservationMeshForceComputeGPU.cc  | 35 +++++----
 .../VolumeConservationMeshForceComputeGPU.cu  | 72 ++++++++++---------
 .../VolumeConservationMeshForceComputeGPU.cuh |  1 +
 hoomd/md/pytest/test_meshpotential.py         | 27 +++----
 5 files changed, 77 insertions(+), 60 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index 1d37989647..eac324546d 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -45,8 +45,10 @@ VolumeConservationMeshForceCompute::~VolumeConservationMeshForceCompute()
 
     delete[] m_K;
     delete[] m_V0;
+    delete[] m_volume;
     m_K = NULL;
     m_V0 = NULL;
+    m_volume = NULL;
     }
 
 /*! \param type Type of the angle to set parameters for
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index a546e616d9..6f09407703 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -51,8 +51,8 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     m_block_size = 256;
     unsigned int group_size = m_pdata->getN();
     m_num_blocks = group_size / m_block_size;
-    m_num_blocks *= this->m_mesh_data->getMeshTriangleData()->getNTypes();
     m_num_blocks += 1;
+    m_num_blocks *= this->m_mesh_data->getMeshTriangleData()->getNTypes();
     GPUArray<Scalar> partial_sum(m_num_blocks, m_exec_conf);
     m_partial_sum.swap(partial_sum);
 
@@ -181,19 +181,26 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
                                          access_mode::overwrite);
     ArrayHandle<Scalar> d_sumVol(m_sum, access_location::device, access_mode::overwrite);
 
-    kernel::gpu_compute_volume_constraint_volume(d_sumVol.data,
-                                                 d_partial_sumVol.data,
-                                                 m_pdata->getN(),
-                                                 m_mesh_data->getMeshTriangleData()->getNTypes(),
-                                                 d_pos.data,
-                                                 d_image.data,
-                                                 box,
-                                                 d_gpu_meshtrianglelist.data,
-                                                 d_gpu_meshtriangle_pos_list.data,
-                                                 gpu_table_indexer,
-                                                 d_gpu_n_meshtriangle.data,
-                                                 m_block_size,
-                                                 m_num_blocks);
+    unsigned int NTypes =  m_mesh_data->getMeshTriangleData()->getNTypes();
+
+    for( unsigned int tid = 0; tid < NTypes; tid++)
+    	{
+
+         kernel::gpu_compute_volume_constraint_volume(d_sumVol.data,
+                                                      d_partial_sumVol.data,
+                                                      m_pdata->getN(),
+                                                      m_mesh_data->getMeshTriangleData()->getNTypes(),
+             					      tid,
+                                                      d_pos.data,
+                                                      d_image.data,
+                                                      box,
+                                                      d_gpu_meshtrianglelist.data,
+                                                      d_gpu_meshtriangle_pos_list.data,
+                                                      gpu_table_indexer,
+                                                      d_gpu_n_meshtriangle.data,
+                                                      m_block_size,
+                                                      m_num_blocks);
+        }
 
     if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
         {
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index fc5bed5a08..20861fa4ff 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -38,6 +38,7 @@ namespace kernel
 __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_sum_volume,
                                                             const unsigned int N,
                                                             const unsigned int tN,
+                                                            const unsigned int type_id,
                                                             const Scalar4* d_pos,
                                                             const int3* d_image,
                                                             BoxDim box,
@@ -51,9 +52,7 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
 
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
-    Scalar volume = 0;
-
-    unsigned int cur_triangle_type = 0;
+    Scalar volume_transfer = 0;
 
     if (idx < N)
         {
@@ -62,14 +61,19 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
         Scalar3 pos_a = make_scalar3(postype.x, postype.y, postype.z);
         int3 image_a = d_image[idx];
         pos_a = box.shift(pos_a, image_a);
+    
 
         for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
             {
             group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
 
+            int cur_triangle_type = cur_triangle.idx[2];
+
+	    if(cur_triangle_type != type_id)
+		    continue;
+
             int cur_triangle_b = cur_triangle.idx[0];
             int cur_triangle_c = cur_triangle.idx[1];
-            cur_triangle_type = cur_triangle.idx[2];
 
             int cur_triangle_abc = tpos_list[tlist_idx(idx, triangle_idx)];
 
@@ -99,11 +103,12 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
                 dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
                 }
             Scalar Vol = dVol.x * pos_a.x + dVol.y * pos_a.y + dVol.z * pos_a.z;
-            volume += Vol / 18.0;
+            volume_transfer += Vol / 18.0;
             }
         }
 
-    volume_sdata[threadIdx.x * tN + cur_triangle_type] = volume;
+
+    volume_sdata[threadIdx.x] = volume_transfer;
 
     __syncthreads();
 
@@ -113,9 +118,7 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
         {
         if (threadIdx.x < offs)
             {
-            for (int i_types = 0; i_types < tN; i_types++)
-                volume_sdata[threadIdx.x * tN + i_types]
-                    += volume_sdata[(threadIdx.x + offs) * tN + i_types];
+            volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
             }
         offs >>= 1;
         __syncthreads();
@@ -124,8 +127,7 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
     // write out our partial sum
     if (threadIdx.x == 0)
         {
-        for (int i_types = 0; i_types < tN; i_types++)
-            d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[i_types];
+        d_partial_sum_volume[blockIdx.x + tN * type_id] = volume_sdata[0];
         }
     }
 
@@ -137,40 +139,38 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
 __global__ void gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum,
                                                      Scalar* d_partial_sum,
                                                      unsigned int tN,
+                                                     unsigned int type_id,
                                                      unsigned int num_blocks)
     {
     HIP_DYNAMIC_SHARED(char, s_data)
     Scalar* volume_sdata = (Scalar*)&s_data[0];
 
     // sum up the values in the partial sum via a sliding window
-    for (int i_types = 0; i_types < tN; i_types++)
+    Scalar sum = Scalar(0.0);
+    for (int start = 0; start < num_blocks; start += blockDim.x)
         {
-        Scalar sum = Scalar(0.0);
-        for (int start = 0; start < num_blocks; start += blockDim.x)
-            {
-            __syncthreads();
-            if (start + threadIdx.x < num_blocks)
-                volume_sdata[threadIdx.x] = d_partial_sum[(start + threadIdx.x) * tN + i_types];
-            else
-                volume_sdata[threadIdx.x] = Scalar(0.0);
-            __syncthreads();
-
-            // reduce the sum in parallel
-            int offs = blockDim.x >> 1;
-            while (offs > 0)
-                {
-                if (threadIdx.x < offs)
-                    volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
-                offs >>= 1;
-                }
+        __syncthreads();
+        if (start + threadIdx.x < num_blocks)
+            volume_sdata[threadIdx.x] = d_partial_sum[start + threadIdx.x + tN * type_id];
+        else
+            volume_sdata[threadIdx.x] = Scalar(0.0);
+        __syncthreads();
 
-            // everybody sums up sum2K
-            sum += volume_sdata[0];
+        // reduce the sum in parallel
+        int offs = blockDim.x >> 1;
+        while (offs > 0)
+            {
+            if (threadIdx.x < offs)
+                volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+            offs >>= 1;
             }
 
-        if (threadIdx.x == 0)
-            d_sum[i_types] = sum;
+        // everybody sums up sum2K
+        sum += volume_sdata[0];
         }
+
+    if (threadIdx.x == 0)
+        d_sum[type_id] = sum;
     }
 
 /*! \param d_sigma Device memory to write per paricle sigma
@@ -192,6 +192,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 Scalar* d_sum_partial_volume,
                                                 const unsigned int N,
                                                 const unsigned int tN,
+                                                const unsigned int type_id,
                                                 const Scalar4* d_pos,
                                                 const int3* d_image,
                                                 const BoxDim& box,
@@ -215,6 +216,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                        d_sum_partial_volume,
                        N,
                        tN,
+                       type_id,
                        d_pos,
                        d_image,
                        box,
@@ -231,6 +233,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                        d_sum_volume,
                        d_sum_partial_volume,
                        tN,
+                       type_id,
                        num_blocks);
 
     return hipSuccess;
@@ -275,6 +278,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
     if (idx >= N)
         return;
 
+
     // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
     int n_triangles = n_triangles_list[idx];
 
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index 7f39f14b4d..8c88cbc01a 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -26,6 +26,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 Scalar* d_sum_partial_volume,
                                                 const unsigned int N,
                                                 const unsigned int tN,
+                                                const unsigned int type_id,
                                                 const Scalar4* d_pos,
                                                 const int3* d_image,
                                                 const BoxDim& box,
diff --git a/hoomd/md/pytest/test_meshpotential.py b/hoomd/md/pytest/test_meshpotential.py
index b616ca5c58..de74b04420 100644
--- a/hoomd/md/pytest/test_meshpotential.py
+++ b/hoomd/md/pytest/test_meshpotential.py
@@ -146,7 +146,7 @@ def test_after_attaching(tetrahedron_snapshot_factory, simulation_factory,
 
     mesh = hoomd.mesh.Mesh()
     mesh.type_ids = [0, 0, 0, 0]
-    mesh.triangles = [[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]]
+    mesh.triangles = [[2, 1, 0], [0, 1, 3], [2, 0, 3], [1, 2, 3]]
 
     mesh_potential = mesh_potential_cls(mesh)
     mesh_potential.params["mesh"] = potential_kwargs
@@ -175,14 +175,15 @@ def test_after_attaching(tetrahedron_snapshot_factory, simulation_factory,
 @pytest.mark.parametrize("mesh_potential_cls, potential_kwargs",
                          get_mesh_potential_and_args())
 def test_multiple_types(tetrahedron_snapshot_factory, simulation_factory,
-                        mesh_potential_cls, potential_kwargs):
-
-    sim = simulation_factory(tetrahedron_snapshot_factory(d=0.969, L=5))
+                         mesh_potential_cls, potential_kwargs):
+    
+    snap = tetrahedron_snapshot_factory(d=0.969, L=5)
+    sim = simulation_factory(snap)
 
     mesh = hoomd.mesh.Mesh()
-    mesh.types = ["mesh", "patch"]
-    mesh.type_ids = [0, 0, 0, 1]
-    mesh.triangles = [[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]]
+    mesh.types = ["mesh"]
+    mesh.type_ids = [0, 0, 0, 0]
+    mesh.triangles = [[2, 1, 0], [0, 1, 3], [2, 0, 3], [1, 2, 3]]
 
     mesh_bond_potential = mesh_potential_cls(mesh)
     mesh_bond_potential.all_params = potential_kwargs
@@ -202,9 +203,9 @@ def test_multiple_types(tetrahedron_snapshot_factory, simulation_factory,
         np.testing.assert_allclose(mesh_bond_potential.params["mesh"][key],
                                    potential_kwargs[key],
                                    rtol=1e-6)
-        np.testing.assert_allclose(mesh_bond_potential.params["patch"][key],
-                                   potential_kwargs[key],
-                                   rtol=1e-6)
+        #np.testing.assert_allclose(mesh_bond_potential.params["patch"][key],
+        #                           potential_kwargs[key],
+        #                           rtol=1e-6)
 
     mesh1 = hoomd.mesh.Mesh()
     with pytest.raises(RuntimeError):
@@ -216,13 +217,15 @@ def test_multiple_types(tetrahedron_snapshot_factory, simulation_factory,
 def test_forces_and_energies(tetrahedron_snapshot_factory, simulation_factory,
                              mesh_potential_cls, potential_kwargs, force,
                              energy):
+
     snap = tetrahedron_snapshot_factory(d=0.969, L=5)
     sim = simulation_factory(snap)
 
     mesh = hoomd.mesh.Mesh()
     mesh.types = ["mesh", "patch"]
-    mesh.type_ids = [0, 0, 0, 0]
-    mesh.triangles = [[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]]
+    #mesh.types = ["mesh"]
+    mesh.type_ids = [1, 1, 1, 1]
+    mesh.triangles = [[2, 1, 0], [0, 1, 3], [2, 0, 3], [1, 2, 3]]
 
     mesh_potential = mesh_potential_cls(mesh)
     mesh_potential.params["mesh"] = potential_kwargs

From f26079a2ed88ea2a37b1ce8eb6757b05b4cbc3b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Thu, 2 Feb 2023 11:18:08 -0500
Subject: [PATCH 21/50] remove old file

---
 hoomd/md/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hoomd/md/CMakeLists.txt b/hoomd/md/CMakeLists.txt
index 0005f03568..a9d4ae68ad 100644
--- a/hoomd/md/CMakeLists.txt
+++ b/hoomd/md/CMakeLists.txt
@@ -232,7 +232,6 @@ list(APPEND _md_sources ActiveForceComputeGPU.cc
                            TableDihedralForceComputeGPU.cc
                            TwoStepBDGPU.cc
                            TwoStepLangevinGPU.cc
-                           TwoStepNPTMTKGPU.cc
                            TwoStepNVEGPU.cc
 			   VolumeConservationMeshForceComputeGPU.cc
                            TwoStepConstantVolumeGPU.cc

From ba76983f60c671e28dc6a95e84978a7d44c4f3b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Thu, 2 Feb 2023 11:19:19 -0500
Subject: [PATCH 22/50] remove another old file

---
 hoomd/md/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hoomd/md/CMakeLists.txt b/hoomd/md/CMakeLists.txt
index a9d4ae68ad..8e079b9d4e 100644
--- a/hoomd/md/CMakeLists.txt
+++ b/hoomd/md/CMakeLists.txt
@@ -232,7 +232,6 @@ list(APPEND _md_sources ActiveForceComputeGPU.cc
                            TableDihedralForceComputeGPU.cc
                            TwoStepBDGPU.cc
                            TwoStepLangevinGPU.cc
-                           TwoStepNVEGPU.cc
 			   VolumeConservationMeshForceComputeGPU.cc
                            TwoStepConstantVolumeGPU.cc
                            TwoStepConstantPressureGPU.cc

From c8a842259b94e8ffcc5bbf80a5772aa57deb6e88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Thu, 2 Feb 2023 11:20:58 -0500
Subject: [PATCH 23/50] another removed file

---
 hoomd/md/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hoomd/md/CMakeLists.txt b/hoomd/md/CMakeLists.txt
index 8e079b9d4e..2d8f810cde 100644
--- a/hoomd/md/CMakeLists.txt
+++ b/hoomd/md/CMakeLists.txt
@@ -190,7 +190,6 @@ set(_md_headers ActiveForceComputeGPU.h
                 TwoStepRATTLENVEGPU.h
                 TwoStepRATTLENVEGPU.cuh
                 TwoStepRATTLENVE.h
-                TwoStepNVEGPU.h
                 TwoStepNVE.h
                 TwoStepConstantVolume.h
                 TwoStepConstantVolumeGPU.h

From bf24b1bf49778e21ae34c5f1b9c2f11a762fefa3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Thu, 2 Feb 2023 11:22:57 -0500
Subject: [PATCH 24/50] another removed file

---
 hoomd/md/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hoomd/md/CMakeLists.txt b/hoomd/md/CMakeLists.txt
index 2d8f810cde..6cf9efc015 100644
--- a/hoomd/md/CMakeLists.txt
+++ b/hoomd/md/CMakeLists.txt
@@ -190,7 +190,6 @@ set(_md_headers ActiveForceComputeGPU.h
                 TwoStepRATTLENVEGPU.h
                 TwoStepRATTLENVEGPU.cuh
                 TwoStepRATTLENVE.h
-                TwoStepNVE.h
                 TwoStepConstantVolume.h
                 TwoStepConstantVolumeGPU.h
                 TwoStepConstantPressure.h

From 8658228cce0e75ca78e187141a2aa8b45d5c90b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Thu, 2 Feb 2023 11:29:40 -0500
Subject: [PATCH 25/50] fix pytest

---
 hoomd/md/pytest/test_meshpotential.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hoomd/md/pytest/test_meshpotential.py b/hoomd/md/pytest/test_meshpotential.py
index 793fa9981d..b982656338 100644
--- a/hoomd/md/pytest/test_meshpotential.py
+++ b/hoomd/md/pytest/test_meshpotential.py
@@ -181,7 +181,7 @@ def test_multiple_types(tetrahedron_snapshot_factory, simulation_factory,
     sim = simulation_factory(snap)
 
     mesh = hoomd.mesh.Mesh()
-    mesh.types = ["mesh"]
+    mesh.types = ["mesh", "patch"]
     mesh.type_ids = [0, 0, 0, 1]
     mesh.triangles = [[2, 1, 0], [0, 1, 3], [2, 0, 3], [1, 2, 3]]
 

From c9b482066fb8f0df73d6f4da67aa2c2f88d38d85 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Thu, 2 Feb 2023 13:46:36 -0500
Subject: [PATCH 26/50] improve Volume calculations

---
 .../VolumeConservationMeshForceComputeGPU.cc  |  35 +-
 .../VolumeConservationMeshForceComputeGPU.cu  | 117 ++---
 .../VolumeConservationMeshForceComputeGPU.cu1 | 460 ++++++++++++++++++
 .../VolumeConservationMeshForceComputeGPU.cuh |   1 -
 4 files changed, 534 insertions(+), 79 deletions(-)
 create mode 100644 hoomd/md/VolumeConservationMeshForceComputeGPU.cu1

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 6f09407703..faad184969 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -183,24 +183,19 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
 
     unsigned int NTypes =  m_mesh_data->getMeshTriangleData()->getNTypes();
 
-    for( unsigned int tid = 0; tid < NTypes; tid++)
-    	{
-
-         kernel::gpu_compute_volume_constraint_volume(d_sumVol.data,
-                                                      d_partial_sumVol.data,
-                                                      m_pdata->getN(),
-                                                      m_mesh_data->getMeshTriangleData()->getNTypes(),
-             					      tid,
-                                                      d_pos.data,
-                                                      d_image.data,
-                                                      box,
-                                                      d_gpu_meshtrianglelist.data,
-                                                      d_gpu_meshtriangle_pos_list.data,
-                                                      gpu_table_indexer,
-                                                      d_gpu_n_meshtriangle.data,
-                                                      m_block_size,
-                                                      m_num_blocks);
-        }
+    kernel::gpu_compute_volume_constraint_volume(d_sumVol.data,
+                                                 d_partial_sumVol.data,
+                                                 m_pdata->getN(),
+                                                 NTypes,
+                                                 d_pos.data,
+                                                 d_image.data,
+                                                 box,
+                                                 d_gpu_meshtrianglelist.data,
+                                                 d_gpu_meshtriangle_pos_list.data,
+                                                 gpu_table_indexer,
+                                                 d_gpu_n_meshtriangle.data,
+                                                 m_block_size,
+                                                 m_num_blocks);
 
     if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
         {
@@ -214,13 +209,13 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
         {
         MPI_Allreduce(MPI_IN_PLACE,
                       &h_sumVol.data[0],
-                      1,
+                      NTypes,
                       MPI_HOOMD_SCALAR,
                       MPI_SUM,
                       m_exec_conf->getMPICommunicator());
         }
 #endif
-    for (unsigned int i = 0; i <  m_mesh_data->getMeshTriangleData()->getNTypes(); i++)
+    for (unsigned int i = 0; i < NTypes; i++)
     	h_volume.data[i] = h_sumVol.data[i];
     }
 
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 20861fa4ff..3fda35f999 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -38,7 +38,6 @@ namespace kernel
 __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_sum_volume,
                                                             const unsigned int N,
                                                             const unsigned int tN,
-                                                            const unsigned int type_id,
                                                             const Scalar4* d_pos,
                                                             const int3* d_image,
                                                             BoxDim box,
@@ -52,7 +51,10 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
 
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
-    Scalar volume_transfer = 0;
+    Scalar *volume_transfer = (Scalar*)malloc(tN * sizeof *volume_transfer);
+
+    for( int i_types = 0; i_types < tN; i_types++)
+	    volume_transfer[i_types]=0;
 
     if (idx < N)
         {
@@ -67,13 +69,9 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
             {
             group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
 
-            int cur_triangle_type = cur_triangle.idx[2];
-
-	    if(cur_triangle_type != type_id)
-		    continue;
-
             int cur_triangle_b = cur_triangle.idx[0];
             int cur_triangle_c = cur_triangle.idx[1];
+            int cur_triangle_type = cur_triangle.idx[2];
 
             int cur_triangle_abc = tpos_list[tlist_idx(idx, triangle_idx)];
 
@@ -103,32 +101,35 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
                 dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
                 }
             Scalar Vol = dVol.x * pos_a.x + dVol.y * pos_a.y + dVol.z * pos_a.z;
-            volume_transfer += Vol / 18.0;
+            volume_transfer[cur_triangle_type] += Vol / 18.0;
             }
         }
 
 
-    volume_sdata[threadIdx.x] = volume_transfer;
-
-    __syncthreads();
-
-    // reduce the sum in parallel
-    int offs = blockDim.x >> 1;
-    while (offs > 0)
-        {
-        if (threadIdx.x < offs)
-            {
-            volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
-            }
-        offs >>= 1;
-        __syncthreads();
-        }
-
-    // write out our partial sum
-    if (threadIdx.x == 0)
-        {
-        d_partial_sum_volume[blockIdx.x + tN * type_id] = volume_sdata[0];
-        }
+    for( int i_types = 0; i_types < tN; i_types++)
+       {
+       volume_sdata[threadIdx.x]  = volume_transfer[i_types];
+
+       __syncthreads();
+
+       // reduce the sum in parallel
+       int offs = blockDim.x >> 1;
+       while (offs > 0)
+           {
+           if (threadIdx.x < offs)
+               {
+               volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+               }
+           offs >>= 1;
+           __syncthreads();
+           }
+
+       // write out our partial sum
+       if (threadIdx.x == 0)
+           {
+           d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[0];
+           }
+       }
     }
 
 //! Kernel function for reducing a partial sum to a full sum (one value)
@@ -139,38 +140,41 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
 __global__ void gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum,
                                                      Scalar* d_partial_sum,
                                                      unsigned int tN,
-                                                     unsigned int type_id,
                                                      unsigned int num_blocks)
     {
     HIP_DYNAMIC_SHARED(char, s_data)
     Scalar* volume_sdata = (Scalar*)&s_data[0];
 
-    // sum up the values in the partial sum via a sliding window
-    Scalar sum = Scalar(0.0);
-    for (int start = 0; start < num_blocks; start += blockDim.x)
-        {
-        __syncthreads();
-        if (start + threadIdx.x < num_blocks)
-            volume_sdata[threadIdx.x] = d_partial_sum[start + threadIdx.x + tN * type_id];
-        else
-            volume_sdata[threadIdx.x] = Scalar(0.0);
-        __syncthreads();
-
-        // reduce the sum in parallel
-        int offs = blockDim.x >> 1;
-        while (offs > 0)
-            {
-            if (threadIdx.x < offs)
-                volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
-            offs >>= 1;
-            }
-
-        // everybody sums up sum2K
-        sum += volume_sdata[0];
-        }
 
-    if (threadIdx.x == 0)
-        d_sum[type_id] = sum;
+    for( int i_types = 0; i_types < tN; i_types++)
+       {
+       // sum up the values in the partial sum via a sliding window
+       Scalar sum = Scalar(0.0);
+       for (int start = 0; start < num_blocks; start += blockDim.x)
+           {
+           __syncthreads();
+           if (start + threadIdx.x < num_blocks)
+               volume_sdata[threadIdx.x] = d_partial_sum[(start + threadIdx.x) * tN + i_types];
+           else
+               volume_sdata[threadIdx.x] = Scalar(0.0);
+           __syncthreads();
+
+           // reduce the sum in parallel
+           int offs = blockDim.x >> 1;
+           while (offs > 0)
+               {
+               if (threadIdx.x < offs)
+                   volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+               offs >>= 1;
+               }
+
+           // everybody sums up sum2K
+           sum += volume_sdata[0];
+           }
+
+       if (threadIdx.x == 0)
+           d_sum[i_types] = sum;
+       }
     }
 
 /*! \param d_sigma Device memory to write per paricle sigma
@@ -192,7 +196,6 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 Scalar* d_sum_partial_volume,
                                                 const unsigned int N,
                                                 const unsigned int tN,
-                                                const unsigned int type_id,
                                                 const Scalar4* d_pos,
                                                 const int3* d_image,
                                                 const BoxDim& box,
@@ -216,7 +219,6 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                        d_sum_partial_volume,
                        N,
                        tN,
-                       type_id,
                        d_pos,
                        d_image,
                        box,
@@ -233,7 +235,6 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                        d_sum_volume,
                        d_sum_partial_volume,
                        tN,
-                       type_id,
                        num_blocks);
 
     return hipSuccess;
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu1 b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu1
new file mode 100644
index 0000000000..b3bf55e106
--- /dev/null
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu1
@@ -0,0 +1,460 @@
+// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+#include "hip/hip_runtime.h"
+// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+#include "VolumeConservationMeshForceComputeGPU.cuh"
+#include "hoomd/TextureTools.h"
+#include "hoomd/VectorMath.h"
+
+#include <assert.h>
+
+#include <stdio.h>
+
+/*! \file MeshVolumeConservationGPU.cu
+    \brief Defines GPU kernel code for calculating the volume_constraint forces. Used by
+   MeshVolumeConservationComputeGPU.
+*/
+
+namespace hoomd
+    {
+namespace md
+    {
+namespace kernel
+    {
+//! Kernel for calculating volume_constraint sigmas on the GPU
+/*! \param d_sigma Device memory to write per paricle sigma
+    \param d_sigma_dash Device memory to write per particle sigma_dash
+    \param N number of particles
+    \param d_pos device array of particle positions
+    \param d_rtag device array of particle reverse tags
+    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
+    \param blist List of mesh bonds stored on the GPU
+    \param d_triangles device array of mesh triangles
+    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
+*/
+__global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_sum_volume,
+                                                            const unsigned int N,
+                                                            const unsigned int tN,
+                                                            const unsigned int type_id,
+                                                            const Scalar4* d_pos,
+                                                            const int3* d_image,
+                                                            BoxDim box,
+                                                            const group_storage<3>* tlist,
+                                                            const unsigned int* tpos_list,
+                                                            const Index2D tlist_idx,
+                                                            const unsigned int* n_triangles_list)
+    {
+    HIP_DYNAMIC_SHARED(char, s_data)
+    Scalar* volume_sdata = (Scalar*)&s_data[0];
+
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+    Scalar *volume_transfer = (Scalar*)malloc(tN * sizeof *volume_transfer);
+
+    for( int i_types = 0; i_types < tN; i_types++)
+	    volume_transfer[i_types]=0;
+
+    if (idx < N)
+        {
+        int n_triangles = n_triangles_list[idx];
+        Scalar4 postype = __ldg(d_pos + idx);
+        Scalar3 pos_a = make_scalar3(postype.x, postype.y, postype.z);
+        int3 image_a = d_image[idx];
+        pos_a = box.shift(pos_a, image_a);
+    
+
+        for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
+            {
+            group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
+
+            int cur_triangle_type = cur_triangle.idx[2];
+
+            int cur_triangle_b = cur_triangle.idx[0];
+            int cur_triangle_c = cur_triangle.idx[1];
+
+            int cur_triangle_abc = tpos_list[tlist_idx(idx, triangle_idx)];
+
+            // get the b-particle's position (MEM TRANSFER: 16 bytes)
+            Scalar4 bb_postype = d_pos[cur_triangle_b];
+            Scalar3 pos_b = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
+            int3 image_b = d_image[cur_triangle_b];
+            pos_b = box.shift(pos_b, image_b);
+
+            // get the c-particle's position (MEM TRANSFER: 16 bytes)
+            Scalar4 cc_postype = d_pos[cur_triangle_c];
+            Scalar3 pos_c = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
+            int3 image_c = d_image[cur_triangle_c];
+            pos_c = box.shift(pos_c, image_c);
+
+            vec3<Scalar> dVol(0, 0, 0);
+            if (cur_triangle_abc == 1)
+                {
+                dVol.x = pos_b.y * pos_c.z - pos_b.z * pos_c.y;
+                dVol.y = pos_b.z * pos_c.x - pos_b.x * pos_c.z;
+                dVol.z = pos_b.x * pos_c.y - pos_b.y * pos_c.x;
+                }
+            else
+                {
+                dVol.x = pos_c.y * pos_b.z - pos_c.z * pos_b.y;
+                dVol.y = pos_c.z * pos_b.x - pos_c.x * pos_b.z;
+                dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
+                }
+            Scalar Vol = dVol.x * pos_a.x + dVol.y * pos_a.y + dVol.z * pos_a.z;
+            volume_transfer[cur_triangle_type] += Vol / 18.0;
+            }
+        }
+
+    for( int i_types = 0; i_types < tN; i_types++)
+        volume_sdata[threadIdx.x*tN+i_types] = volume_transfer[i_types];
+
+    __syncthreads();
+
+    // reduce the sum in parallel
+    int offs = blockDim.x >> 1;
+    while (offs > 0)
+        {
+        if (threadIdx.x < offs)
+            {
+            for( int i_types = 0; i_types < tN; i_types++)
+                 volume_sdata[threadIdx.x*tN + i_types] += volume_sdata[(threadIdx.x + offs)*tN + i_types];
+            }
+        offs >>= 1;
+        __syncthreads();
+        }
+
+    // write out our partial sum
+    if (threadIdx.x == 0)
+        {
+        for( int i_types = 0; i_types < tN; i_types++)
+            d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[i_types];
+        }
+    }
+
+//! Kernel function for reducing a partial sum to a full sum (one value)
+/*! \param d_sum Placeholder for the sum
+    \param d_partial_sum Array containing the partial sum
+    \param num_blocks Number of blocks to execute
+*/
+__global__ void gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum,
+                                                     Scalar* d_partial_sum,
+                                                     unsigned int tN,
+                                                     unsigned int type_id,
+                                                     unsigned int num_blocks)
+    {
+    HIP_DYNAMIC_SHARED(char, s_data)
+    Scalar* volume_sdata = (Scalar*)&s_data[0];
+
+    Scalar *sum = (Scalar*)malloc(tN * sizeof *sum);
+
+    for( int i_types = 0; i_types < tN; i_types++)
+	    sum[i_types]=0;
+
+    for (int start = 0; start < num_blocks; start += blockDim.x)
+        {
+        __syncthreads();
+        for( int i_types = 0; i_types < tN; i_types++)
+           {
+           if (start + threadIdx.x < num_blocks)
+               volume_sdata[threadIdx.x*tN + i_types] = d_partial_sum[(start + threadIdx.x) * tN + i_types];
+           else
+               volume_sdata[threadIdx.x*tN + i_types] = Scalar(0.0);
+	   }
+        __syncthreads();
+
+        // reduce the sum in parallel
+        int offs = blockDim.x >> 1;
+        while (offs > 0)
+            {
+            if (threadIdx.x < offs)
+                for( int i_types = 0; i_types < tN; i_types++)
+                    volume_sdata[threadIdx.x*tN + i_types] += volume_sdata[(threadIdx.x + offs)*tN + i_types];
+            offs >>= 1;
+            }
+
+        // everybody sums up sum2K
+        for( int i_types = 0; i_types < tN; i_types++)
+            sum[i_types] += volume_sdata[i_types];
+        }
+
+    if (threadIdx.x == 0)
+	{
+        for( int i_types = 0; i_types < tN; i_types++)
+            d_sum[i_types] = sum[i_types];
+	}
+    }
+
+/*! \param d_sigma Device memory to write per paricle sigma
+    \param d_sigma_dash Device memory to write per particle sigma_dash
+    \param N number of particles
+    \param d_pos device array of particle positions
+    \param d_rtag device array of particle reverse tags
+    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
+    \param blist List of mesh bonds stored on the GPU
+    \param d_triangles device array of mesh triangles
+    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
+    \param block_size Block size to use when performing calculations
+    \param compute_capability Device compute capability (200, 300, 350, ...)
+
+    \returns Any error code resulting from the kernel launch
+    \note Always returns hipSuccess in release builds to avoid the hipDeviceSynchronize()
+*/
+hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
+                                                Scalar* d_sum_partial_volume,
+                                                const unsigned int N,
+                                                const unsigned int tN,
+                                                const unsigned int type_id,
+                                                const Scalar4* d_pos,
+                                                const int3* d_image,
+                                                const BoxDim& box,
+                                                const group_storage<3>* tlist,
+                                                const unsigned int* tpos_list,
+                                                const Index2D tlist_idx,
+                                                const unsigned int* n_triangles_list,
+                                                unsigned int block_size,
+                                                unsigned int num_blocks)
+    {
+    dim3 grid(num_blocks, 1, 1);
+    dim3 grid1(1, 1, 1);
+    dim3 threads(block_size, 1, 1);
+
+    // run the kernel
+    hipLaunchKernelGGL((gpu_compute_volume_constraint_volume_kernel),
+                       dim3(grid),
+                       dim3(threads),
+                       block_size * sizeof(Scalar),
+                       0,
+                       d_sum_partial_volume,
+                       N,
+                       tN,
+                       type_id,
+                       d_pos,
+                       d_image,
+                       box,
+                       tlist,
+                       tpos_list,
+                       tlist_idx,
+                       n_triangles_list);
+
+    hipLaunchKernelGGL((gpu_volume_reduce_partial_sum_kernel),
+                       dim3(grid1),
+                       dim3(threads),
+                       block_size * sizeof(Scalar),
+                       0,
+                       d_sum_volume,
+                       d_sum_partial_volume,
+                       tN,
+                       type_id,
+                       num_blocks);
+
+    return hipSuccess;
+    }
+
+//! Kernel for calculating volume_constraint sigmas on the GPU
+/*! \param d_force Device memory to write computed forces
+    \param d_virial Device memory to write computed virials
+    \param virial_pitch
+    \param N number of particles
+    \param d_pos device array of particle positions
+    \param d_rtag device array of particle reverse tags
+    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
+    \param d_sigma Device memory to write per paricle sigma
+    \param d_sigma_dash Device memory to write per particle sigma_dash
+    \param blist List of mesh bonds stored on the GPU
+    \param d_triangles device array of mesh triangles
+    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
+    \param d_params K params packed as Scalar variables
+    \param n_bond_type number of mesh bond types
+    \param d_flags Flag allocated on the device for use in checking for bonds that cannot be
+*/
+__global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
+                                                           Scalar* d_virial,
+                                                           const size_t virial_pitch,
+                                                           const unsigned int N,
+                                                           const Scalar4* d_pos,
+                                                           const int3* d_image,
+                                                           BoxDim box,
+                                                           const Scalar* volume,
+                                                           const group_storage<3>* tlist,
+                                                           const unsigned int* tpos_list,
+                                                           const Index2D tlist_idx,
+                                                           const unsigned int* n_triangles_list,
+                                                           Scalar2* d_params,
+                                                           const unsigned int n_triangle_type,
+                                                           unsigned int* d_flags)
+    {
+    // start by identifying which particle we are to handle
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (idx >= N)
+        return;
+
+
+    // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
+    int n_triangles = n_triangles_list[idx];
+
+    // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
+    Scalar4 postype = __ldg(d_pos + idx);
+    Scalar3 pos_a = make_scalar3(postype.x, postype.y, postype.z);
+    int3 image_a = d_image[idx];
+    pos_a = box.shift(pos_a, image_a);
+
+    Scalar4 force = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
+
+    // initialize the virial to 0
+    Scalar virial[6];
+    for (int i = 0; i < 6; i++)
+        virial[i] = Scalar(0.0);
+
+    // loop over all triangles
+    for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
+        {
+        group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
+
+        int cur_triangle_b = cur_triangle.idx[0];
+        int cur_triangle_c = cur_triangle.idx[1];
+        int cur_triangle_type = cur_triangle.idx[2];
+
+        // get the angle parameters (MEM TRANSFER: 8 bytes)
+        Scalar2 params = __ldg(d_params + cur_triangle_type);
+        Scalar K = params.x;
+        Scalar V0 = params.y;
+
+        Scalar VolDiff = volume[cur_triangle_type] - V0;
+
+        Scalar energy = K * VolDiff * VolDiff / (2 * V0 * N);
+
+        VolDiff = -K / V0 * VolDiff / 6.0;
+
+        int cur_triangle_abc = tpos_list[tlist_idx(idx, triangle_idx)];
+
+        // get the b-particle's position (MEM TRANSFER: 16 bytes)
+        Scalar4 bb_postype = d_pos[cur_triangle_b];
+        Scalar3 pos_b = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
+        int3 image_b = d_image[cur_triangle_b];
+        pos_b = box.shift(pos_b, image_b);
+
+        // get the c-particle's position (MEM TRANSFER: 16 bytes)
+        Scalar4 cc_postype = d_pos[cur_triangle_c];
+        Scalar3 pos_c = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
+        int3 image_c = d_image[cur_triangle_c];
+        pos_c = box.shift(pos_c, image_c);
+
+        vec3<Scalar> dVol;
+        if (cur_triangle_abc == 1)
+            {
+            dVol.x = pos_b.y * pos_c.z - pos_b.z * pos_c.y;
+            dVol.y = pos_b.z * pos_c.x - pos_b.x * pos_c.z;
+            dVol.z = pos_b.x * pos_c.y - pos_b.y * pos_c.x;
+            }
+        else
+            {
+            dVol.x = pos_c.y * pos_b.z - pos_c.z * pos_b.y;
+            dVol.y = pos_c.z * pos_b.x - pos_c.x * pos_b.z;
+            dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
+            }
+
+        Scalar3 Fa;
+
+        Fa.x = VolDiff * dVol.x;
+        Fa.y = VolDiff * dVol.y;
+        Fa.z = VolDiff * dVol.z;
+
+        force.x += Fa.x;
+        force.y += Fa.y;
+        force.z += Fa.z;
+        force.w = energy;
+
+        virial[0] += Scalar(1. / 2.) * pos_a.x * Fa.x; // xx
+        virial[1] += Scalar(1. / 2.) * pos_a.y * Fa.x; // xy
+        virial[2] += Scalar(1. / 2.) * pos_a.z * Fa.x; // xz
+        virial[3] += Scalar(1. / 2.) * pos_a.y * Fa.y; // yy
+        virial[4] += Scalar(1. / 2.) * pos_a.z * Fa.y; // yz
+        virial[5] += Scalar(1. / 2.) * pos_a.z * Fa.z; // zz
+        }
+
+    // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
+    d_force[idx] = force;
+
+    for (unsigned int i = 0; i < 6; i++)
+        d_virial[i * virial_pitch + idx] = virial[i];
+    }
+
+/*! \param d_force Device memory to write computed forces
+    \param d_virial Device memory to write computed virials
+    \param N number of particles
+    \param d_pos device array of particle positions
+    \param d_rtag device array of particle reverse tags
+    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
+    \param d_sigma Device memory to write per paricle sigma
+    \param d_sigma_dash Device memory to write per particle sigma_dash
+    \param blist List of mesh bonds stored on the GPU
+    \param d_triangles device array of mesh triangles
+    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
+    \param d_params K params packed as Scalar variables
+    \param n_bond_type number of mesh bond types
+    \param block_size Block size to use when performing calculations
+    \param d_flags Flag allocated on the device for use in checking for bonds that cannot be
+    \param compute_capability Device compute capability (200, 300, 350, ...)
+
+    \returns Any error code resulting from the kernel launch
+    \note Always returns hipSuccess in release builds to avoid the hipDeviceSynchronize()
+*/
+hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
+                                               Scalar* d_virial,
+                                               const size_t virial_pitch,
+                                               const unsigned int N,
+                                               const Scalar4* d_pos,
+                                               const int3* d_image,
+                                               const BoxDim& box,
+                                               const Scalar* volume,
+                                               const group_storage<3>* tlist,
+                                               const unsigned int* tpos_list,
+                                               const Index2D tlist_idx,
+                                               const unsigned int* n_triangles_list,
+                                               Scalar2* d_params,
+                                               const unsigned int n_triangle_type,
+                                               int block_size,
+                                               unsigned int* d_flags)
+    {
+    unsigned int max_block_size;
+    hipFuncAttributes attr;
+    hipFuncGetAttributes(&attr, (const void*)gpu_compute_volume_constraint_force_kernel);
+    max_block_size = attr.maxThreadsPerBlock;
+
+    unsigned int run_block_size = min(block_size, max_block_size);
+
+    // setup the grid to run the kernel
+    dim3 grid(N / run_block_size + 1, 1, 1);
+    dim3 threads(run_block_size, 1, 1);
+
+    // run the kernel
+    hipLaunchKernelGGL((gpu_compute_volume_constraint_force_kernel),
+                       dim3(grid),
+                       dim3(threads),
+                       0,
+                       0,
+                       d_force,
+                       d_virial,
+                       virial_pitch,
+                       N,
+                       d_pos,
+                       d_image,
+                       box,
+                       volume,
+                       tlist,
+                       tpos_list,
+                       tlist_idx,
+                       n_triangles_list,
+                       d_params,
+                       n_triangle_type,
+                       d_flags);
+
+    return hipSuccess;
+    }
+
+    } // end namespace kernel
+    } // end namespace md
+    } // end namespace hoomd
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index 8c88cbc01a..7f39f14b4d 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -26,7 +26,6 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 Scalar* d_sum_partial_volume,
                                                 const unsigned int N,
                                                 const unsigned int tN,
-                                                const unsigned int type_id,
                                                 const Scalar4* d_pos,
                                                 const int3* d_image,
                                                 const BoxDim& box,

From ee26c66a36ee8e30252e91ac2dd4f87b11bbb06f Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Thu, 2 Feb 2023 13:53:35 -0500
Subject: [PATCH 27/50] remove old file

---
 .../VolumeConservationMeshForceComputeGPU.cu1 | 460 ------------------
 1 file changed, 460 deletions(-)
 delete mode 100644 hoomd/md/VolumeConservationMeshForceComputeGPU.cu1

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu1 b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu1
deleted file mode 100644
index b3bf55e106..0000000000
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu1
+++ /dev/null
@@ -1,460 +0,0 @@
-// Copyright (c) 2009-2022 The Regents of the University of Michigan.
-// Part of HOOMD-blue, released under the BSD 3-Clause License.
-
-#include "hip/hip_runtime.h"
-// Copyright (c) 2009-2022 The Regents of the University of Michigan.
-// Part of HOOMD-blue, released under the BSD 3-Clause License.
-
-#include "VolumeConservationMeshForceComputeGPU.cuh"
-#include "hoomd/TextureTools.h"
-#include "hoomd/VectorMath.h"
-
-#include <assert.h>
-
-#include <stdio.h>
-
-/*! \file MeshVolumeConservationGPU.cu
-    \brief Defines GPU kernel code for calculating the volume_constraint forces. Used by
-   MeshVolumeConservationComputeGPU.
-*/
-
-namespace hoomd
-    {
-namespace md
-    {
-namespace kernel
-    {
-//! Kernel for calculating volume_constraint sigmas on the GPU
-/*! \param d_sigma Device memory to write per paricle sigma
-    \param d_sigma_dash Device memory to write per particle sigma_dash
-    \param N number of particles
-    \param d_pos device array of particle positions
-    \param d_rtag device array of particle reverse tags
-    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
-    \param blist List of mesh bonds stored on the GPU
-    \param d_triangles device array of mesh triangles
-    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
-*/
-__global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_sum_volume,
-                                                            const unsigned int N,
-                                                            const unsigned int tN,
-                                                            const unsigned int type_id,
-                                                            const Scalar4* d_pos,
-                                                            const int3* d_image,
-                                                            BoxDim box,
-                                                            const group_storage<3>* tlist,
-                                                            const unsigned int* tpos_list,
-                                                            const Index2D tlist_idx,
-                                                            const unsigned int* n_triangles_list)
-    {
-    HIP_DYNAMIC_SHARED(char, s_data)
-    Scalar* volume_sdata = (Scalar*)&s_data[0];
-
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-    Scalar *volume_transfer = (Scalar*)malloc(tN * sizeof *volume_transfer);
-
-    for( int i_types = 0; i_types < tN; i_types++)
-	    volume_transfer[i_types]=0;
-
-    if (idx < N)
-        {
-        int n_triangles = n_triangles_list[idx];
-        Scalar4 postype = __ldg(d_pos + idx);
-        Scalar3 pos_a = make_scalar3(postype.x, postype.y, postype.z);
-        int3 image_a = d_image[idx];
-        pos_a = box.shift(pos_a, image_a);
-    
-
-        for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
-            {
-            group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
-
-            int cur_triangle_type = cur_triangle.idx[2];
-
-            int cur_triangle_b = cur_triangle.idx[0];
-            int cur_triangle_c = cur_triangle.idx[1];
-
-            int cur_triangle_abc = tpos_list[tlist_idx(idx, triangle_idx)];
-
-            // get the b-particle's position (MEM TRANSFER: 16 bytes)
-            Scalar4 bb_postype = d_pos[cur_triangle_b];
-            Scalar3 pos_b = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
-            int3 image_b = d_image[cur_triangle_b];
-            pos_b = box.shift(pos_b, image_b);
-
-            // get the c-particle's position (MEM TRANSFER: 16 bytes)
-            Scalar4 cc_postype = d_pos[cur_triangle_c];
-            Scalar3 pos_c = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
-            int3 image_c = d_image[cur_triangle_c];
-            pos_c = box.shift(pos_c, image_c);
-
-            vec3<Scalar> dVol(0, 0, 0);
-            if (cur_triangle_abc == 1)
-                {
-                dVol.x = pos_b.y * pos_c.z - pos_b.z * pos_c.y;
-                dVol.y = pos_b.z * pos_c.x - pos_b.x * pos_c.z;
-                dVol.z = pos_b.x * pos_c.y - pos_b.y * pos_c.x;
-                }
-            else
-                {
-                dVol.x = pos_c.y * pos_b.z - pos_c.z * pos_b.y;
-                dVol.y = pos_c.z * pos_b.x - pos_c.x * pos_b.z;
-                dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
-                }
-            Scalar Vol = dVol.x * pos_a.x + dVol.y * pos_a.y + dVol.z * pos_a.z;
-            volume_transfer[cur_triangle_type] += Vol / 18.0;
-            }
-        }
-
-    for( int i_types = 0; i_types < tN; i_types++)
-        volume_sdata[threadIdx.x*tN+i_types] = volume_transfer[i_types];
-
-    __syncthreads();
-
-    // reduce the sum in parallel
-    int offs = blockDim.x >> 1;
-    while (offs > 0)
-        {
-        if (threadIdx.x < offs)
-            {
-            for( int i_types = 0; i_types < tN; i_types++)
-                 volume_sdata[threadIdx.x*tN + i_types] += volume_sdata[(threadIdx.x + offs)*tN + i_types];
-            }
-        offs >>= 1;
-        __syncthreads();
-        }
-
-    // write out our partial sum
-    if (threadIdx.x == 0)
-        {
-        for( int i_types = 0; i_types < tN; i_types++)
-            d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[i_types];
-        }
-    }
-
-//! Kernel function for reducing a partial sum to a full sum (one value)
-/*! \param d_sum Placeholder for the sum
-    \param d_partial_sum Array containing the partial sum
-    \param num_blocks Number of blocks to execute
-*/
-__global__ void gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum,
-                                                     Scalar* d_partial_sum,
-                                                     unsigned int tN,
-                                                     unsigned int type_id,
-                                                     unsigned int num_blocks)
-    {
-    HIP_DYNAMIC_SHARED(char, s_data)
-    Scalar* volume_sdata = (Scalar*)&s_data[0];
-
-    Scalar *sum = (Scalar*)malloc(tN * sizeof *sum);
-
-    for( int i_types = 0; i_types < tN; i_types++)
-	    sum[i_types]=0;
-
-    for (int start = 0; start < num_blocks; start += blockDim.x)
-        {
-        __syncthreads();
-        for( int i_types = 0; i_types < tN; i_types++)
-           {
-           if (start + threadIdx.x < num_blocks)
-               volume_sdata[threadIdx.x*tN + i_types] = d_partial_sum[(start + threadIdx.x) * tN + i_types];
-           else
-               volume_sdata[threadIdx.x*tN + i_types] = Scalar(0.0);
-	   }
-        __syncthreads();
-
-        // reduce the sum in parallel
-        int offs = blockDim.x >> 1;
-        while (offs > 0)
-            {
-            if (threadIdx.x < offs)
-                for( int i_types = 0; i_types < tN; i_types++)
-                    volume_sdata[threadIdx.x*tN + i_types] += volume_sdata[(threadIdx.x + offs)*tN + i_types];
-            offs >>= 1;
-            }
-
-        // everybody sums up sum2K
-        for( int i_types = 0; i_types < tN; i_types++)
-            sum[i_types] += volume_sdata[i_types];
-        }
-
-    if (threadIdx.x == 0)
-	{
-        for( int i_types = 0; i_types < tN; i_types++)
-            d_sum[i_types] = sum[i_types];
-	}
-    }
-
-/*! \param d_sigma Device memory to write per paricle sigma
-    \param d_sigma_dash Device memory to write per particle sigma_dash
-    \param N number of particles
-    \param d_pos device array of particle positions
-    \param d_rtag device array of particle reverse tags
-    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
-    \param blist List of mesh bonds stored on the GPU
-    \param d_triangles device array of mesh triangles
-    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
-    \param block_size Block size to use when performing calculations
-    \param compute_capability Device compute capability (200, 300, 350, ...)
-
-    \returns Any error code resulting from the kernel launch
-    \note Always returns hipSuccess in release builds to avoid the hipDeviceSynchronize()
-*/
-hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
-                                                Scalar* d_sum_partial_volume,
-                                                const unsigned int N,
-                                                const unsigned int tN,
-                                                const unsigned int type_id,
-                                                const Scalar4* d_pos,
-                                                const int3* d_image,
-                                                const BoxDim& box,
-                                                const group_storage<3>* tlist,
-                                                const unsigned int* tpos_list,
-                                                const Index2D tlist_idx,
-                                                const unsigned int* n_triangles_list,
-                                                unsigned int block_size,
-                                                unsigned int num_blocks)
-    {
-    dim3 grid(num_blocks, 1, 1);
-    dim3 grid1(1, 1, 1);
-    dim3 threads(block_size, 1, 1);
-
-    // run the kernel
-    hipLaunchKernelGGL((gpu_compute_volume_constraint_volume_kernel),
-                       dim3(grid),
-                       dim3(threads),
-                       block_size * sizeof(Scalar),
-                       0,
-                       d_sum_partial_volume,
-                       N,
-                       tN,
-                       type_id,
-                       d_pos,
-                       d_image,
-                       box,
-                       tlist,
-                       tpos_list,
-                       tlist_idx,
-                       n_triangles_list);
-
-    hipLaunchKernelGGL((gpu_volume_reduce_partial_sum_kernel),
-                       dim3(grid1),
-                       dim3(threads),
-                       block_size * sizeof(Scalar),
-                       0,
-                       d_sum_volume,
-                       d_sum_partial_volume,
-                       tN,
-                       type_id,
-                       num_blocks);
-
-    return hipSuccess;
-    }
-
-//! Kernel for calculating volume_constraint sigmas on the GPU
-/*! \param d_force Device memory to write computed forces
-    \param d_virial Device memory to write computed virials
-    \param virial_pitch
-    \param N number of particles
-    \param d_pos device array of particle positions
-    \param d_rtag device array of particle reverse tags
-    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
-    \param d_sigma Device memory to write per paricle sigma
-    \param d_sigma_dash Device memory to write per particle sigma_dash
-    \param blist List of mesh bonds stored on the GPU
-    \param d_triangles device array of mesh triangles
-    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
-    \param d_params K params packed as Scalar variables
-    \param n_bond_type number of mesh bond types
-    \param d_flags Flag allocated on the device for use in checking for bonds that cannot be
-*/
-__global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
-                                                           Scalar* d_virial,
-                                                           const size_t virial_pitch,
-                                                           const unsigned int N,
-                                                           const Scalar4* d_pos,
-                                                           const int3* d_image,
-                                                           BoxDim box,
-                                                           const Scalar* volume,
-                                                           const group_storage<3>* tlist,
-                                                           const unsigned int* tpos_list,
-                                                           const Index2D tlist_idx,
-                                                           const unsigned int* n_triangles_list,
-                                                           Scalar2* d_params,
-                                                           const unsigned int n_triangle_type,
-                                                           unsigned int* d_flags)
-    {
-    // start by identifying which particle we are to handle
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if (idx >= N)
-        return;
-
-
-    // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
-    int n_triangles = n_triangles_list[idx];
-
-    // read in the position of our b-particle from the a-b-c triplet. (MEM TRANSFER: 16 bytes)
-    Scalar4 postype = __ldg(d_pos + idx);
-    Scalar3 pos_a = make_scalar3(postype.x, postype.y, postype.z);
-    int3 image_a = d_image[idx];
-    pos_a = box.shift(pos_a, image_a);
-
-    Scalar4 force = make_scalar4(Scalar(0.0), Scalar(0.0), Scalar(0.0), Scalar(0.0));
-
-    // initialize the virial to 0
-    Scalar virial[6];
-    for (int i = 0; i < 6; i++)
-        virial[i] = Scalar(0.0);
-
-    // loop over all triangles
-    for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
-        {
-        group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
-
-        int cur_triangle_b = cur_triangle.idx[0];
-        int cur_triangle_c = cur_triangle.idx[1];
-        int cur_triangle_type = cur_triangle.idx[2];
-
-        // get the angle parameters (MEM TRANSFER: 8 bytes)
-        Scalar2 params = __ldg(d_params + cur_triangle_type);
-        Scalar K = params.x;
-        Scalar V0 = params.y;
-
-        Scalar VolDiff = volume[cur_triangle_type] - V0;
-
-        Scalar energy = K * VolDiff * VolDiff / (2 * V0 * N);
-
-        VolDiff = -K / V0 * VolDiff / 6.0;
-
-        int cur_triangle_abc = tpos_list[tlist_idx(idx, triangle_idx)];
-
-        // get the b-particle's position (MEM TRANSFER: 16 bytes)
-        Scalar4 bb_postype = d_pos[cur_triangle_b];
-        Scalar3 pos_b = make_scalar3(bb_postype.x, bb_postype.y, bb_postype.z);
-        int3 image_b = d_image[cur_triangle_b];
-        pos_b = box.shift(pos_b, image_b);
-
-        // get the c-particle's position (MEM TRANSFER: 16 bytes)
-        Scalar4 cc_postype = d_pos[cur_triangle_c];
-        Scalar3 pos_c = make_scalar3(cc_postype.x, cc_postype.y, cc_postype.z);
-        int3 image_c = d_image[cur_triangle_c];
-        pos_c = box.shift(pos_c, image_c);
-
-        vec3<Scalar> dVol;
-        if (cur_triangle_abc == 1)
-            {
-            dVol.x = pos_b.y * pos_c.z - pos_b.z * pos_c.y;
-            dVol.y = pos_b.z * pos_c.x - pos_b.x * pos_c.z;
-            dVol.z = pos_b.x * pos_c.y - pos_b.y * pos_c.x;
-            }
-        else
-            {
-            dVol.x = pos_c.y * pos_b.z - pos_c.z * pos_b.y;
-            dVol.y = pos_c.z * pos_b.x - pos_c.x * pos_b.z;
-            dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
-            }
-
-        Scalar3 Fa;
-
-        Fa.x = VolDiff * dVol.x;
-        Fa.y = VolDiff * dVol.y;
-        Fa.z = VolDiff * dVol.z;
-
-        force.x += Fa.x;
-        force.y += Fa.y;
-        force.z += Fa.z;
-        force.w = energy;
-
-        virial[0] += Scalar(1. / 2.) * pos_a.x * Fa.x; // xx
-        virial[1] += Scalar(1. / 2.) * pos_a.y * Fa.x; // xy
-        virial[2] += Scalar(1. / 2.) * pos_a.z * Fa.x; // xz
-        virial[3] += Scalar(1. / 2.) * pos_a.y * Fa.y; // yy
-        virial[4] += Scalar(1. / 2.) * pos_a.z * Fa.y; // yz
-        virial[5] += Scalar(1. / 2.) * pos_a.z * Fa.z; // zz
-        }
-
-    // now that the force calculation is complete, write out the result (MEM TRANSFER: 20 bytes)
-    d_force[idx] = force;
-
-    for (unsigned int i = 0; i < 6; i++)
-        d_virial[i * virial_pitch + idx] = virial[i];
-    }
-
-/*! \param d_force Device memory to write computed forces
-    \param d_virial Device memory to write computed virials
-    \param N number of particles
-    \param d_pos device array of particle positions
-    \param d_rtag device array of particle reverse tags
-    \param box Box dimensions (in GPU format) to use for periodic boundary conditions
-    \param d_sigma Device memory to write per paricle sigma
-    \param d_sigma_dash Device memory to write per particle sigma_dash
-    \param blist List of mesh bonds stored on the GPU
-    \param d_triangles device array of mesh triangles
-    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
-    \param d_params K params packed as Scalar variables
-    \param n_bond_type number of mesh bond types
-    \param block_size Block size to use when performing calculations
-    \param d_flags Flag allocated on the device for use in checking for bonds that cannot be
-    \param compute_capability Device compute capability (200, 300, 350, ...)
-
-    \returns Any error code resulting from the kernel launch
-    \note Always returns hipSuccess in release builds to avoid the hipDeviceSynchronize()
-*/
-hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
-                                               Scalar* d_virial,
-                                               const size_t virial_pitch,
-                                               const unsigned int N,
-                                               const Scalar4* d_pos,
-                                               const int3* d_image,
-                                               const BoxDim& box,
-                                               const Scalar* volume,
-                                               const group_storage<3>* tlist,
-                                               const unsigned int* tpos_list,
-                                               const Index2D tlist_idx,
-                                               const unsigned int* n_triangles_list,
-                                               Scalar2* d_params,
-                                               const unsigned int n_triangle_type,
-                                               int block_size,
-                                               unsigned int* d_flags)
-    {
-    unsigned int max_block_size;
-    hipFuncAttributes attr;
-    hipFuncGetAttributes(&attr, (const void*)gpu_compute_volume_constraint_force_kernel);
-    max_block_size = attr.maxThreadsPerBlock;
-
-    unsigned int run_block_size = min(block_size, max_block_size);
-
-    // setup the grid to run the kernel
-    dim3 grid(N / run_block_size + 1, 1, 1);
-    dim3 threads(run_block_size, 1, 1);
-
-    // run the kernel
-    hipLaunchKernelGGL((gpu_compute_volume_constraint_force_kernel),
-                       dim3(grid),
-                       dim3(threads),
-                       0,
-                       0,
-                       d_force,
-                       d_virial,
-                       virial_pitch,
-                       N,
-                       d_pos,
-                       d_image,
-                       box,
-                       volume,
-                       tlist,
-                       tpos_list,
-                       tlist_idx,
-                       n_triangles_list,
-                       d_params,
-                       n_triangle_type,
-                       d_flags);
-
-    return hipSuccess;
-    }
-
-    } // end namespace kernel
-    } // end namespace md
-    } // end namespace hoomd

From d45f0f0f95e8066a76c9a2bbf06202a4f3240f97 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Tue, 28 Feb 2023 10:43:15 -0500
Subject: [PATCH 28/50] make VolumeConservationMeshForceCompute MPI ready

---
 .../md/VolumeConservationMeshForceCompute.cc  | 39 +++++++++++++++++--
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index eac324546d..7dce96a840 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -272,8 +272,10 @@ void VolumeConservationMeshForceCompute::computeVolume()
     // get a local copy of the simulation box too
     const BoxDim& box = m_pdata->getGlobalBox();
 
-    for (unsigned int i = 0; i < m_mesh_data->getMeshTriangleData()->getNTypes(); i++)
-        m_volume[i] = 0;
+    const unsigned int n_types = m_mesh_data->getMeshTriangleData()->getNTypes();
+    std::vector<Scalar> global_volume(n_types);
+    for (unsigned int i = 0; i < n_types; i++)
+        global_volume[i] = 0;
 
     // for each of the angles
     const unsigned int size = (unsigned int)m_mesh_data->getMeshTriangleData()->getN();
@@ -307,12 +309,41 @@ void VolumeConservationMeshForceCompute::computeVolume()
         pos_b = box.shift(pos_b, h_image.data[idx_b]);
         pos_c = box.shift(pos_c, h_image.data[idx_c]);
 
-        Scalar vol_tri = dot(cross(pos_c, pos_b), pos_a) / 6.0;
+        Scalar volume_tri = dot(cross(pos_c, pos_b), pos_a) / 6.0;
 
         unsigned int triangle_type = m_mesh_data->getMeshTriangleData()->getTypeByIndex(i);
 
-        m_volume[triangle_type] += vol_tri;
+#ifdef ENABLE_MPI
+        if (m_pdata->getDomainDecomposition())
+            {
+ 	    volume_tri /= 3;
+
+	    if(idx_a < m_pdata->getN()) global_volume[triangle_type] += volume_tri;
+	    if(idx_b < m_pdata->getN()) global_volume[triangle_type] += volume_tri;
+	    if(idx_c < m_pdata->getN()) global_volume[triangle_type] += volume_tri;
+            }
+        else
+#endif
+            {
+            global_volume[triangle_type] += volume_tri;
+	    }
         }
+
+#ifdef ENABLE_MPI
+        if (m_pdata->getDomainDecomposition())
+            {
+            MPI_Allreduce(MPI_IN_PLACE,
+                          &global_volume[0],
+                          n_types,
+                          MPI_HOOMD_SCALAR,
+                          MPI_SUM,
+                          m_exec_conf->getMPICommunicator());
+            }
+#endif
+
+    	for (unsigned int i = 0; i < n_types; i++)
+        	m_volume[i] = global_volume[i];
+
     }
 
 namespace detail

From 228c558eafdc87f509e5a9ffc977f04ebdcb2e68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Thu, 2 Mar 2023 18:34:49 -0500
Subject: [PATCH 29/50] use NGlobal to normalize volume conservation energy

---
 .../md/VolumeConservationMeshForceCompute.cc  |  44 +++----
 .../VolumeConservationMeshForceComputeGPU.cc  |   7 +-
 .../VolumeConservationMeshForceComputeGPU.cu  | 119 +++++++++---------
 .../VolumeConservationMeshForceComputeGPU.cuh |   5 +-
 4 files changed, 89 insertions(+), 86 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index 7dce96a840..30c7e2e843 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Copyright (c) 2009-2023 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 #include "VolumeConservationMeshForceCompute.h"
@@ -177,8 +177,8 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
 
         Scalar VolDiff = m_volume[triangle_type] - m_V0[triangle_type];
 
-        Scalar energy
-            = m_K[triangle_type] * VolDiff * VolDiff / (2 * m_V0[triangle_type] * m_pdata->getN());
+        Scalar energy = m_K[triangle_type] * VolDiff * VolDiff
+                        / (2 * m_V0[triangle_type] * m_pdata->getNGlobal());
 
         VolDiff = -m_K[triangle_type] / m_V0[triangle_type] * VolDiff / 6.0;
 
@@ -316,34 +316,36 @@ void VolumeConservationMeshForceCompute::computeVolume()
 #ifdef ENABLE_MPI
         if (m_pdata->getDomainDecomposition())
             {
- 	    volume_tri /= 3;
-
-	    if(idx_a < m_pdata->getN()) global_volume[triangle_type] += volume_tri;
-	    if(idx_b < m_pdata->getN()) global_volume[triangle_type] += volume_tri;
-	    if(idx_c < m_pdata->getN()) global_volume[triangle_type] += volume_tri;
+            volume_tri /= 3;
+
+            if (idx_a < m_pdata->getN())
+                global_volume[triangle_type] += volume_tri;
+            if (idx_b < m_pdata->getN())
+                global_volume[triangle_type] += volume_tri;
+            if (idx_c < m_pdata->getN())
+                global_volume[triangle_type] += volume_tri;
             }
         else
 #endif
             {
             global_volume[triangle_type] += volume_tri;
-	    }
+            }
         }
 
 #ifdef ENABLE_MPI
-        if (m_pdata->getDomainDecomposition())
-            {
-            MPI_Allreduce(MPI_IN_PLACE,
-                          &global_volume[0],
-                          n_types,
-                          MPI_HOOMD_SCALAR,
-                          MPI_SUM,
-                          m_exec_conf->getMPICommunicator());
-            }
+    if (m_pdata->getDomainDecomposition())
+        {
+        MPI_Allreduce(MPI_IN_PLACE,
+                      &global_volume[0],
+                      n_types,
+                      MPI_HOOMD_SCALAR,
+                      MPI_SUM,
+                      m_exec_conf->getMPICommunicator());
+        }
 #endif
 
-    	for (unsigned int i = 0; i < n_types; i++)
-        	m_volume[i] = global_volume[i];
-
+    for (unsigned int i = 0; i < n_types; i++)
+        m_volume[i] = global_volume[i];
     }
 
 namespace detail
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index faad184969..53b0752a31 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Copyright (c) 2009-2023 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 #include "VolumeConservationMeshForceComputeGPU.h"
@@ -115,6 +115,7 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
                                                 d_virial.data,
                                                 m_virial.getPitch(),
                                                 m_pdata->getN(),
+                                                m_pdata->getNGlobal(),
                                                 d_pos.data,
                                                 d_image.data,
                                                 box,
@@ -181,7 +182,7 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
                                          access_mode::overwrite);
     ArrayHandle<Scalar> d_sumVol(m_sum, access_location::device, access_mode::overwrite);
 
-    unsigned int NTypes =  m_mesh_data->getMeshTriangleData()->getNTypes();
+    unsigned int NTypes = m_mesh_data->getMeshTriangleData()->getNTypes();
 
     kernel::gpu_compute_volume_constraint_volume(d_sumVol.data,
                                                  d_partial_sumVol.data,
@@ -216,7 +217,7 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
         }
 #endif
     for (unsigned int i = 0; i < NTypes; i++)
-    	h_volume.data[i] = h_sumVol.data[i];
+        h_volume.data[i] = h_sumVol.data[i];
     }
 
 namespace detail
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 3fda35f999..ce804e1acc 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -1,4 +1,4 @@
-// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Copyright (c) 2009-2023 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 #include "hip/hip_runtime.h"
@@ -51,10 +51,10 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
 
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
-    Scalar *volume_transfer = (Scalar*)malloc(tN * sizeof *volume_transfer);
+    Scalar* volume_transfer = (Scalar*)malloc(tN * sizeof *volume_transfer);
 
-    for( int i_types = 0; i_types < tN; i_types++)
-	    volume_transfer[i_types]=0;
+    for (int i_types = 0; i_types < tN; i_types++)
+        volume_transfer[i_types] = 0;
 
     if (idx < N)
         {
@@ -63,7 +63,6 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
         Scalar3 pos_a = make_scalar3(postype.x, postype.y, postype.z);
         int3 image_a = d_image[idx];
         pos_a = box.shift(pos_a, image_a);
-    
 
         for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
             {
@@ -105,31 +104,30 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
             }
         }
 
+    for (int i_types = 0; i_types < tN; i_types++)
+        {
+        volume_sdata[threadIdx.x] = volume_transfer[i_types];
+
+        __syncthreads();
 
-    for( int i_types = 0; i_types < tN; i_types++)
-       {
-       volume_sdata[threadIdx.x]  = volume_transfer[i_types];
-
-       __syncthreads();
-
-       // reduce the sum in parallel
-       int offs = blockDim.x >> 1;
-       while (offs > 0)
-           {
-           if (threadIdx.x < offs)
-               {
-               volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
-               }
-           offs >>= 1;
-           __syncthreads();
-           }
-
-       // write out our partial sum
-       if (threadIdx.x == 0)
-           {
-           d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[0];
-           }
-       }
+        // reduce the sum in parallel
+        int offs = blockDim.x >> 1;
+        while (offs > 0)
+            {
+            if (threadIdx.x < offs)
+                {
+                volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+                }
+            offs >>= 1;
+            __syncthreads();
+            }
+
+        // write out our partial sum
+        if (threadIdx.x == 0)
+            {
+            d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[0];
+            }
+        }
     }
 
 //! Kernel function for reducing a partial sum to a full sum (one value)
@@ -145,36 +143,35 @@ __global__ void gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum,
     HIP_DYNAMIC_SHARED(char, s_data)
     Scalar* volume_sdata = (Scalar*)&s_data[0];
 
+    for (int i_types = 0; i_types < tN; i_types++)
+        {
+        // sum up the values in the partial sum via a sliding window
+        Scalar sum = Scalar(0.0);
+        for (int start = 0; start < num_blocks; start += blockDim.x)
+            {
+            __syncthreads();
+            if (start + threadIdx.x < num_blocks)
+                volume_sdata[threadIdx.x] = d_partial_sum[(start + threadIdx.x) * tN + i_types];
+            else
+                volume_sdata[threadIdx.x] = Scalar(0.0);
+            __syncthreads();
+
+            // reduce the sum in parallel
+            int offs = blockDim.x >> 1;
+            while (offs > 0)
+                {
+                if (threadIdx.x < offs)
+                    volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+                offs >>= 1;
+                }
+
+            // everybody sums up sum2K
+            sum += volume_sdata[0];
+            }
 
-    for( int i_types = 0; i_types < tN; i_types++)
-       {
-       // sum up the values in the partial sum via a sliding window
-       Scalar sum = Scalar(0.0);
-       for (int start = 0; start < num_blocks; start += blockDim.x)
-           {
-           __syncthreads();
-           if (start + threadIdx.x < num_blocks)
-               volume_sdata[threadIdx.x] = d_partial_sum[(start + threadIdx.x) * tN + i_types];
-           else
-               volume_sdata[threadIdx.x] = Scalar(0.0);
-           __syncthreads();
-
-           // reduce the sum in parallel
-           int offs = blockDim.x >> 1;
-           while (offs > 0)
-               {
-               if (threadIdx.x < offs)
-                   volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
-               offs >>= 1;
-               }
-
-           // everybody sums up sum2K
-           sum += volume_sdata[0];
-           }
-
-       if (threadIdx.x == 0)
-           d_sum[i_types] = sum;
-       }
+        if (threadIdx.x == 0)
+            d_sum[i_types] = sum;
+        }
     }
 
 /*! \param d_sigma Device memory to write per paricle sigma
@@ -261,6 +258,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            Scalar* d_virial,
                                                            const size_t virial_pitch,
                                                            const unsigned int N,
+                                                           const unsigned int gN,
                                                            const Scalar4* d_pos,
                                                            const int3* d_image,
                                                            BoxDim box,
@@ -279,7 +277,6 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
     if (idx >= N)
         return;
 
-
     // load in the length of the list for this thread (MEM TRANSFER: 4 bytes)
     int n_triangles = n_triangles_list[idx];
 
@@ -312,7 +309,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
 
         Scalar VolDiff = volume[cur_triangle_type] - V0;
 
-        Scalar energy = K * VolDiff * VolDiff / (2 * V0 * N);
+        Scalar energy = K * VolDiff * VolDiff / (2 * V0 * gN);
 
         VolDiff = -K / V0 * VolDiff / 6.0;
 
@@ -394,6 +391,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                Scalar* d_virial,
                                                const size_t virial_pitch,
                                                const unsigned int N,
+                                               const unsigned int gN,
                                                const Scalar4* d_pos,
                                                const int3* d_image,
                                                const BoxDim& box,
@@ -428,6 +426,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                        d_virial,
                        virial_pitch,
                        N,
+                       gN,
                        d_pos,
                        d_image,
                        box,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index 7f39f14b4d..0edcdc6adf 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -1,9 +1,9 @@
-// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Copyright (c) 2009-2023 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
+#include "hoomd/BondedGroupData.cuh"
 #include "hoomd/HOOMDMath.h"
 #include "hoomd/Index1D.h"
-#include "hoomd/BondedGroupData.cuh"
 #include "hoomd/ParticleData.cuh"
 #include <hip/hip_runtime.h>
 
@@ -41,6 +41,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                Scalar* d_virial,
                                                const size_t virial_pitch,
                                                const unsigned int N,
+                                               const unsigned int gN,
                                                const Scalar4* d_pos,
                                                const int3* d_image,
                                                const BoxDim& box,

From 12a8621fb0290360a53d1ec098e394b60aa6697f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20Sch=C3=B6nh=C3=B6fer?=
 <philipp@Philipps-MacBook-Pro.local>
Date: Fri, 17 Mar 2023 18:20:30 -0400
Subject: [PATCH 30/50] free memory in Volume calculation

---
 hoomd/md/VolumeConservationMeshForceComputeGPU.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index ce804e1acc..af2bf5501a 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -128,6 +128,7 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
             d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[0];
             }
         }
+    free(volume_transfer);
     }
 
 //! Kernel function for reducing a partial sum to a full sum (one value)

From 7212e41aacd2887f980cb66803b5bedde2969515 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Tue, 21 Mar 2023 15:56:47 -0400
Subject: [PATCH 31/50] fix CUDA issue for large systems

---
 .../VolumeConservationMeshForceComputeGPU.cu  | 62 +++++++++++--------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index af2bf5501a..9b948a7c05 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -38,6 +38,7 @@ namespace kernel
 __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_sum_volume,
                                                             const unsigned int N,
                                                             const unsigned int tN,
+                                                            const unsigned int cN,
                                                             const Scalar4* d_pos,
                                                             const int3* d_image,
                                                             BoxDim box,
@@ -51,10 +52,11 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
 
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
-    Scalar* volume_transfer = (Scalar*)malloc(tN * sizeof *volume_transfer);
+    //Scalar* volume_transfer = (Scalar*)malloc(tN * sizeof *volume_transfer);
 
-    for (int i_types = 0; i_types < tN; i_types++)
-        volume_transfer[i_types] = 0;
+    //for (unsigned int i_types = 0; i_types < tN; i_types++)
+    //    volume_transfer[i_types] = 0;
+    Scalar volume_transfer = 0;
 
     if (idx < N)
         {
@@ -67,10 +69,12 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
         for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
             {
             group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
+            int cur_triangle_type = cur_triangle.idx[2];
+
+	    if(cur_triangle_type != cN) continue;
 
             int cur_triangle_b = cur_triangle.idx[0];
             int cur_triangle_c = cur_triangle.idx[1];
-            int cur_triangle_type = cur_triangle.idx[2];
 
             int cur_triangle_abc = tpos_list[tlist_idx(idx, triangle_idx)];
 
@@ -100,13 +104,14 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
                 dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
                 }
             Scalar Vol = dVol.x * pos_a.x + dVol.y * pos_a.y + dVol.z * pos_a.z;
-            volume_transfer[cur_triangle_type] += Vol / 18.0;
+            //volume_transfer[cur_triangle_type] += Vol / 18.0;
+            volume_transfer += Vol / 18.0;
             }
         }
 
-    for (int i_types = 0; i_types < tN; i_types++)
+    //for (unsigned int i_types = 0; i_types < tN; i_types++)
         {
-        volume_sdata[threadIdx.x] = volume_transfer[i_types];
+        volume_sdata[threadIdx.x] = volume_transfer;//[i_types];
 
         __syncthreads();
 
@@ -125,10 +130,11 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
         // write out our partial sum
         if (threadIdx.x == 0)
             {
-            d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[0];
+            //d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[0];
+            d_partial_sum_volume[blockIdx.x * tN + cN] = volume_sdata[0];
             }
         }
-    free(volume_transfer);
+    //free(volume_transfer);
     }
 
 //! Kernel function for reducing a partial sum to a full sum (one value)
@@ -144,7 +150,7 @@ __global__ void gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum,
     HIP_DYNAMIC_SHARED(char, s_data)
     Scalar* volume_sdata = (Scalar*)&s_data[0];
 
-    for (int i_types = 0; i_types < tN; i_types++)
+    for (unsigned int i_types = 0; i_types < tN; i_types++)
         {
         // sum up the values in the partial sum via a sliding window
         Scalar sum = Scalar(0.0);
@@ -208,22 +214,26 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
     dim3 grid1(1, 1, 1);
     dim3 threads(block_size, 1, 1);
 
-    // run the kernel
-    hipLaunchKernelGGL((gpu_compute_volume_constraint_volume_kernel),
-                       dim3(grid),
-                       dim3(threads),
-                       block_size * sizeof(Scalar),
-                       0,
-                       d_sum_partial_volume,
-                       N,
-                       tN,
-                       d_pos,
-                       d_image,
-                       box,
-                       tlist,
-                       tpos_list,
-                       tlist_idx,
-                       n_triangles_list);
+    for (unsigned int i_types = 0; i_types < tN; i_types++)
+        {
+        // run the kernel
+        hipLaunchKernelGGL((gpu_compute_volume_constraint_volume_kernel),
+                           dim3(grid),
+                           dim3(threads),
+                           block_size * sizeof(Scalar),
+                           0,
+                           d_sum_partial_volume,
+                           N,
+                           tN,
+			   i_types,
+                           d_pos,
+                           d_image,
+                           box,
+                           tlist,
+                           tpos_list,
+                           tlist_idx,
+                           n_triangles_list);
+        }
 
     hipLaunchKernelGGL((gpu_volume_reduce_partial_sum_kernel),
                        dim3(grid1),

From d28d7932bbebf66fefd7c10c7a815ec1579ff5cd Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Wed, 19 Jul 2023 11:25:20 -0400
Subject: [PATCH 32/50] change "alpha" to "default_gamma" in pytests

---
 hoomd/md/pytest/test_meshpotential.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hoomd/md/pytest/test_meshpotential.py b/hoomd/md/pytest/test_meshpotential.py
index d2c2ea35e5..64f61a0545 100644
--- a/hoomd/md/pytest/test_meshpotential.py
+++ b/hoomd/md/pytest/test_meshpotential.py
@@ -272,7 +272,7 @@ def test_volume(simulation_factory, tetrahedron_snapshot_factory):
 
     langevin = hoomd.md.methods.Langevin(kT=1,
                                          filter=hoomd.filter.All(),
-                                         alpha=0.1)
+                                         default_gamma=0.1)
     integrator.methods.append(langevin)
     sim.operations.integrator = integrator
 

From 9b8c1d1122c7d4016e292f9f4515f5d22ac57367 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Thu, 28 Sep 2023 15:46:20 -0400
Subject: [PATCH 33/50] fix energy calculation and add ignore_type feature

---
 hoomd/MeshDefinition.cc                       | 13 +++++
 hoomd/MeshDefinition.h                        |  8 +++
 .../md/VolumeConservationMeshForceCompute.cc  | 51 +++++++++++++------
 hoomd/md/VolumeConservationMeshForceCompute.h |  5 +-
 .../VolumeConservationMeshForceComputeGPU.cc  | 44 +++++++++++-----
 .../VolumeConservationMeshForceComputeGPU.cu  | 21 +++++---
 .../VolumeConservationMeshForceComputeGPU.cuh |  5 +-
 .../VolumeConservationMeshForceComputeGPU.h   |  3 +-
 hoomd/md/mesh/conservation.py                 |  8 +--
 hoomd/md/mesh/potential.py                    | 37 +++++++++++++-
 hoomd/md/pytest/test_meshpotential.py         | 43 +++++++++++++---
 11 files changed, 185 insertions(+), 53 deletions(-)

diff --git a/hoomd/MeshDefinition.cc b/hoomd/MeshDefinition.cc
index 827a3e1b16..c54864eb80 100644
--- a/hoomd/MeshDefinition.cc
+++ b/hoomd/MeshDefinition.cc
@@ -28,10 +28,16 @@ MeshDefinition::MeshDefinition(std::shared_ptr<SystemDefinition> sysdef, unsigne
           std::shared_ptr<TriangleData>(new TriangleData(m_sysdef->getParticleData(), n_types)))
 
     {
+    // allocate the max number of neighbors per type allowed
+    GlobalArray<unsigned int> globalN(n_types, m_sysdef->getParticleData()->getExecConf());
+    m_globalN.swap(globalN);
+    TAG_ALLOCATION(m_globalN);
+
     }
 
 void MeshDefinition::setTypes(pybind11::list types)
     {
+    m_globalN.resize(len(types));
     for (unsigned int i = 0; i < len(types); i++)
         {
         m_meshbond_data->setTypeName(i, types[i].cast<string>());
@@ -106,6 +112,11 @@ void MeshDefinition::setTriangulationData(pybind11::dict triangulation)
     triangle_data.resize(static_cast<unsigned int>(len_triang));
     TriangleData::members_t triangle_new;
 
+    ArrayHandle<unsigned int> h_globalN(m_globalN, access_location::host, access_mode::overwrite);
+
+    for (unsigned int i = 0; i < m_meshtriangle_data->getNTypes(); i++)
+        h_globalN.data[i] = 0;
+
     for (size_t i = 0; i < len_triang; i++)
         {
         triangle_new.tag[0] = ptr1[i * 3];
@@ -113,6 +124,8 @@ void MeshDefinition::setTriangulationData(pybind11::dict triangulation)
         triangle_new.tag[2] = ptr1[i * 3 + 2];
         triangle_data.groups[i] = triangle_new;
         triangle_data.type_id[i] = ptr2[i];
+
+        h_globalN.data[triangle_data.type_id[i]] += 1;
         }
 
     m_meshtriangle_data = std::shared_ptr<TriangleData>(
diff --git a/hoomd/MeshDefinition.h b/hoomd/MeshDefinition.h
index 5cefccd2f7..fe71822df2 100644
--- a/hoomd/MeshDefinition.h
+++ b/hoomd/MeshDefinition.h
@@ -9,6 +9,8 @@
 #error This header cannot be compiled by nvcc
 #endif
 
+#include "hoomd/GlobalArray.h"
+
 #include "BondedGroupData.h"
 #include "MeshGroupData.h"
 #include "SystemDefinition.h"
@@ -85,6 +87,11 @@ class PYBIND11_EXPORT MeshDefinition
         return triangles.getSize();
         }
 
+    const GlobalArray<unsigned int>& getPerTypeSize() const
+        {
+        return m_globalN;
+        }
+
     void setTypes(pybind11::list types);
 
     BondData::Snapshot getBondData();
@@ -96,6 +103,7 @@ class PYBIND11_EXPORT MeshDefinition
     void setTriangulationData(pybind11::dict triangulation);
 
     private:
+    GlobalArray<unsigned int> m_globalN;
     std::shared_ptr<SystemDefinition>
         m_sysdef; //!< System definition later needed for dynamic bonding
     std::shared_ptr<MeshBondData> m_meshbond_data;     //!< Bond data for the mesh
diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index 30c7e2e843..790b8b1110 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -23,13 +23,17 @@ namespace md
 */
 VolumeConservationMeshForceCompute::VolumeConservationMeshForceCompute(
     std::shared_ptr<SystemDefinition> sysdef,
-    std::shared_ptr<MeshDefinition> meshdef)
-    : ForceCompute(sysdef), m_K(NULL), m_V0(NULL), m_mesh_data(meshdef), m_volume(0)
+    std::shared_ptr<MeshDefinition> meshdef,
+    bool ignore_type)
+    : ForceCompute(sysdef), m_K(NULL), m_V0(NULL), m_mesh_data(meshdef), m_volume(0), 
+	m_ignore_type(ignore_type)
     {
     m_exec_conf->msg->notice(5) << "Constructing VolumeConservationMeshForceCompute" << endl;
 
     unsigned int n_types = m_mesh_data->getMeshTriangleData()->getNTypes();
 
+    if(m_ignore_type) n_types = 1;
+
     // allocate the parameters
     m_K = new Scalar[n_types];
 
@@ -58,14 +62,17 @@ VolumeConservationMeshForceCompute::~VolumeConservationMeshForceCompute()
 */
 void VolumeConservationMeshForceCompute::setParams(unsigned int type, Scalar K, Scalar V0)
     {
-    m_K[type] = K;
-    m_V0[type] = V0;
-
-    // check for some silly errors a user could make
-    if (K <= 0)
-        m_exec_conf->msg->warning() << "volume: specified K <= 0" << endl;
-    if (V0 <= 0)
-        m_exec_conf->msg->warning() << "volume: specified V0 <= 0" << endl;
+    if(!m_ignore_type || type == 0 ) 
+    	{
+        m_K[type] = K;
+        m_V0[type] = V0;
+
+        // check for some silly errors a user could make
+        if (K <= 0)
+            m_exec_conf->msg->warning() << "volume: specified K <= 0" << endl;
+        if (V0 <= 0)
+            m_exec_conf->msg->warning() << "volume: specified V0 <= 0" << endl;
+	}
     }
 
 void VolumeConservationMeshForceCompute::setParamsPython(std::string type, pybind11::dict params)
@@ -83,6 +90,7 @@ pybind11::dict VolumeConservationMeshForceCompute::getParams(std::string type)
         m_exec_conf->msg->error() << "mesh.helfrich: Invalid mesh type specified" << endl;
         throw runtime_error("Error setting parameters in VolumeConservationMeshForceCompute");
         }
+    if(m_ignore_type) typ = 0;
     pybind11::dict params;
     params["k"] = m_K[typ];
     params["V0"] = m_V0[typ];
@@ -129,6 +137,8 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
     PDataFlags flags = m_pdata->getFlags();
     bool compute_virial = flags[pdata_flag::pressure_tensor];
 
+    ArrayHandle<unsigned int> h_pts(m_mesh_data->getPerTypeSize(), access_location::host, access_mode::read);
+
     Scalar helfrich_virial[6];
     for (unsigned int i = 0; i < 6; i++)
         helfrich_virial[i] = Scalar(0.0);
@@ -174,11 +184,15 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
         Scalar3 Fa, Fb, Fc;
 
         unsigned int triangle_type = m_mesh_data->getMeshTriangleData()->getTypeByIndex(i);
+  
+	if(m_ignore_type) triangle_type = 0;
 
+	unsigned int trin3 = h_pts.data[triangle_type]*3;
+  
         Scalar VolDiff = m_volume[triangle_type] - m_V0[triangle_type];
 
         Scalar energy = m_K[triangle_type] * VolDiff * VolDiff
-                        / (2 * m_V0[triangle_type] * m_pdata->getNGlobal());
+                        / (2 * m_V0[triangle_type] * trin3);
 
         VolDiff = -m_K[triangle_type] / m_V0[triangle_type] * VolDiff / 6.0;
 
@@ -203,7 +217,7 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
             h_force.data[idx_a].x += Fa.x;
             h_force.data[idx_a].y += Fa.y;
             h_force.data[idx_a].z += Fa.z;
-            h_force.data[idx_a].w = energy;
+            h_force.data[idx_a].w += energy;
             for (int j = 0; j < 6; j++)
                 h_virial.data[j * virial_pitch + idx_a] += helfrich_virial[j];
             }
@@ -227,7 +241,7 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
             h_force.data[idx_b].x += Fb.x;
             h_force.data[idx_b].y += Fb.y;
             h_force.data[idx_b].z += Fb.z;
-            h_force.data[idx_b].w = energy;
+            h_force.data[idx_b].w += energy;
             for (int j = 0; j < 6; j++)
                 h_virial.data[j * virial_pitch + idx_b] += helfrich_virial[j];
             }
@@ -251,7 +265,7 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
             h_force.data[idx_c].x += Fc.x;
             h_force.data[idx_c].y += Fc.y;
             h_force.data[idx_c].z += Fc.z;
-            h_force.data[idx_c].w = energy;
+            h_force.data[idx_c].w += energy;
             for (int j = 0; j < 6; j++)
                 h_virial.data[j * virial_pitch + idx_c] += helfrich_virial[j];
             }
@@ -272,7 +286,10 @@ void VolumeConservationMeshForceCompute::computeVolume()
     // get a local copy of the simulation box too
     const BoxDim& box = m_pdata->getGlobalBox();
 
-    const unsigned int n_types = m_mesh_data->getMeshTriangleData()->getNTypes();
+    unsigned int n_types = m_mesh_data->getMeshTriangleData()->getNTypes();
+
+    if(m_ignore_type) n_types = 1;
+
     std::vector<Scalar> global_volume(n_types);
     for (unsigned int i = 0; i < n_types; i++)
         global_volume[i] = 0;
@@ -313,6 +330,8 @@ void VolumeConservationMeshForceCompute::computeVolume()
 
         unsigned int triangle_type = m_mesh_data->getMeshTriangleData()->getTypeByIndex(i);
 
+	if(m_ignore_type) triangle_type = 0;
+
 #ifdef ENABLE_MPI
         if (m_pdata->getDomainDecomposition())
             {
@@ -357,7 +376,7 @@ void export_VolumeConservationMeshForceCompute(pybind11::module& m)
                      std::shared_ptr<VolumeConservationMeshForceCompute>>(
         m,
         "VolumeConservationMeshForceCompute")
-        .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>>())
+        .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>, bool>())
         .def("setParams", &VolumeConservationMeshForceCompute::setParamsPython)
         .def("getParams", &VolumeConservationMeshForceCompute::getParams)
         .def("getVolume", &VolumeConservationMeshForceCompute::getVolume);
diff --git a/hoomd/md/VolumeConservationMeshForceCompute.h b/hoomd/md/VolumeConservationMeshForceCompute.h
index 320070c788..2ac0b17739 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.h
+++ b/hoomd/md/VolumeConservationMeshForceCompute.h
@@ -63,7 +63,8 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
     public:
     //! Constructs the compute
     VolumeConservationMeshForceCompute(std::shared_ptr<SystemDefinition> sysdef,
-                                       std::shared_ptr<MeshDefinition> meshdef);
+                                       std::shared_ptr<MeshDefinition> meshdef,
+				       bool ignore_type);
 
     //! Destructor
     virtual ~VolumeConservationMeshForceCompute();
@@ -102,6 +103,8 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
     std::shared_ptr<MeshDefinition> m_mesh_data; //!< Mesh data to use in computing helfich energy
 
     Scalar* m_volume; //! sum of the triangle areas within the mesh
+		      
+    bool m_ignore_type; //! do we ignore type to calculate global area
 
     //! Actually compute the forces
     virtual void computeForces(uint64_t timestep);
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 53b0752a31..2a63b118ba 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -18,8 +18,9 @@ namespace md
 */
 VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     std::shared_ptr<SystemDefinition> sysdef,
-    std::shared_ptr<MeshDefinition> meshdef)
-    : VolumeConservationMeshForceCompute(sysdef, meshdef)
+    std::shared_ptr<MeshDefinition> meshdef,
+    bool ignore_type)
+    : VolumeConservationMeshForceCompute(sysdef, meshdef, ignore_type)
     {
     if (!m_exec_conf->isCUDAEnabled())
         {
@@ -29,12 +30,16 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
         throw std::runtime_error("Error initializing VolumeConservationMeshForceComputeGPU");
         }
 
+    unsigned int NTypes = this->m_mesh_data->getMeshTriangleData()->getNTypes();
+
+    if(this->m_ignore_type) NTypes=1;
+
     // allocate and zero device memory
-    GPUArray<Scalar2> params(this->m_mesh_data->getMeshTriangleData()->getNTypes(), m_exec_conf);
+    GPUArray<Scalar2> params(NTypes, m_exec_conf);
     m_params.swap(params);
 
     // allocate and zero device memory
-    GPUArray<Scalar> volume_GPU(this->m_mesh_data->getMeshTriangleData()->getNTypes(), m_exec_conf);
+    GPUArray<Scalar> volume_GPU(NTypes, m_exec_conf);
     m_volume_GPU.swap(volume_GPU);
 
     // allocate flags storage on the GPU
@@ -45,14 +50,14 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     ArrayHandle<unsigned int> h_flags(m_flags, access_location::host, access_mode::overwrite);
     h_flags.data[0] = 0;
 
-    GPUArray<Scalar> sum(this->m_mesh_data->getMeshTriangleData()->getNTypes(), m_exec_conf);
+    GPUArray<Scalar> sum(NTypes, m_exec_conf);
     m_sum.swap(sum);
 
     m_block_size = 256;
     unsigned int group_size = m_pdata->getN();
     m_num_blocks = group_size / m_block_size;
     m_num_blocks += 1;
-    m_num_blocks *= this->m_mesh_data->getMeshTriangleData()->getNTypes();
+    m_num_blocks *= NTypes;
     GPUArray<Scalar> partial_sum(m_num_blocks, m_exec_conf);
     m_partial_sum.swap(partial_sum);
 
@@ -64,11 +69,14 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
 
 void VolumeConservationMeshForceComputeGPU::setParams(unsigned int type, Scalar K, Scalar V0)
     {
-    VolumeConservationMeshForceCompute::setParams(type, K, V0);
-
-    ArrayHandle<Scalar2> h_params(m_params, access_location::host, access_mode::readwrite);
-    // update the local copy of the memory
-    h_params.data[type] = make_scalar2(K, V0);
+    if(!this->m_ignore_type || type == 0 ) 
+    	{
+        VolumeConservationMeshForceCompute::setParams(type, K, V0);
+
+        ArrayHandle<Scalar2> h_params(m_params, access_location::host, access_mode::readwrite);
+        // update the local copy of the memory
+        h_params.data[type] = make_scalar2(K, V0);
+	}
     }
 
 /*! Actually perform the force computation
@@ -101,6 +109,11 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
         access_location::device,
         access_mode::read);
 
+    ArrayHandle<unsigned int> d_pts(
+        this->m_mesh_data->getPerTypeSize(),
+        access_location::device,
+        access_mode::read);
+
     ArrayHandle<Scalar4> d_force(m_force, access_location::device, access_mode::overwrite);
     ArrayHandle<Scalar> d_virial(m_virial, access_location::device, access_mode::overwrite);
     ArrayHandle<Scalar2> d_params(m_params, access_location::device, access_mode::read);
@@ -115,7 +128,7 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
                                                 d_virial.data,
                                                 m_virial.getPitch(),
                                                 m_pdata->getN(),
-                                                m_pdata->getNGlobal(),
+                                                d_pts.data,
                                                 d_pos.data,
                                                 d_image.data,
                                                 box,
@@ -125,7 +138,7 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
                                                 gpu_table_indexer,
                                                 d_gpu_n_meshtriangle.data,
                                                 d_params.data,
-                                                m_mesh_data->getMeshTriangleData()->getNTypes(),
+                                                this->m_ignore_type,
                                                 m_tuner->getParam()[0],
                                                 d_flags.data);
 
@@ -184,6 +197,8 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
 
     unsigned int NTypes = m_mesh_data->getMeshTriangleData()->getNTypes();
 
+    if(this->m_ignore_type) NTypes = 1;
+
     kernel::gpu_compute_volume_constraint_volume(d_sumVol.data,
                                                  d_partial_sumVol.data,
                                                  m_pdata->getN(),
@@ -194,6 +209,7 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
                                                  d_gpu_meshtrianglelist.data,
                                                  d_gpu_meshtriangle_pos_list.data,
                                                  gpu_table_indexer,
+						 this->m_ignore_type,
                                                  d_gpu_n_meshtriangle.data,
                                                  m_block_size,
                                                  m_num_blocks);
@@ -229,7 +245,7 @@ void export_VolumeConservationMeshForceComputeGPU(pybind11::module& m)
                      std::shared_ptr<VolumeConservationMeshForceComputeGPU>>(
         m,
         "VolumeConservationMeshForceComputeGPU")
-        .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>>());
+        .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>, bool>());
     }
 
     } // end namespace detail
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 9b948a7c05..e1fd1f57c7 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -45,6 +45,7 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
                                                             const group_storage<3>* tlist,
                                                             const unsigned int* tpos_list,
                                                             const Index2D tlist_idx,
+                                             		    const bool ignore_type,
                                                             const unsigned int* n_triangles_list)
     {
     HIP_DYNAMIC_SHARED(char, s_data)
@@ -71,6 +72,8 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
             group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
             int cur_triangle_type = cur_triangle.idx[2];
 
+            if(ignore_type) cur_triangle_type = 0;
+
 	    if(cur_triangle_type != cN) continue;
 
             int cur_triangle_b = cur_triangle.idx[0];
@@ -206,6 +209,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 const group_storage<3>* tlist,
                                                 const unsigned int* tpos_list,
                                                 const Index2D tlist_idx,
+                                                const bool ignore_type,
                                                 const unsigned int* n_triangles_list,
                                                 unsigned int block_size,
                                                 unsigned int num_blocks)
@@ -232,6 +236,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                            tlist,
                            tpos_list,
                            tlist_idx,
+                           ignore_type,
                            n_triangles_list);
         }
 
@@ -269,7 +274,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            Scalar* d_virial,
                                                            const size_t virial_pitch,
                                                            const unsigned int N,
-                                                           const unsigned int gN,
+                                                           const unsigned int* gN,
                                                            const Scalar4* d_pos,
                                                            const int3* d_image,
                                                            BoxDim box,
@@ -279,7 +284,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            const Index2D tlist_idx,
                                                            const unsigned int* n_triangles_list,
                                                            Scalar2* d_params,
-                                                           const unsigned int n_triangle_type,
+                                                           const bool ignore_type,
                                                            unsigned int* d_flags)
     {
     // start by identifying which particle we are to handle
@@ -313,6 +318,8 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
         int cur_triangle_c = cur_triangle.idx[1];
         int cur_triangle_type = cur_triangle.idx[2];
 
+	if(ignore_type) cur_triangle_type = 0;
+
         // get the angle parameters (MEM TRANSFER: 8 bytes)
         Scalar2 params = __ldg(d_params + cur_triangle_type);
         Scalar K = params.x;
@@ -320,7 +327,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
 
         Scalar VolDiff = volume[cur_triangle_type] - V0;
 
-        Scalar energy = K * VolDiff * VolDiff / (2 * V0 * gN);
+        Scalar energy = K * VolDiff * VolDiff / (2 * V0 * 3 * gN[cur_triangle_type]);
 
         VolDiff = -K / V0 * VolDiff / 6.0;
 
@@ -361,7 +368,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
         force.x += Fa.x;
         force.y += Fa.y;
         force.z += Fa.z;
-        force.w = energy;
+        force.w += energy;
 
         virial[0] += Scalar(1. / 2.) * pos_a.x * Fa.x; // xx
         virial[1] += Scalar(1. / 2.) * pos_a.y * Fa.x; // xy
@@ -402,7 +409,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                Scalar* d_virial,
                                                const size_t virial_pitch,
                                                const unsigned int N,
-                                               const unsigned int gN,
+                                               const unsigned int* gN,
                                                const Scalar4* d_pos,
                                                const int3* d_image,
                                                const BoxDim& box,
@@ -412,7 +419,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const Index2D tlist_idx,
                                                const unsigned int* n_triangles_list,
                                                Scalar2* d_params,
-                                               const unsigned int n_triangle_type,
+                                               const bool ignore_type,
                                                int block_size,
                                                unsigned int* d_flags)
     {
@@ -447,7 +454,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                        tlist_idx,
                        n_triangles_list,
                        d_params,
-                       n_triangle_type,
+                       ignore_type,
                        d_flags);
 
     return hipSuccess;
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index 0edcdc6adf..df9999364c 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -32,6 +32,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 const group_storage<3>* tlist,
                                                 const unsigned int* tpos_list,
                                                 const Index2D tlist_idx,
+						const bool ignore_type,
                                                 const unsigned int* n_triangles_list,
                                                 unsigned int block_size,
                                                 unsigned int num_blocks);
@@ -41,7 +42,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                Scalar* d_virial,
                                                const size_t virial_pitch,
                                                const unsigned int N,
-                                               const unsigned int gN,
+                                               const unsigned int* gN,
                                                const Scalar4* d_pos,
                                                const int3* d_image,
                                                const BoxDim& box,
@@ -51,7 +52,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const Index2D tlist_idx,
                                                const unsigned int* n_triangles_list,
                                                Scalar2* d_params,
-                                               const unsigned int n_triangle_type,
+                                               const bool ignore_type,
                                                int block_size,
                                                unsigned int* d_flags);
     } // end namespace kernel
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index fd1b2a83aa..e78642fad9 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -34,7 +34,8 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
     public:
     //! Constructs the compute
     VolumeConservationMeshForceComputeGPU(std::shared_ptr<SystemDefinition> sysdef,
-                                          std::shared_ptr<MeshDefinition> meshdef);
+                                          std::shared_ptr<MeshDefinition> meshdef,
+					  bool ignore_type);
 
     //! Set the parameters
     virtual void setParams(unsigned int type, Scalar K, Scalar V0);
diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index b659f3f017..0f8cd1ecc1 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -3,13 +3,13 @@
 
 """Mesh Bond potentials."""
 
-from hoomd.md.mesh.potential import MeshPotential
+from hoomd.md.mesh.potential import MeshConvervationPotential
 from hoomd.data.typeparam import TypeParameter
 from hoomd.data.parameterdicts import TypeParameterDict
 from hoomd.logging import log
 
 
-class Volume(MeshPotential):
+class Volume(MeshConvervationPotential):
     r"""Volume conservation potential.
 
     :py:class:`Volume` specifies a volume constraint on the whole mesh
@@ -41,12 +41,12 @@ class Volume(MeshPotential):
     """
     _cpp_class_name = "VolumeConservationMeshForceCompute"
 
-    def __init__(self, mesh):
+    def __init__(self, mesh, ignore_type=False):
         params = TypeParameter("params", "types",
                                TypeParameterDict(k=float, V0=float, len_keys=1))
         self._add_typeparam(params)
 
-        super().__init__(mesh)
+        super().__init__(mesh,ignore_type)
 
     @log(requires_run=True)
     def volume(self):
diff --git a/hoomd/md/mesh/potential.py b/hoomd/md/mesh/potential.py
index 36efda1293..808496407c 100644
--- a/hoomd/md/mesh/potential.py
+++ b/hoomd/md/mesh/potential.py
@@ -15,9 +15,9 @@
 
 
 class MeshPotential(Force):
-    """Constructs the bond potential applied to a mesh.
+    """Constructs the potential applied to a mesh.
 
-    `MeshPotential` is the base class for all bond potentials applied to meshes.
+    `MeshPotential` is the base class for all potentials applied to meshes.
 
     Warning:
         This class should not be instantiated by users. The class can be used
@@ -70,3 +70,36 @@ def mesh(self, value):
                 "mesh cannot be set after calling Simulation.run().")
         mesh = validate_mesh(value)
         self._mesh = mesh
+
+class MeshConvervationPotential(MeshPotential):
+    """Constructs the conservation potential applied to a mesh.
+
+    `MeshConvervationPotential` is the base class for global conservation potentials applied to meshes.
+
+    Warning:
+        This class should not be instantiated by users. The class can be used
+        for `isinstance` or `issubclass` checks.
+    """
+
+    def __init__(self, mesh,ignore_type):
+        super().__init__(mesh)
+        self._ignore_type = ignore_type
+
+    def _attach_hook(self):
+        """Create the c++ mirror class."""
+        if self._mesh._attached and self._simulation != self._mesh._simulation:
+            warnings.warn(
+                f"{self} object is creating a new equivalent mesh structure."
+                f" This is happending since the force is moving to a new "
+                f"simulation. To supress the warning explicitly set new mesh.",
+                RuntimeWarning)
+            self._mesh = copy.deepcopy(self._mesh)
+        self.mesh._attach(self._simulation)
+
+        if isinstance(self._simulation.device, hoomd.device.CPU):
+            cpp_cls = getattr(_md, self._cpp_class_name)
+        else:
+            cpp_cls = getattr(_md, self._cpp_class_name + "GPU")
+
+        self._cpp_obj = cpp_cls(self._simulation.state._cpp_sys_def,
+                                self._mesh._cpp_obj, self._ignore_type)
diff --git a/hoomd/md/pytest/test_meshpotential.py b/hoomd/md/pytest/test_meshpotential.py
index 64f61a0545..63ad4ff39a 100644
--- a/hoomd/md/pytest/test_meshpotential.py
+++ b/hoomd/md/pytest/test_meshpotential.py
@@ -259,12 +259,43 @@ def test_volume(simulation_factory, tetrahedron_snapshot_factory):
     sim = simulation_factory(snap)
 
     mesh = hoomd.mesh.Mesh()
-    type_ids = [0, 0, 0, 0]
+    mesh.types = ["mesh", "patch"]
+    type_ids = [0, 0, 1, 0]
     triangles = [[2, 1, 0], [0, 1, 3], [2, 0, 3], [1, 2, 3]]
     mesh.triangulation = dict(type_ids=type_ids, triangles=triangles)
 
     mesh_potential = hoomd.md.mesh.conservation.Volume(mesh)
-    mesh_potential.params["mesh"] = dict(k=1, V0=1)
+    mesh_potential.params.default = dict(k=1, V0=1)
+
+    integrator = hoomd.md.Integrator(dt=0.005)
+
+    integrator.forces.append(mesh_potential)
+
+    langevin = hoomd.md.methods.Langevin(kT=1,
+                                         filter=hoomd.filter.All(),
+                                         default_gamma=0.1)
+    integrator.methods.append(langevin)
+    sim.operations.integrator = integrator
+
+    sim.run(0)
+
+    np.testing.assert_allclose(mesh_potential.volume,
+                        [0.08042,0.026807],
+                        rtol=1e-2,
+                        atol=1e-5)
+
+def test_volume_ignore_type(simulation_factory, tetrahedron_snapshot_factory):
+    snap = tetrahedron_snapshot_factory(d=0.969, L=5)
+    sim = simulation_factory(snap)
+
+    mesh = hoomd.mesh.Mesh()
+    mesh.types = ["mesh", "patch"]
+    type_ids = [0, 0, 1, 0]
+    triangles = [[2, 1, 0], [0, 1, 3], [2, 0, 3], [1, 2, 3]]
+    mesh.triangulation = dict(type_ids=type_ids, triangles=triangles)
+
+    mesh_potential = hoomd.md.mesh.conservation.Volume(mesh,ignore_type=True)
+    mesh_potential.params.default = dict(k=1, V0=1)
 
     integrator = hoomd.md.Integrator(dt=0.005)
 
@@ -278,10 +309,10 @@ def test_volume(simulation_factory, tetrahedron_snapshot_factory):
 
     sim.run(0)
 
-    assert math.isclose(mesh_potential.volume,
-                        0.107227,
-                        rel_tol=1e-2,
-                        abs_tol=1e-5)
+    np.testing.assert_allclose(mesh_potential.volume,
+                        [0.107227, 0.0],
+                        rtol=1e-2,
+                        atol=1e-5)
 
 
 def test_auto_detach_simulation(simulation_factory,

From bbea2c2d94394d0cffd8253926ec743516ea0701 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Fri, 29 Sep 2023 14:55:41 -0400
Subject: [PATCH 34/50] fix energy when ignore_type

---
 hoomd/md/VolumeConservationMeshForceCompute.cc     | 8 +++++---
 hoomd/md/VolumeConservationMeshForceComputeGPU.cc  | 1 +
 hoomd/md/VolumeConservationMeshForceComputeGPU.cu  | 8 +++++++-
 hoomd/md/VolumeConservationMeshForceComputeGPU.cuh | 1 +
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index 790b8b1110..c69eacb08a 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -143,6 +143,9 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
     for (unsigned int i = 0; i < 6; i++)
         helfrich_virial[i] = Scalar(0.0);
 
+
+    unsigned int triN = m_mesh_data->getSize();
+
     // for each of the angles
     const unsigned int size = (unsigned int)m_mesh_data->getMeshTriangleData()->getN();
     for (unsigned int i = 0; i < size; i++)
@@ -186,13 +189,12 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
         unsigned int triangle_type = m_mesh_data->getMeshTriangleData()->getTypeByIndex(i);
   
 	if(m_ignore_type) triangle_type = 0;
-
-	unsigned int trin3 = h_pts.data[triangle_type]*3;
+	else triN = h_pts.data[triangle_type];
   
         Scalar VolDiff = m_volume[triangle_type] - m_V0[triangle_type];
 
         Scalar energy = m_K[triangle_type] * VolDiff * VolDiff
-                        / (2 * m_V0[triangle_type] * trin3);
+                        / (6 * m_V0[triangle_type] * triN);
 
         VolDiff = -m_K[triangle_type] / m_V0[triangle_type] * VolDiff / 6.0;
 
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 2a63b118ba..bc743d878e 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -129,6 +129,7 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
                                                 m_virial.getPitch(),
                                                 m_pdata->getN(),
                                                 d_pts.data,
+						this->m_mesh_data->getSize(),
                                                 d_pos.data,
                                                 d_image.data,
                                                 box,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index e1fd1f57c7..5405aa9c8d 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -275,6 +275,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            const size_t virial_pitch,
                                                            const unsigned int N,
                                                            const unsigned int* gN,
+                                               		   const unsigned int aN,
                                                            const Scalar4* d_pos,
                                                            const int3* d_image,
                                                            BoxDim box,
@@ -309,6 +310,8 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
     for (int i = 0; i < 6; i++)
         virial[i] = Scalar(0.0);
 
+    unsigned int triN = 1*aN;
+
     // loop over all triangles
     for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
         {
@@ -319,6 +322,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
         int cur_triangle_type = cur_triangle.idx[2];
 
 	if(ignore_type) cur_triangle_type = 0;
+	else triN = gN[cur_triangle_type];
 
         // get the angle parameters (MEM TRANSFER: 8 bytes)
         Scalar2 params = __ldg(d_params + cur_triangle_type);
@@ -327,7 +331,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
 
         Scalar VolDiff = volume[cur_triangle_type] - V0;
 
-        Scalar energy = K * VolDiff * VolDiff / (2 * V0 * 3 * gN[cur_triangle_type]);
+        Scalar energy = K * VolDiff * VolDiff / (6 * V0 * triN);
 
         VolDiff = -K / V0 * VolDiff / 6.0;
 
@@ -410,6 +414,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const size_t virial_pitch,
                                                const unsigned int N,
                                                const unsigned int* gN,
+                                               const unsigned int aN,
                                                const Scalar4* d_pos,
                                                const int3* d_image,
                                                const BoxDim& box,
@@ -445,6 +450,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                        virial_pitch,
                        N,
                        gN,
+		       aN,
                        d_pos,
                        d_image,
                        box,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index df9999364c..a8baa98dd3 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -43,6 +43,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const size_t virial_pitch,
                                                const unsigned int N,
                                                const unsigned int* gN,
+                                               const unsigned int aN,
                                                const Scalar4* d_pos,
                                                const int3* d_image,
                                                const BoxDim& box,

From e2b55d603147e933c7f1245cf1b217c592cfa49d Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Fri, 31 May 2024 18:00:36 -0400
Subject: [PATCH 35/50] add conservation class description

---
 .../md/VolumeConservationMeshForceCompute.cc  |  7 +-
 hoomd/md/VolumeConservationMeshForceCompute.h |  2 +-
 .../VolumeConservationMeshForceComputeGPU.cc  |  4 +-
 .../VolumeConservationMeshForceComputeGPU.cu  | 78 +++++++++----------
 .../VolumeConservationMeshForceComputeGPU.cuh |  4 +-
 .../VolumeConservationMeshForceComputeGPU.h   |  6 +-
 hoomd/md/mesh/conservation.py                 | 22 +++++-
 7 files changed, 69 insertions(+), 54 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index c69eacb08a..47479c01d2 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2009-2023 The Regents of the University of Michigan.
+// Copyright (c) 2009-2024 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 #include "VolumeConservationMeshForceCompute.h"
@@ -19,6 +19,8 @@ namespace hoomd
 namespace md
     {
 /*! \param sysdef System to compute forces on
+    \param meshdef Mesh triangulation 
+    \param ignore_type boolean whether to ignore types
     \post Memory is allocated, and forces are zeroed.
 */
 VolumeConservationMeshForceCompute::VolumeConservationMeshForceCompute(
@@ -57,6 +59,7 @@ VolumeConservationMeshForceCompute::~VolumeConservationMeshForceCompute()
 
 /*! \param type Type of the angle to set parameters for
     \param K Stiffness parameter for the force computation
+    \param V0 desired volume to maintain for the force computation
 
     Sets parameters for the potential of a particular angle type
 */
@@ -87,7 +90,7 @@ pybind11::dict VolumeConservationMeshForceCompute::getParams(std::string type)
     auto typ = m_mesh_data->getMeshBondData()->getTypeByName(type);
     if (typ >= m_mesh_data->getMeshBondData()->getNTypes())
         {
-        m_exec_conf->msg->error() << "mesh.helfrich: Invalid mesh type specified" << endl;
+        m_exec_conf->msg->error() << "mesh.volume: Invalid mesh type specified" << endl;
         throw runtime_error("Error setting parameters in VolumeConservationMeshForceCompute");
         }
     if(m_ignore_type) typ = 0;
diff --git a/hoomd/md/VolumeConservationMeshForceCompute.h b/hoomd/md/VolumeConservationMeshForceCompute.h
index 2ac0b17739..b9a3fbe3c7 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.h
+++ b/hoomd/md/VolumeConservationMeshForceCompute.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Copyright (c) 2009-2024 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 #include "hoomd/ForceCompute.h"
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index bc743d878e..21c8835480 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2009-2023 The Regents of the University of Michigan.
+// Copyright (c) 2009-2024 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 #include "VolumeConservationMeshForceComputeGPU.h"
@@ -14,6 +14,8 @@ namespace hoomd
 namespace md
     {
 /*! \param sysdef System to compute forces on
+    \param meshdef Mesh triangulation 
+    \param ignore_type boolean whether to ignore types 
     \post Memory is allocated, and forces are zeroed.
 */
 VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 5405aa9c8d..086ddfdc91 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -1,10 +1,7 @@
-// Copyright (c) 2009-2023 The Regents of the University of Michigan.
+// Copyright (c) 2009-2024 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 #include "hip/hip_runtime.h"
-// Copyright (c) 2009-2022 The Regents of the University of Michigan.
-// Part of HOOMD-blue, released under the BSD 3-Clause License.
-
 #include "VolumeConservationMeshForceComputeGPU.cuh"
 #include "hoomd/TextureTools.h"
 #include "hoomd/VectorMath.h"
@@ -25,15 +22,17 @@ namespace md
 namespace kernel
     {
 //! Kernel for calculating volume_constraint sigmas on the GPU
-/*! \param d_sigma Device memory to write per paricle sigma
-    \param d_sigma_dash Device memory to write per particle sigma_dash
+/*! \param d_partial_sum_volume Device memory to write partial mesh volume
     \param N number of particles
+    \param tN number of mesh types
+    \param cN current mesh type index
     \param d_pos device array of particle positions
-    \param d_rtag device array of particle reverse tags
+    \param d_image device array of particle images
     \param box Box dimensions (in GPU format) to use for periodic boundary conditions
-    \param blist List of mesh bonds stored on the GPU
-    \param d_triangles device array of mesh triangles
-    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
+    \param tlist List of mesh triangle indices stored on the GPU
+    \param tpos_list Position of current index in list of mesh triangles stored on the GPU
+    \param ignore_type ignores mesh type if true
+    \param n_triangles_list List of mesh triangles stored on the GPU
 */
 __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_sum_volume,
                                                             const unsigned int N,
@@ -52,11 +51,6 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
     Scalar* volume_sdata = (Scalar*)&s_data[0];
 
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-    //Scalar* volume_transfer = (Scalar*)malloc(tN * sizeof *volume_transfer);
-
-    //for (unsigned int i_types = 0; i_types < tN; i_types++)
-    //    volume_transfer[i_types] = 0;
     Scalar volume_transfer = 0;
 
     if (idx < N)
@@ -107,7 +101,6 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
                 dVol.z = pos_c.x * pos_b.y - pos_c.y * pos_b.x;
                 }
             Scalar Vol = dVol.x * pos_a.x + dVol.y * pos_a.y + dVol.z * pos_a.z;
-            //volume_transfer[cur_triangle_type] += Vol / 18.0;
             volume_transfer += Vol / 18.0;
             }
         }
@@ -184,15 +177,17 @@ __global__ void gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum,
         }
     }
 
-/*! \param d_sigma Device memory to write per paricle sigma
-    \param d_sigma_dash Device memory to write per particle sigma_dash
+/*! \param d_partial_sum_volume Device memory to write partial mesh volume
     \param N number of particles
+    \param tN number of mesh types
+    \param cN current mesh type index
     \param d_pos device array of particle positions
-    \param d_rtag device array of particle reverse tags
+    \param d_image device array of particle images
     \param box Box dimensions (in GPU format) to use for periodic boundary conditions
-    \param blist List of mesh bonds stored on the GPU
-    \param d_triangles device array of mesh triangles
-    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
+    \param tlist List of mesh triangle indices stored on the GPU
+    \param tpos_list Position of current index in list of mesh triangles stored on the GPU
+    \param ignore_type ignores mesh type if true
+    \param n_triangles_list List of mesh triangles stored on the GPU
     \param block_size Block size to use when performing calculations
     \param compute_capability Device compute capability (200, 300, 350, ...)
 
@@ -257,18 +252,18 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
 /*! \param d_force Device memory to write computed forces
     \param d_virial Device memory to write computed virials
     \param virial_pitch
-    \param N number of particles
+    \param N Number of particles
+    \param gN Number of triangles of a triangle type
+    \param aN Total global number of triangles
     \param d_pos device array of particle positions
-    \param d_rtag device array of particle reverse tags
+    \param d_image device array of particle images
     \param box Box dimensions (in GPU format) to use for periodic boundary conditions
-    \param d_sigma Device memory to write per paricle sigma
-    \param d_sigma_dash Device memory to write per particle sigma_dash
-    \param blist List of mesh bonds stored on the GPU
-    \param d_triangles device array of mesh triangles
-    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
-    \param d_params K params packed as Scalar variables
-    \param n_bond_type number of mesh bond types
-    \param d_flags Flag allocated on the device for use in checking for bonds that cannot be
+    \param volume Total instantaneous volume per mesh type
+    \param tlist List of mesh triangle indices stored on the GPU
+    \param tpos_list Position of current index in list of mesh triangles stored on the GPU
+    \param n_triangles_list total group number of triangles
+    \param d_params K, V0 params packed as Scalar variables
+    \param ignore_type ignores mesh type if true
 */
 __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            Scalar* d_virial,
@@ -391,17 +386,18 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
 
 /*! \param d_force Device memory to write computed forces
     \param d_virial Device memory to write computed virials
-    \param N number of particles
+    \param N Number of particles
+    \param gN Number of triangles of a triangle type
+    \param aN Total global number of triangles
     \param d_pos device array of particle positions
-    \param d_rtag device array of particle reverse tags
+    \param d_image device array of particle images
     \param box Box dimensions (in GPU format) to use for periodic boundary conditions
-    \param d_sigma Device memory to write per paricle sigma
-    \param d_sigma_dash Device memory to write per particle sigma_dash
-    \param blist List of mesh bonds stored on the GPU
-    \param d_triangles device array of mesh triangles
-    \param n_bonds_list List of numbers of mesh bonds stored on the GPU
-    \param d_params K params packed as Scalar variables
-    \param n_bond_type number of mesh bond types
+    \param volume Total instantaneous volume per mesh type
+    \param tlist List of mesh triangle indices stored on the GPU
+    \param tpos_list Position of current index in list of mesh triangles stored on the GPU
+    \param n_triangles_list total group number of triangles
+    \param d_params K, V0 params packed as Scalar variables
+    \param ignore_type ignores mesh type if true
     \param block_size Block size to use when performing calculations
     \param d_flags Flag allocated on the device for use in checking for bonds that cannot be
     \param compute_capability Device compute capability (200, 300, 350, ...)
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index a8baa98dd3..d15bc6ff8c 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -1,4 +1,4 @@
-// Copyright (c) 2009-2023 The Regents of the University of Michigan.
+// Copyright (c) 2009-2024 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 #include "hoomd/BondedGroupData.cuh"
@@ -8,7 +8,7 @@
 #include <hip/hip_runtime.h>
 
 /*! \file MeshVolumeConservationGPU.cuh
-    \brief Declares GPU kernel code for calculating the volume cnstraint forces. Used by
+    \brief Declares GPU kernel code for calculating the volume constraint forces. Used by
    MeshVolumeConservationGPU.
 */
 
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index e78642fad9..fda6145fce 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2009-2022 The Regents of the University of Michigan.
+// Copyright (c) 2009-2024 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 #include "VolumeConservationMeshForceCompute.h"
@@ -23,8 +23,8 @@ namespace hoomd
 namespace md
     {
 
-//! Computes helfrich energy forces on the mesh on the GPU
-/*! Helfrich energy forces are computed on every particle in a mesh.
+//! Computes volume conservation energy forces on the mesh on the GPU
+/*! Volume energy forces are computed on every particle in a mesh.
 
     \ingroup computes
 */
diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index 0f8cd1ecc1..604fe92bcd 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -1,7 +1,22 @@
-# Copyright (c) 2009-2022 The Regents of the University of Michigan.
+# Copyright (c) 2009-2024 The Regents of the University of Michigan.
 # Part of HOOMD-blue, released under the BSD 3-Clause License.
 
-"""Mesh Bond potentials."""
+r"""Mesh Conservation potential.
+
+Mesh conservation force classes apply a force and virial to every mesh vertex
+particle based on a global or local quantity :math:`A` of the given mesh 
+triangulation :math:`T`.
+
+.. math::
+
+    U_\mathrm{conservation} = U(A(T))
+
+See Also:
+   See the documentation in `hoomd.mesh.Mesh` for more information on the
+   initialization of the mesh object.
+
+"""
+
 
 from hoomd.md.mesh.potential import MeshConvervationPotential
 from hoomd.data.typeparam import TypeParameter
@@ -25,8 +40,7 @@ class Volume(MeshConvervationPotential):
     Attributes:
         parameter (TypeParameter[dict]):
             The parameter of the volume constraint for the defined mesh.
-            A type name does not have to be stated as the mesh can only
-            have one type. The dictionary has the following keys:
+            The dictionary has the following keys:
 
             * ``k`` (`float`, **required**) - potential constant
               :math:`[\mathrm{energy} \cdot \mathrm{length}^{-3}]`

From f2f659a2b33dd9cffeba3b3ed3c63f5df6a6b804 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Sat, 1 Jun 2024 13:33:23 -0400
Subject: [PATCH 36/50] remove commented-out code

---
 .../VolumeConservationMeshForceComputeGPU.cu  | 42 +++++++++----------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 086ddfdc91..07da26c57c 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -105,32 +105,28 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
             }
         }
 
-    //for (unsigned int i_types = 0; i_types < tN; i_types++)
+    volume_sdata[threadIdx.x] = volume_transfer;//[i_types];
+     
+    __syncthreads();
+     
+    // reduce the sum in parallel
+    int offs = blockDim.x >> 1;
+    while (offs > 0)
         {
-        volume_sdata[threadIdx.x] = volume_transfer;//[i_types];
-
+        if (threadIdx.x < offs)
+    	{
+    	volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+    	}
+        offs >>= 1;
         __syncthreads();
-
-        // reduce the sum in parallel
-        int offs = blockDim.x >> 1;
-        while (offs > 0)
-            {
-            if (threadIdx.x < offs)
-                {
-                volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
-                }
-            offs >>= 1;
-            __syncthreads();
-            }
-
-        // write out our partial sum
-        if (threadIdx.x == 0)
-            {
-            //d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[0];
-            d_partial_sum_volume[blockIdx.x * tN + cN] = volume_sdata[0];
-            }
         }
-    //free(volume_transfer);
+    
+    // write out our partial sum
+    if (threadIdx.x == 0)
+        {
+        //d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[0];
+        d_partial_sum_volume[blockIdx.x * tN + cN] = volume_sdata[0];
+        }
     }
 
 //! Kernel function for reducing a partial sum to a full sum (one value)

From 1cb4bf075f6c03bc575bd0320534c73f4be248f6 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Tue, 8 Oct 2024 23:29:59 -0400
Subject: [PATCH 37/50] fix sphinx page for md.mesh.conservation

---
 sphinx-doc/module-md-mesh-conservation.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sphinx-doc/module-md-mesh-conservation.rst b/sphinx-doc/module-md-mesh-conservation.rst
index 2f2b9bee13..d10468589e 100644
--- a/sphinx-doc/module-md-mesh-conservation.rst
+++ b/sphinx-doc/module-md-mesh-conservation.rst
@@ -2,7 +2,7 @@
 .. Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 md.mesh.conservation
-------------
+--------------------
 
 .. rubric:: Overview
 
@@ -17,6 +17,6 @@ md.mesh.conservation
 
 .. automodule:: hoomd.md.mesh.conservation
     :synopsis: Constraints applied to a mesh data structure.
-    :members: Volume
     :no-inherited-members:
     :show-inheritance:
+    :members: Volume

From 270bcc009ea784b0c186aa47a493c6a6283d6184 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Tue, 8 Oct 2024 23:32:04 -0400
Subject: [PATCH 38/50] fix class name in MeshConvervationPotential docstring

---
 hoomd/md/mesh/potential.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hoomd/md/mesh/potential.py b/hoomd/md/mesh/potential.py
index fe9c048224..17f7b26f53 100644
--- a/hoomd/md/mesh/potential.py
+++ b/hoomd/md/mesh/potential.py
@@ -74,7 +74,8 @@ def mesh(self, value):
 class MeshConvervationPotential(MeshPotential):
     """Constructs the bond potential applied to a mesh.
 
-    `MeshPotential` is the base class for global conservation potentials applied to meshes.
+    `MeshConvervationPotential` is the base class for global conservation 
+    potentials applied to meshes.
 
     Warning:
         This class should not be instantiated by users. The class can be used

From 69a81741f995a7df5129ee6855e5245a47fb9878 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Tue, 8 Oct 2024 23:34:11 -0400
Subject: [PATCH 39/50] remove flags

---
 .../VolumeConservationMeshForceComputeGPU.cc  | 27 +------------------
 .../VolumeConservationMeshForceComputeGPU.cu  | 10 +++----
 .../VolumeConservationMeshForceComputeGPU.cuh |  3 +--
 .../VolumeConservationMeshForceComputeGPU.h   |  1 -
 4 files changed, 5 insertions(+), 36 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 21c8835480..16d0273c3b 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -44,14 +44,6 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     GPUArray<Scalar> volume_GPU(NTypes, m_exec_conf);
     m_volume_GPU.swap(volume_GPU);
 
-    // allocate flags storage on the GPU
-    GPUArray<unsigned int> flags(1, this->m_exec_conf);
-    m_flags.swap(flags);
-
-    // reset flags
-    ArrayHandle<unsigned int> h_flags(m_flags, access_location::host, access_mode::overwrite);
-    h_flags.data[0] = 0;
-
     GPUArray<Scalar> sum(NTypes, m_exec_conf);
     m_sum.swap(sum);
 
@@ -122,9 +114,6 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
 
     ArrayHandle<Scalar> d_volume(m_volume_GPU, access_location::device, access_mode::read);
 
-    // access the flags array for overwriting
-    ArrayHandle<unsigned int> d_flags(m_flags, access_location::device, access_mode::readwrite);
-
     m_tuner->begin();
     kernel::gpu_compute_volume_constraint_force(d_force.data,
                                                 d_virial.data,
@@ -142,24 +131,10 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
                                                 d_gpu_n_meshtriangle.data,
                                                 d_params.data,
                                                 this->m_ignore_type,
-                                                m_tuner->getParam()[0],
-                                                d_flags.data);
+                                                m_tuner->getParam()[0]);
 
     if (this->m_exec_conf->isCUDAErrorCheckingEnabled())
-        {
         CHECK_CUDA_ERROR();
-
-        // check the flags for any errors
-        ArrayHandle<unsigned int> h_flags(m_flags, access_location::host, access_mode::read);
-
-        if (h_flags.data[0] & 1)
-            {
-            this->m_exec_conf->msg->error() << "volume constraint: triangle out of bounds ("
-                                            << h_flags.data[0] << ")" << std::endl
-                                            << std::endl;
-            throw std::runtime_error("Error in meshtriangle calculation");
-            }
-        }
     m_tuner->end();
     }
 
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 07da26c57c..11f91edb93 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -276,8 +276,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            const Index2D tlist_idx,
                                                            const unsigned int* n_triangles_list,
                                                            Scalar2* d_params,
-                                                           const bool ignore_type,
-                                                           unsigned int* d_flags)
+                                                           const bool ignore_type)
     {
     // start by identifying which particle we are to handle
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
@@ -395,7 +394,6 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
     \param d_params K, V0 params packed as Scalar variables
     \param ignore_type ignores mesh type if true
     \param block_size Block size to use when performing calculations
-    \param d_flags Flag allocated on the device for use in checking for bonds that cannot be
     \param compute_capability Device compute capability (200, 300, 350, ...)
 
     \returns Any error code resulting from the kernel launch
@@ -417,8 +415,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const unsigned int* n_triangles_list,
                                                Scalar2* d_params,
                                                const bool ignore_type,
-                                               int block_size,
-                                               unsigned int* d_flags)
+                                               int block_size)
     {
     unsigned int max_block_size;
     hipFuncAttributes attr;
@@ -452,8 +449,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                        tlist_idx,
                        n_triangles_list,
                        d_params,
-                       ignore_type,
-                       d_flags);
+                       ignore_type);
 
     return hipSuccess;
     }
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index d15bc6ff8c..7065549509 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -54,8 +54,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                                                const unsigned int* n_triangles_list,
                                                Scalar2* d_params,
                                                const bool ignore_type,
-                                               int block_size,
-                                               unsigned int* d_flags);
+                                               int block_size);
     } // end namespace kernel
     } // end namespace md
     } // end namespace hoomd
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index fda6145fce..5acc745e25 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -51,7 +51,6 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
     unsigned int m_num_blocks;       //!< number of memory blocks reserved for partial sum memory
 
     std::shared_ptr<Autotuner<1>> m_tuner; //!< Autotuner for block size
-    GPUArray<unsigned int> m_flags;     //!< Flags set during the kernel execution
     GPUArray<Scalar2> m_params;          //!< Parameters stored on the GPU
 
     GPUArray<Scalar> m_partial_sum; //!< memory space for partial sum over volume

From 2e2490a5e33b6fbd7ac971bbd7e174e27bcf26d3 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Tue, 8 Oct 2024 23:51:02 -0400
Subject: [PATCH 40/50] store volume conservation params in a GPUArray<Scalar2>

---
 .../md/VolumeConservationMeshForceCompute.cc  | 37 +++++------
 hoomd/md/VolumeConservationMeshForceCompute.h | 62 +++++++++----------
 .../VolumeConservationMeshForceComputeGPU.cc  | 12 ----
 .../VolumeConservationMeshForceComputeGPU.h   |  4 --
 4 files changed, 44 insertions(+), 71 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index 47479c01d2..d9955eee3a 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -4,8 +4,6 @@
 #include "VolumeConservationMeshForceCompute.h"
 
 #include <iostream>
-#include <math.h>
-#include <sstream>
 #include <stdexcept>
 
 using namespace std;
@@ -27,7 +25,7 @@ VolumeConservationMeshForceCompute::VolumeConservationMeshForceCompute(
     std::shared_ptr<SystemDefinition> sysdef,
     std::shared_ptr<MeshDefinition> meshdef,
     bool ignore_type)
-    : ForceCompute(sysdef), m_K(NULL), m_V0(NULL), m_mesh_data(meshdef), m_volume(0), 
+    : ForceCompute(sysdef), m_mesh_data(meshdef), 
 	m_ignore_type(ignore_type)
     {
     m_exec_conf->msg->notice(5) << "Constructing VolumeConservationMeshForceCompute" << endl;
@@ -36,11 +34,8 @@ VolumeConservationMeshForceCompute::VolumeConservationMeshForceCompute(
 
     if(m_ignore_type) n_types = 1;
 
-    // allocate the parameters
-    m_K = new Scalar[n_types];
-
-    // allocate the parameters
-    m_V0 = new Scalar[n_types];
+    GPUArray<Scalar2> params(n_types, m_exec_conf);
+    m_params.swap(params);
 
     m_volume = new Scalar[n_types];
     }
@@ -49,11 +44,7 @@ VolumeConservationMeshForceCompute::~VolumeConservationMeshForceCompute()
     {
     m_exec_conf->msg->notice(5) << "Destroying VolumeConservationMeshForceCompute" << endl;
 
-    delete[] m_K;
-    delete[] m_V0;
     delete[] m_volume;
-    m_K = NULL;
-    m_V0 = NULL;
     m_volume = NULL;
     }
 
@@ -67,8 +58,9 @@ void VolumeConservationMeshForceCompute::setParams(unsigned int type, Scalar K,
     {
     if(!m_ignore_type || type == 0 ) 
     	{
-        m_K[type] = K;
-        m_V0[type] = V0;
+        ArrayHandle<Scalar2> h_params(m_params, access_location::host, access_mode::readwrite);
+        // update the local copy of the memory
+        h_params.data[type] = make_scalar2(K, V0);
 
         // check for some silly errors a user could make
         if (K <= 0)
@@ -81,7 +73,7 @@ void VolumeConservationMeshForceCompute::setParams(unsigned int type, Scalar K,
 void VolumeConservationMeshForceCompute::setParamsPython(std::string type, pybind11::dict params)
     {
     auto typ = m_mesh_data->getMeshBondData()->getTypeByName(type);
-    auto _params = vconstraint_params(params);
+    auto _params = volume_conservation_params(params);
     setParams(typ, _params.k, _params.V0);
     }
 
@@ -93,10 +85,10 @@ pybind11::dict VolumeConservationMeshForceCompute::getParams(std::string type)
         m_exec_conf->msg->error() << "mesh.volume: Invalid mesh type specified" << endl;
         throw runtime_error("Error setting parameters in VolumeConservationMeshForceCompute");
         }
-    if(m_ignore_type) typ = 0;
+    ArrayHandle<Scalar2> h_params(m_params, access_location::host, access_mode::read);
     pybind11::dict params;
-    params["k"] = m_K[typ];
-    params["V0"] = m_V0[typ];
+    params["k"] = h_params.data[typ].x;
+    params["V0"] = h_params.data[typ].y;
     return params;
     }
 
@@ -117,6 +109,7 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
     ArrayHandle<Scalar4> h_force(m_force, access_location::host, access_mode::overwrite);
     ArrayHandle<Scalar> h_virial(m_virial, access_location::host, access_mode::overwrite);
     size_t virial_pitch = m_virial.getPitch();
+    ArrayHandle<Scalar2> h_params(m_params, access_location::host, access_mode::read);
 
     ArrayHandle<typename Angle::members_t> h_triangles(
         m_mesh_data->getMeshTriangleData()->getMembersArray(),
@@ -194,12 +187,12 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
 	if(m_ignore_type) triangle_type = 0;
 	else triN = h_pts.data[triangle_type];
   
-        Scalar VolDiff = m_volume[triangle_type] - m_V0[triangle_type];
+        Scalar VolDiff = m_volume[triangle_type] - h_params.data[triangle_type].y;
 
-        Scalar energy = m_K[triangle_type] * VolDiff * VolDiff
-                        / (6 * m_V0[triangle_type] * triN);
+        Scalar energy = h_params.data[triangle_type].x * VolDiff * VolDiff
+                        / (6 * h_params.data[triangle_type].y * triN);
 
-        VolDiff = -m_K[triangle_type] / m_V0[triangle_type] * VolDiff / 6.0;
+        VolDiff = -h_params.data[triangle_type].x / h_params.data[triangle_type].y * VolDiff / 6.0;
 
         Fa.x = VolDiff * dVol_a.x;
         Fa.y = VolDiff * dVol_a.y;
diff --git a/hoomd/md/VolumeConservationMeshForceCompute.h b/hoomd/md/VolumeConservationMeshForceCompute.h
index b9a3fbe3c7..28ce168ad0 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.h
+++ b/hoomd/md/VolumeConservationMeshForceCompute.h
@@ -6,8 +6,6 @@
 
 #include <memory>
 
-#include <vector>
-
 /*! \file VolumeConservationMeshForceCompute.h
     \brief Declares a class for computing volume constraint forces
 */
@@ -25,33 +23,6 @@ namespace hoomd
     {
 namespace md
     {
-struct vconstraint_params
-    {
-    Scalar k;
-    Scalar V0;
-
-#ifndef __HIPCC__
-    vconstraint_params() : k(0), V0(0) { }
-
-    vconstraint_params(pybind11::dict params)
-        : k(params["k"].cast<Scalar>()), V0(params["V0"].cast<Scalar>())
-        {
-        }
-
-    pybind11::dict asDict()
-        {
-        pybind11::dict v;
-        v["k"] = k;
-        v["V0"] = V0;
-        return v;
-        }
-#endif
-    }
-#ifdef SINGLE_PRECISION
-    __attribute__((aligned(8)));
-#else
-    __attribute__((aligned(16)));
-#endif
 
 //! Computes volume constraint forces on the mesh
 /*! Volume constraint forces are computed on every particle in a mesh.
@@ -60,6 +31,33 @@ struct vconstraint_params
 */
 class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
     {
+    struct volume_conservation_params
+        {
+        Scalar k;
+        Scalar V0;
+   
+#ifndef __HIPCC__
+        volume_conservation_params() : k(0), V0(0) { }
+    
+        volume_conservation_params(pybind11::dict params)
+            : k(params["k"].cast<Scalar>()), V0(params["V0"].cast<Scalar>())
+            {
+            }
+    
+        pybind11::dict asDict()
+            {
+            pybind11::dict v;
+            v["k"] = k;
+            v["V0"] = V0;
+            return v;
+            }
+#endif
+        }
+#if HOOMD_LONGREAL_SIZE == 32
+        __attribute__((aligned(4)));
+#else
+        __attribute__((aligned(8)));
+#endif
     public:
     //! Constructs the compute
     VolumeConservationMeshForceCompute(std::shared_ptr<SystemDefinition> sysdef,
@@ -96,11 +94,9 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
 #endif
 
     protected:
-    Scalar* m_K; //!< K parameter for multiple mesh triangles
-
-    Scalar* m_V0;
+    GPUArray<Scalar2> m_params; //!< Parameters
 
-    std::shared_ptr<MeshDefinition> m_mesh_data; //!< Mesh data to use in computing helfich energy
+    std::shared_ptr<MeshDefinition> m_mesh_data; //!< Mesh data to use in computing volume energy
 
     Scalar* m_volume; //! sum of the triangle areas within the mesh
 		      
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 16d0273c3b..f311b0a4f7 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -61,18 +61,6 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     m_autotuners.push_back(m_tuner);
     }
 
-void VolumeConservationMeshForceComputeGPU::setParams(unsigned int type, Scalar K, Scalar V0)
-    {
-    if(!this->m_ignore_type || type == 0 ) 
-    	{
-        VolumeConservationMeshForceCompute::setParams(type, K, V0);
-
-        ArrayHandle<Scalar2> h_params(m_params, access_location::host, access_mode::readwrite);
-        // update the local copy of the memory
-        h_params.data[type] = make_scalar2(K, V0);
-	}
-    }
-
 /*! Actually perform the force computation
     \param timestep Current time step
  */
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index 5acc745e25..39662a6a5b 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -37,9 +37,6 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
                                           std::shared_ptr<MeshDefinition> meshdef,
 					  bool ignore_type);
 
-    //! Set the parameters
-    virtual void setParams(unsigned int type, Scalar K, Scalar V0);
-
     virtual pybind11::array_t<Scalar> getVolume()
         {
         ArrayHandle<Scalar> h_volume(m_volume_GPU, access_location::host, access_mode::read);
@@ -51,7 +48,6 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
     unsigned int m_num_blocks;       //!< number of memory blocks reserved for partial sum memory
 
     std::shared_ptr<Autotuner<1>> m_tuner; //!< Autotuner for block size
-    GPUArray<Scalar2> m_params;          //!< Parameters stored on the GPU
 
     GPUArray<Scalar> m_partial_sum; //!< memory space for partial sum over volume
     GPUArray<Scalar> m_sum;          //!< memory space for sum over volume

From f55b2da633bde3a9a22c36cf02901ebfd2bacabf Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Tue, 8 Oct 2024 23:53:19 -0400
Subject: [PATCH 41/50] remove comments

---
 hoomd/md/VolumeConservationMeshForceCompute.cc | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index d9955eee3a..feab7c7828 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -59,10 +59,8 @@ void VolumeConservationMeshForceCompute::setParams(unsigned int type, Scalar K,
     if(!m_ignore_type || type == 0 ) 
     	{
         ArrayHandle<Scalar2> h_params(m_params, access_location::host, access_mode::readwrite);
-        // update the local copy of the memory
         h_params.data[type] = make_scalar2(K, V0);
 
-        // check for some silly errors a user could make
         if (K <= 0)
             m_exec_conf->msg->warning() << "volume: specified K <= 0" << endl;
         if (V0 <= 0)
@@ -100,7 +98,6 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
     computeVolume(); // precompute volume
 
     assert(m_pdata);
-    // access the particle data arrays
     ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
 
     ArrayHandle<unsigned int> h_rtag(m_pdata->getRTags(), access_location::host, access_mode::read);
@@ -116,18 +113,15 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
         access_location::host,
         access_mode::read);
 
-    // there are enough other checks on the input data: but it doesn't hurt to be safe
     assert(h_force.data);
     assert(h_virial.data);
     assert(h_pos.data);
     assert(h_rtag.data);
     assert(h_triangles.data);
 
-    // Zero data for force calculation.
     memset((void*)h_force.data, 0, sizeof(Scalar4) * m_force.getNumElements());
     memset((void*)h_virial.data, 0, sizeof(Scalar) * m_virial.getNumElements());
 
-    // get a local copy of the simulation box too
     const BoxDim& box = m_pdata->getGlobalBox();
 
     PDataFlags flags = m_pdata->getFlags();
@@ -142,11 +136,9 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
 
     unsigned int triN = m_mesh_data->getSize();
 
-    // for each of the angles
     const unsigned int size = (unsigned int)m_mesh_data->getMeshTriangleData()->getN();
     for (unsigned int i = 0; i < size; i++)
         {
-        // lookup the tag of each of the particles participating in the bond
         const typename Angle::members_t& triangle = h_triangles.data[i];
 
         unsigned int ttag_a = triangle.tag[0];
@@ -156,8 +148,6 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
         unsigned int ttag_c = triangle.tag[2];
         assert(ttag_c < m_pdata->getMaximumTag() + 1);
 
-        // transform a and b into indices into the particle data arrays
-        // (MEM TRANSFER: 4 integers)
         unsigned int idx_a = h_rtag.data[ttag_a];
         unsigned int idx_b = h_rtag.data[ttag_b];
         unsigned int idx_c = h_rtag.data[ttag_c];
@@ -208,8 +198,6 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
             helfrich_virial[5] = Scalar(1. / 2.) * h_pos.data[idx_a].z * Fa.z; // zz
             }
 
-        // Now, apply the force to each individual atom a,b,c, and accumulate the energy/virial
-        // do not update ghost particles
         if (idx_a < m_pdata->getN())
             {
             h_force.data[idx_a].x += Fa.x;
@@ -281,7 +269,6 @@ void VolumeConservationMeshForceCompute::computeVolume()
         access_location::host,
         access_mode::read);
 
-    // get a local copy of the simulation box too
     const BoxDim& box = m_pdata->getGlobalBox();
 
     unsigned int n_types = m_mesh_data->getMeshTriangleData()->getNTypes();
@@ -292,11 +279,9 @@ void VolumeConservationMeshForceCompute::computeVolume()
     for (unsigned int i = 0; i < n_types; i++)
         global_volume[i] = 0;
 
-    // for each of the angles
     const unsigned int size = (unsigned int)m_mesh_data->getMeshTriangleData()->getN();
     for (unsigned int i = 0; i < size; i++)
         {
-        // lookup the tag of each of the particles participating in the bond
         const typename Angle::members_t& triangle = h_triangles.data[i];
 
         unsigned int ttag_a = triangle.tag[0];
@@ -306,8 +291,6 @@ void VolumeConservationMeshForceCompute::computeVolume()
         unsigned int ttag_c = triangle.tag[2];
         assert(ttag_c < m_pdata->getMaximumTag() + 1);
 
-        // transform a and b into indices into the particle data arrays
-        // (MEM TRANSFER: 4 integers)
         unsigned int idx_a = h_rtag.data[ttag_a];
         unsigned int idx_b = h_rtag.data[ttag_b];
         unsigned int idx_c = h_rtag.data[ttag_c];

From 5cae06b170679921927bdfe4dbdcec10297d8fb8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 9 Oct 2024 03:57:51 +0000
Subject: [PATCH 42/50] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 hoomd/MeshDefinition.cc                       |  1 -
 .../md/VolumeConservationMeshForceCompute.cc  | 38 ++++++++++--------
 hoomd/md/VolumeConservationMeshForceCompute.h | 10 ++---
 .../VolumeConservationMeshForceComputeGPU.cc  | 24 ++++++-----
 .../VolumeConservationMeshForceComputeGPU.cu  | 40 ++++++++++---------
 .../VolumeConservationMeshForceComputeGPU.cuh |  2 +-
 .../VolumeConservationMeshForceComputeGPU.h   |  9 ++---
 hoomd/md/mesh/conservation.py                 |  5 +--
 hoomd/md/mesh/potential.py                    |  5 ++-
 hoomd/md/pytest/test_meshpotential.py         | 18 ++++-----
 sphinx-doc/module-md-mesh-conservation.rst    |  2 +-
 11 files changed, 82 insertions(+), 72 deletions(-)

diff --git a/hoomd/MeshDefinition.cc b/hoomd/MeshDefinition.cc
index 682cf048f1..cfe6d4df78 100644
--- a/hoomd/MeshDefinition.cc
+++ b/hoomd/MeshDefinition.cc
@@ -32,7 +32,6 @@ MeshDefinition::MeshDefinition(std::shared_ptr<SystemDefinition> sysdef, unsigne
     GlobalArray<unsigned int> globalN(n_types, m_sysdef->getParticleData()->getExecConf());
     m_globalN.swap(globalN);
     TAG_ALLOCATION(m_globalN);
-
     }
 
 void MeshDefinition::setTypes(pybind11::list types)
diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index feab7c7828..aa3a7ff5d4 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -17,7 +17,7 @@ namespace hoomd
 namespace md
     {
 /*! \param sysdef System to compute forces on
-    \param meshdef Mesh triangulation 
+    \param meshdef Mesh triangulation
     \param ignore_type boolean whether to ignore types
     \post Memory is allocated, and forces are zeroed.
 */
@@ -25,14 +25,14 @@ VolumeConservationMeshForceCompute::VolumeConservationMeshForceCompute(
     std::shared_ptr<SystemDefinition> sysdef,
     std::shared_ptr<MeshDefinition> meshdef,
     bool ignore_type)
-    : ForceCompute(sysdef), m_mesh_data(meshdef), 
-	m_ignore_type(ignore_type)
+    : ForceCompute(sysdef), m_mesh_data(meshdef), m_ignore_type(ignore_type)
     {
     m_exec_conf->msg->notice(5) << "Constructing VolumeConservationMeshForceCompute" << endl;
 
     unsigned int n_types = m_mesh_data->getMeshTriangleData()->getNTypes();
 
-    if(m_ignore_type) n_types = 1;
+    if (m_ignore_type)
+        n_types = 1;
 
     GPUArray<Scalar2> params(n_types, m_exec_conf);
     m_params.swap(params);
@@ -56,8 +56,8 @@ VolumeConservationMeshForceCompute::~VolumeConservationMeshForceCompute()
 */
 void VolumeConservationMeshForceCompute::setParams(unsigned int type, Scalar K, Scalar V0)
     {
-    if(!m_ignore_type || type == 0 ) 
-    	{
+    if (!m_ignore_type || type == 0)
+        {
         ArrayHandle<Scalar2> h_params(m_params, access_location::host, access_mode::readwrite);
         h_params.data[type] = make_scalar2(K, V0);
 
@@ -65,7 +65,7 @@ void VolumeConservationMeshForceCompute::setParams(unsigned int type, Scalar K,
             m_exec_conf->msg->warning() << "volume: specified K <= 0" << endl;
         if (V0 <= 0)
             m_exec_conf->msg->warning() << "volume: specified V0 <= 0" << endl;
-	}
+        }
     }
 
 void VolumeConservationMeshForceCompute::setParamsPython(std::string type, pybind11::dict params)
@@ -127,13 +127,14 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
     PDataFlags flags = m_pdata->getFlags();
     bool compute_virial = flags[pdata_flag::pressure_tensor];
 
-    ArrayHandle<unsigned int> h_pts(m_mesh_data->getPerTypeSize(), access_location::host, access_mode::read);
+    ArrayHandle<unsigned int> h_pts(m_mesh_data->getPerTypeSize(),
+                                    access_location::host,
+                                    access_mode::read);
 
     Scalar helfrich_virial[6];
     for (unsigned int i = 0; i < 6; i++)
         helfrich_virial[i] = Scalar(0.0);
 
-
     unsigned int triN = m_mesh_data->getSize();
 
     const unsigned int size = (unsigned int)m_mesh_data->getMeshTriangleData()->getN();
@@ -173,10 +174,12 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
         Scalar3 Fa, Fb, Fc;
 
         unsigned int triangle_type = m_mesh_data->getMeshTriangleData()->getTypeByIndex(i);
-  
-	if(m_ignore_type) triangle_type = 0;
-	else triN = h_pts.data[triangle_type];
-  
+
+        if (m_ignore_type)
+            triangle_type = 0;
+        else
+            triN = h_pts.data[triangle_type];
+
         Scalar VolDiff = m_volume[triangle_type] - h_params.data[triangle_type].y;
 
         Scalar energy = h_params.data[triangle_type].x * VolDiff * VolDiff
@@ -273,7 +276,8 @@ void VolumeConservationMeshForceCompute::computeVolume()
 
     unsigned int n_types = m_mesh_data->getMeshTriangleData()->getNTypes();
 
-    if(m_ignore_type) n_types = 1;
+    if (m_ignore_type)
+        n_types = 1;
 
     std::vector<Scalar> global_volume(n_types);
     for (unsigned int i = 0; i < n_types; i++)
@@ -311,7 +315,8 @@ void VolumeConservationMeshForceCompute::computeVolume()
 
         unsigned int triangle_type = m_mesh_data->getMeshTriangleData()->getTypeByIndex(i);
 
-	if(m_ignore_type) triangle_type = 0;
+        if (m_ignore_type)
+            triangle_type = 0;
 
 #ifdef ENABLE_MPI
         if (m_pdata->getDomainDecomposition())
@@ -357,7 +362,8 @@ void export_VolumeConservationMeshForceCompute(pybind11::module& m)
                      std::shared_ptr<VolumeConservationMeshForceCompute>>(
         m,
         "VolumeConservationMeshForceCompute")
-        .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>, bool>())
+        .def(pybind11::
+                 init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>, bool>())
         .def("setParams", &VolumeConservationMeshForceCompute::setParamsPython)
         .def("getParams", &VolumeConservationMeshForceCompute::getParams)
         .def("getVolume", &VolumeConservationMeshForceCompute::getVolume);
diff --git a/hoomd/md/VolumeConservationMeshForceCompute.h b/hoomd/md/VolumeConservationMeshForceCompute.h
index 28ce168ad0..b082ab2c31 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.h
+++ b/hoomd/md/VolumeConservationMeshForceCompute.h
@@ -35,15 +35,15 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
         {
         Scalar k;
         Scalar V0;
-   
+
 #ifndef __HIPCC__
         volume_conservation_params() : k(0), V0(0) { }
-    
+
         volume_conservation_params(pybind11::dict params)
             : k(params["k"].cast<Scalar>()), V0(params["V0"].cast<Scalar>())
             {
             }
-    
+
         pybind11::dict asDict()
             {
             pybind11::dict v;
@@ -62,7 +62,7 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
     //! Constructs the compute
     VolumeConservationMeshForceCompute(std::shared_ptr<SystemDefinition> sysdef,
                                        std::shared_ptr<MeshDefinition> meshdef,
-				       bool ignore_type);
+                                       bool ignore_type);
 
     //! Destructor
     virtual ~VolumeConservationMeshForceCompute();
@@ -99,7 +99,7 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
     std::shared_ptr<MeshDefinition> m_mesh_data; //!< Mesh data to use in computing volume energy
 
     Scalar* m_volume; //! sum of the triangle areas within the mesh
-		      
+
     bool m_ignore_type; //! do we ignore type to calculate global area
 
     //! Actually compute the forces
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index f311b0a4f7..46bd4a79da 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -14,8 +14,8 @@ namespace hoomd
 namespace md
     {
 /*! \param sysdef System to compute forces on
-    \param meshdef Mesh triangulation 
-    \param ignore_type boolean whether to ignore types 
+    \param meshdef Mesh triangulation
+    \param ignore_type boolean whether to ignore types
     \post Memory is allocated, and forces are zeroed.
 */
 VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
@@ -34,7 +34,8 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
 
     unsigned int NTypes = this->m_mesh_data->getMeshTriangleData()->getNTypes();
 
-    if(this->m_ignore_type) NTypes=1;
+    if (this->m_ignore_type)
+        NTypes = 1;
 
     // allocate and zero device memory
     GPUArray<Scalar2> params(NTypes, m_exec_conf);
@@ -91,10 +92,9 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
         access_location::device,
         access_mode::read);
 
-    ArrayHandle<unsigned int> d_pts(
-        this->m_mesh_data->getPerTypeSize(),
-        access_location::device,
-        access_mode::read);
+    ArrayHandle<unsigned int> d_pts(this->m_mesh_data->getPerTypeSize(),
+                                    access_location::device,
+                                    access_mode::read);
 
     ArrayHandle<Scalar4> d_force(m_force, access_location::device, access_mode::overwrite);
     ArrayHandle<Scalar> d_virial(m_virial, access_location::device, access_mode::overwrite);
@@ -108,7 +108,7 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
                                                 m_virial.getPitch(),
                                                 m_pdata->getN(),
                                                 d_pts.data,
-						this->m_mesh_data->getSize(),
+                                                this->m_mesh_data->getSize(),
                                                 d_pos.data,
                                                 d_image.data,
                                                 box,
@@ -163,7 +163,8 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
 
     unsigned int NTypes = m_mesh_data->getMeshTriangleData()->getNTypes();
 
-    if(this->m_ignore_type) NTypes = 1;
+    if (this->m_ignore_type)
+        NTypes = 1;
 
     kernel::gpu_compute_volume_constraint_volume(d_sumVol.data,
                                                  d_partial_sumVol.data,
@@ -175,7 +176,7 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
                                                  d_gpu_meshtrianglelist.data,
                                                  d_gpu_meshtriangle_pos_list.data,
                                                  gpu_table_indexer,
-						 this->m_ignore_type,
+                                                 this->m_ignore_type,
                                                  d_gpu_n_meshtriangle.data,
                                                  m_block_size,
                                                  m_num_blocks);
@@ -211,7 +212,8 @@ void export_VolumeConservationMeshForceComputeGPU(pybind11::module& m)
                      std::shared_ptr<VolumeConservationMeshForceComputeGPU>>(
         m,
         "VolumeConservationMeshForceComputeGPU")
-        .def(pybind11::init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>, bool>());
+        .def(pybind11::
+                 init<std::shared_ptr<SystemDefinition>, std::shared_ptr<MeshDefinition>, bool>());
     }
 
     } // end namespace detail
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
index 11f91edb93..08e7467212 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu
@@ -1,8 +1,8 @@
 // Copyright (c) 2009-2024 The Regents of the University of Michigan.
 // Part of HOOMD-blue, released under the BSD 3-Clause License.
 
-#include "hip/hip_runtime.h"
 #include "VolumeConservationMeshForceComputeGPU.cuh"
+#include "hip/hip_runtime.h"
 #include "hoomd/TextureTools.h"
 #include "hoomd/VectorMath.h"
 
@@ -44,7 +44,7 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
                                                             const group_storage<3>* tlist,
                                                             const unsigned int* tpos_list,
                                                             const Index2D tlist_idx,
-                                             		    const bool ignore_type,
+                                                            const bool ignore_type,
                                                             const unsigned int* n_triangles_list)
     {
     HIP_DYNAMIC_SHARED(char, s_data)
@@ -66,9 +66,11 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
             group_storage<3> cur_triangle = tlist[tlist_idx(idx, triangle_idx)];
             int cur_triangle_type = cur_triangle.idx[2];
 
-            if(ignore_type) cur_triangle_type = 0;
+            if (ignore_type)
+                cur_triangle_type = 0;
 
-	    if(cur_triangle_type != cN) continue;
+            if (cur_triangle_type != cN)
+                continue;
 
             int cur_triangle_b = cur_triangle.idx[0];
             int cur_triangle_c = cur_triangle.idx[1];
@@ -105,26 +107,26 @@ __global__ void gpu_compute_volume_constraint_volume_kernel(Scalar* d_partial_su
             }
         }
 
-    volume_sdata[threadIdx.x] = volume_transfer;//[i_types];
-     
+    volume_sdata[threadIdx.x] = volume_transfer; //[i_types];
+
     __syncthreads();
-     
+
     // reduce the sum in parallel
     int offs = blockDim.x >> 1;
     while (offs > 0)
         {
         if (threadIdx.x < offs)
-    	{
-    	volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
-    	}
+            {
+            volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs];
+            }
         offs >>= 1;
         __syncthreads();
         }
-    
+
     // write out our partial sum
     if (threadIdx.x == 0)
         {
-        //d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[0];
+        // d_partial_sum_volume[blockIdx.x * tN + i_types] = volume_sdata[0];
         d_partial_sum_volume[blockIdx.x * tN + cN] = volume_sdata[0];
         }
     }
@@ -220,7 +222,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                            d_sum_partial_volume,
                            N,
                            tN,
-			   i_types,
+                           i_types,
                            d_pos,
                            d_image,
                            box,
@@ -266,7 +268,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
                                                            const size_t virial_pitch,
                                                            const unsigned int N,
                                                            const unsigned int* gN,
-                                               		   const unsigned int aN,
+                                                           const unsigned int aN,
                                                            const Scalar4* d_pos,
                                                            const int3* d_image,
                                                            BoxDim box,
@@ -300,7 +302,7 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
     for (int i = 0; i < 6; i++)
         virial[i] = Scalar(0.0);
 
-    unsigned int triN = 1*aN;
+    unsigned int triN = 1 * aN;
 
     // loop over all triangles
     for (int triangle_idx = 0; triangle_idx < n_triangles; triangle_idx++)
@@ -311,8 +313,10 @@ __global__ void gpu_compute_volume_constraint_force_kernel(Scalar4* d_force,
         int cur_triangle_c = cur_triangle.idx[1];
         int cur_triangle_type = cur_triangle.idx[2];
 
-	if(ignore_type) cur_triangle_type = 0;
-	else triN = gN[cur_triangle_type];
+        if (ignore_type)
+            cur_triangle_type = 0;
+        else
+            triN = gN[cur_triangle_type];
 
         // get the angle parameters (MEM TRANSFER: 8 bytes)
         Scalar2 params = __ldg(d_params + cur_triangle_type);
@@ -439,7 +443,7 @@ hipError_t gpu_compute_volume_constraint_force(Scalar4* d_force,
                        virial_pitch,
                        N,
                        gN,
-		       aN,
+                       aN,
                        d_pos,
                        d_image,
                        box,
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
index 7065549509..36088c1f7d 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cuh
@@ -32,7 +32,7 @@ hipError_t gpu_compute_volume_constraint_volume(Scalar* d_sum_volume,
                                                 const group_storage<3>* tlist,
                                                 const unsigned int* tpos_list,
                                                 const Index2D tlist_idx,
-						const bool ignore_type,
+                                                const bool ignore_type,
                                                 const unsigned int* n_triangles_list,
                                                 unsigned int block_size,
                                                 unsigned int num_blocks);
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index 39662a6a5b..564a635f6c 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -35,7 +35,7 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
     //! Constructs the compute
     VolumeConservationMeshForceComputeGPU(std::shared_ptr<SystemDefinition> sysdef,
                                           std::shared_ptr<MeshDefinition> meshdef,
-					  bool ignore_type);
+                                          bool ignore_type);
 
     virtual pybind11::array_t<Scalar> getVolume()
         {
@@ -45,21 +45,20 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
 
     protected:
     unsigned int m_block_size; //!< block size for partial sum memory
-    unsigned int m_num_blocks;       //!< number of memory blocks reserved for partial sum memory
+    unsigned int m_num_blocks; //!< number of memory blocks reserved for partial sum memory
 
     std::shared_ptr<Autotuner<1>> m_tuner; //!< Autotuner for block size
 
     GPUArray<Scalar> m_partial_sum; //!< memory space for partial sum over volume
-    GPUArray<Scalar> m_sum;          //!< memory space for sum over volume
+    GPUArray<Scalar> m_sum;         //!< memory space for sum over volume
 
-    GPUArray<Scalar> m_volume_GPU;          //!< memory space for sum over volume
+    GPUArray<Scalar> m_volume_GPU; //!< memory space for sum over volume
 
     //! Actually compute the forces
     virtual void computeForces(uint64_t timestep);
 
     //! compute volumes
     virtual void computeVolume();
-
     };
 
 namespace detail
diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index 604fe92bcd..c24b7d656d 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -4,7 +4,7 @@
 """Mesh Conservation potential.
 
 Mesh conservation force classes apply a force and virial to every mesh vertex
-particle based on a global or local quantity :math:`A` of the given mesh 
+particle based on a global or local quantity :math:`A` of the given mesh
 triangulation :math:`T`.
 
 .. math::
@@ -17,7 +17,6 @@
 
 """
 
-
 from hoomd.md.mesh.potential import MeshConvervationPotential
 from hoomd.data.typeparam import TypeParameter
 from hoomd.data.parameterdicts import TypeParameterDict
@@ -60,7 +59,7 @@ def __init__(self, mesh, ignore_type=False):
                                TypeParameterDict(k=float, V0=float, len_keys=1))
         self._add_typeparam(params)
 
-        super().__init__(mesh,ignore_type)
+        super().__init__(mesh, ignore_type)
 
     @log(requires_run=True)
     def volume(self):
diff --git a/hoomd/md/mesh/potential.py b/hoomd/md/mesh/potential.py
index 17f7b26f53..2d6e4adc17 100644
--- a/hoomd/md/mesh/potential.py
+++ b/hoomd/md/mesh/potential.py
@@ -71,10 +71,11 @@ def mesh(self, value):
         mesh = validate_mesh(value)
         self._mesh = mesh
 
+
 class MeshConvervationPotential(MeshPotential):
     """Constructs the bond potential applied to a mesh.
 
-    `MeshConvervationPotential` is the base class for global conservation 
+    `MeshConvervationPotential` is the base class for global conservation
     potentials applied to meshes.
 
     Warning:
@@ -82,7 +83,7 @@ class MeshConvervationPotential(MeshPotential):
         for `isinstance` or `issubclass` checks.
     """
 
-    def __init__(self, mesh,ignore_type):
+    def __init__(self, mesh, ignore_type):
         super().__init__(mesh)
         self._ignore_type = ignore_type
 
diff --git a/hoomd/md/pytest/test_meshpotential.py b/hoomd/md/pytest/test_meshpotential.py
index eadcceab4b..66e2be37af 100644
--- a/hoomd/md/pytest/test_meshpotential.py
+++ b/hoomd/md/pytest/test_meshpotential.py
@@ -42,6 +42,7 @@
                               dict(zip(_BendingRigidity_args, val)))
                              for val in zip(*_BendingRigidity_args.values())]
 
+
 def get_mesh_potential_and_args():
     return (_harmonic_arg_list + _FENE_arg_list + _Tether_arg_list
             + _Volume_arg_list + _BendingRigidity_arg_list)
@@ -295,10 +296,10 @@ def test_volume(simulation_factory, tetrahedron_snapshot_factory):
 
     sim.run(0)
 
-    np.testing.assert_allclose(mesh_potential.volume,
-                        [0.08042,0.026807],
-                        rtol=1e-2,
-                        atol=1e-5)
+    np.testing.assert_allclose(mesh_potential.volume, [0.08042, 0.026807],
+                               rtol=1e-2,
+                               atol=1e-5)
+
 
 def test_volume_ignore_type(simulation_factory, tetrahedron_snapshot_factory):
     snap = tetrahedron_snapshot_factory(d=0.969, L=5)
@@ -310,7 +311,7 @@ def test_volume_ignore_type(simulation_factory, tetrahedron_snapshot_factory):
     triangles = [[2, 1, 0], [0, 1, 3], [2, 0, 3], [1, 2, 3]]
     mesh.triangulation = dict(type_ids=type_ids, triangles=triangles)
 
-    mesh_potential = hoomd.md.mesh.conservation.Volume(mesh,ignore_type=True)
+    mesh_potential = hoomd.md.mesh.conservation.Volume(mesh, ignore_type=True)
     mesh_potential.params.default = dict(k=1, V0=1)
 
     integrator = hoomd.md.Integrator(dt=0.005)
@@ -325,10 +326,9 @@ def test_volume_ignore_type(simulation_factory, tetrahedron_snapshot_factory):
 
     sim.run(0)
 
-    np.testing.assert_allclose(mesh_potential.volume,
-                        [0.107227, 0.0],
-                        rtol=1e-2,
-                        atol=1e-5)
+    np.testing.assert_allclose(mesh_potential.volume, [0.107227, 0.0],
+                               rtol=1e-2,
+                               atol=1e-5)
 
 
 def test_auto_detach_simulation(simulation_factory,
diff --git a/sphinx-doc/module-md-mesh-conservation.rst b/sphinx-doc/module-md-mesh-conservation.rst
index d10468589e..97ed6d8c79 100644
--- a/sphinx-doc/module-md-mesh-conservation.rst
+++ b/sphinx-doc/module-md-mesh-conservation.rst
@@ -1,4 +1,4 @@
-.. Copyright (c) 2009-2022 The Regents of the University of Michigan.
+.. Copyright (c) 2009-2024 The Regents of the University of Michigan.
 .. Part of HOOMD-blue, released under the BSD 3-Clause License.
 
 md.mesh.conservation

From 788d2282a0df739e00dd632542c59d9a54db3e97 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Wed, 9 Oct 2024 00:10:51 -0400
Subject: [PATCH 43/50] fix docstrings

---
 hoomd/md/mesh/conservation.py         | 2 +-
 hoomd/md/pytest/test_meshpotential.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index c24b7d656d..1883e7112a 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2009-2024 The Regents of the University of Michigan.
 # Part of HOOMD-blue, released under the BSD 3-Clause License.
 
-"""Mesh Conservation potential.
+r"""Mesh Conservation potential.
 
 Mesh conservation force classes apply a force and virial to every mesh vertex
 particle based on a global or local quantity :math:`A` of the given mesh
diff --git a/hoomd/md/pytest/test_meshpotential.py b/hoomd/md/pytest/test_meshpotential.py
index 66e2be37af..3318a7a17a 100644
--- a/hoomd/md/pytest/test_meshpotential.py
+++ b/hoomd/md/pytest/test_meshpotential.py
@@ -3,7 +3,6 @@
 
 import hoomd
 import pytest
-import math
 import numpy as np
 
 _harmonic_args = {'k': [30.0, 25.0, 20.0], 'r0': [1.6, 1.7, 1.8]}

From 1374506a77ebe154492feffe7c941edfc031123c Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Wed, 16 Oct 2024 12:37:32 -0400
Subject: [PATCH 44/50] add documentation for ignore_type

---
 hoomd/md/mesh/conservation.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index 1883e7112a..d7693ff2c7 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -35,6 +35,9 @@ class Volume(MeshConvervationPotential):
 
     Args:
         mesh (:py:mod:`hoomd.mesh.Mesh`): Mesh data structure constraint.
+        ignore_type (`bool`, optional): Flag to detemine if the volume 
+            constraint is applied to all mesh triangles (``True``) or 
+            per mesh type (``False``). Defaults to ``False``.
 
     Attributes:
         parameter (TypeParameter[dict]):

From 2d3ee45adc0e249809ba08a38d4a14e98c2700b7 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Wed, 16 Oct 2024 12:49:41 -0400
Subject: [PATCH 45/50] combine m_volume and m_volume_GPU

---
 hoomd/md/VolumeConservationMeshForceCompute.cc    | 12 ++++++------
 hoomd/md/VolumeConservationMeshForceCompute.h     |  5 +++--
 hoomd/md/VolumeConservationMeshForceComputeGPU.cc |  8 ++------
 hoomd/md/VolumeConservationMeshForceComputeGPU.h  |  8 --------
 4 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/hoomd/md/VolumeConservationMeshForceCompute.cc b/hoomd/md/VolumeConservationMeshForceCompute.cc
index aa3a7ff5d4..7b9f2594da 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.cc
+++ b/hoomd/md/VolumeConservationMeshForceCompute.cc
@@ -37,15 +37,13 @@ VolumeConservationMeshForceCompute::VolumeConservationMeshForceCompute(
     GPUArray<Scalar2> params(n_types, m_exec_conf);
     m_params.swap(params);
 
-    m_volume = new Scalar[n_types];
+    GPUArray<Scalar> volume(n_types, m_exec_conf);
+    m_volume.swap(volume);
     }
 
 VolumeConservationMeshForceCompute::~VolumeConservationMeshForceCompute()
     {
     m_exec_conf->msg->notice(5) << "Destroying VolumeConservationMeshForceCompute" << endl;
-
-    delete[] m_volume;
-    m_volume = NULL;
     }
 
 /*! \param type Type of the angle to set parameters for
@@ -107,6 +105,7 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
     ArrayHandle<Scalar> h_virial(m_virial, access_location::host, access_mode::overwrite);
     size_t virial_pitch = m_virial.getPitch();
     ArrayHandle<Scalar2> h_params(m_params, access_location::host, access_mode::read);
+    ArrayHandle<Scalar> h_volume(m_volume, access_location::host, access_mode::read);
 
     ArrayHandle<typename Angle::members_t> h_triangles(
         m_mesh_data->getMeshTriangleData()->getMembersArray(),
@@ -180,7 +179,7 @@ void VolumeConservationMeshForceCompute::computeForces(uint64_t timestep)
         else
             triN = h_pts.data[triangle_type];
 
-        Scalar VolDiff = m_volume[triangle_type] - h_params.data[triangle_type].y;
+        Scalar VolDiff = h_volume.data[triangle_type] - h_params.data[triangle_type].y;
 
         Scalar energy = h_params.data[triangle_type].x * VolDiff * VolDiff
                         / (6 * h_params.data[triangle_type].y * triN);
@@ -349,8 +348,9 @@ void VolumeConservationMeshForceCompute::computeVolume()
         }
 #endif
 
+    ArrayHandle<Scalar> h_volume(m_volume, access_location::host, access_mode::overwrite);
     for (unsigned int i = 0; i < n_types; i++)
-        m_volume[i] = global_volume[i];
+        h_volume.data[i] = global_volume[i];
     }
 
 namespace detail
diff --git a/hoomd/md/VolumeConservationMeshForceCompute.h b/hoomd/md/VolumeConservationMeshForceCompute.h
index b082ab2c31..64e0b7f090 100644
--- a/hoomd/md/VolumeConservationMeshForceCompute.h
+++ b/hoomd/md/VolumeConservationMeshForceCompute.h
@@ -77,7 +77,8 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
 
     virtual pybind11::array_t<Scalar> getVolume()
         {
-        return pybind11::array(m_mesh_data->getMeshTriangleData()->getNTypes(), m_volume);
+        ArrayHandle<Scalar> h_volume(m_volume, access_location::host, access_mode::read);
+        return pybind11::array(m_mesh_data->getMeshTriangleData()->getNTypes(), h_volume.data);
         }
 
 #ifdef ENABLE_MPI
@@ -98,7 +99,7 @@ class PYBIND11_EXPORT VolumeConservationMeshForceCompute : public ForceCompute
 
     std::shared_ptr<MeshDefinition> m_mesh_data; //!< Mesh data to use in computing volume energy
 
-    Scalar* m_volume; //! sum of the triangle areas within the mesh
+    GPUArray<Scalar> m_volume; //!< memory space for volume
 
     bool m_ignore_type; //! do we ignore type to calculate global area
 
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
index 46bd4a79da..8c95336fd7 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cc
@@ -41,10 +41,6 @@ VolumeConservationMeshForceComputeGPU::VolumeConservationMeshForceComputeGPU(
     GPUArray<Scalar2> params(NTypes, m_exec_conf);
     m_params.swap(params);
 
-    // allocate and zero device memory
-    GPUArray<Scalar> volume_GPU(NTypes, m_exec_conf);
-    m_volume_GPU.swap(volume_GPU);
-
     GPUArray<Scalar> sum(NTypes, m_exec_conf);
     m_sum.swap(sum);
 
@@ -100,7 +96,7 @@ void VolumeConservationMeshForceComputeGPU::computeForces(uint64_t timestep)
     ArrayHandle<Scalar> d_virial(m_virial, access_location::device, access_mode::overwrite);
     ArrayHandle<Scalar2> d_params(m_params, access_location::device, access_mode::read);
 
-    ArrayHandle<Scalar> d_volume(m_volume_GPU, access_location::device, access_mode::read);
+    ArrayHandle<Scalar> d_volume(m_volume, access_location::device, access_mode::read);
 
     m_tuner->begin();
     kernel::gpu_compute_volume_constraint_force(d_force.data,
@@ -187,7 +183,7 @@ void VolumeConservationMeshForceComputeGPU::computeVolume()
         }
 
     ArrayHandle<Scalar> h_sumVol(m_sum, access_location::host, access_mode::read);
-    ArrayHandle<Scalar> h_volume(m_volume_GPU, access_location::host, access_mode::overwrite);
+    ArrayHandle<Scalar> h_volume(m_volume, access_location::host, access_mode::overwrite);
 #ifdef ENABLE_MPI
     if (m_sysdef->isDomainDecomposed())
         {
diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.h b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
index 564a635f6c..852b34152a 100644
--- a/hoomd/md/VolumeConservationMeshForceComputeGPU.h
+++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.h
@@ -37,12 +37,6 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
                                           std::shared_ptr<MeshDefinition> meshdef,
                                           bool ignore_type);
 
-    virtual pybind11::array_t<Scalar> getVolume()
-        {
-        ArrayHandle<Scalar> h_volume(m_volume_GPU, access_location::host, access_mode::read);
-        return pybind11::array(m_mesh_data->getMeshTriangleData()->getNTypes(), h_volume.data);
-        }
-
     protected:
     unsigned int m_block_size; //!< block size for partial sum memory
     unsigned int m_num_blocks; //!< number of memory blocks reserved for partial sum memory
@@ -52,8 +46,6 @@ class PYBIND11_EXPORT VolumeConservationMeshForceComputeGPU
     GPUArray<Scalar> m_partial_sum; //!< memory space for partial sum over volume
     GPUArray<Scalar> m_sum;         //!< memory space for sum over volume
 
-    GPUArray<Scalar> m_volume_GPU; //!< memory space for sum over volume
-
     //! Actually compute the forces
     virtual void computeForces(uint64_t timestep);
 

From 9dbc29c7bef2fb950a38cbffe112e86b9b726f95 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 16 Oct 2024 16:54:58 +0000
Subject: [PATCH 46/50] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 hoomd/md/mesh/conservation.py         | 4 ++--
 hoomd/md/pytest/test_meshpotential.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index d7693ff2c7..3e185987af 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -35,8 +35,8 @@ class Volume(MeshConvervationPotential):
 
     Args:
         mesh (:py:mod:`hoomd.mesh.Mesh`): Mesh data structure constraint.
-        ignore_type (`bool`, optional): Flag to detemine if the volume 
-            constraint is applied to all mesh triangles (``True``) or 
+        ignore_type (`bool`, optional): Flag to detemine if the volume
+            constraint is applied to all mesh triangles (``True``) or
             per mesh type (``False``). Defaults to ``False``.
 
     Attributes:
diff --git a/hoomd/md/pytest/test_meshpotential.py b/hoomd/md/pytest/test_meshpotential.py
index b4bb6f64a5..1679225e51 100644
--- a/hoomd/md/pytest/test_meshpotential.py
+++ b/hoomd/md/pytest/test_meshpotential.py
@@ -52,8 +52,8 @@
 
 def get_mesh_potential_and_args():
     return (_harmonic_arg_list + _FENE_arg_list + _Tether_arg_list
-            + _BendingRigidity_arg_list + _Helfrich_arg_list
-            + _Volume_arg_list)
+            + _BendingRigidity_arg_list + _Helfrich_arg_list + _Volume_arg_list)
+
 
 def get_mesh_potential_args_forces_and_energies():
     harmonic_forces = [[[37.86, 0., -26.771063], [-37.86, 0., -26.771063],

From 6e9f421821a7538c86d3ad5949710ced8eadca31 Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Wed, 16 Oct 2024 12:59:35 -0400
Subject: [PATCH 47/50] rephrase docs

---
 hoomd/md/mesh/conservation.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index 3e185987af..ba48f9d11b 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -35,9 +35,10 @@ class Volume(MeshConvervationPotential):
 
     Args:
         mesh (:py:mod:`hoomd.mesh.Mesh`): Mesh data structure constraint.
-        ignore_type (`bool`, optional): Flag to detemine if the volume
-            constraint is applied to all mesh triangles (``True``) or
-            per mesh type (``False``). Defaults to ``False``.
+        ignore_type (`bool`, optional): Flag to detemine if a single volume
+            constraint is applied to all mesh triangles regardless of mesh 
+            type (``True``) or a volume constraints per mesh type 
+            (``False``). Defaults to ``False``.
 
     Attributes:
         parameter (TypeParameter[dict]):

From add9a710778a0dd1e9cfbb309b8df1e2b38f77d8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 16 Oct 2024 17:01:47 +0000
Subject: [PATCH 48/50] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 hoomd/md/mesh/conservation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index ba48f9d11b..e743dc0ac5 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -36,8 +36,8 @@ class Volume(MeshConvervationPotential):
     Args:
         mesh (:py:mod:`hoomd.mesh.Mesh`): Mesh data structure constraint.
         ignore_type (`bool`, optional): Flag to detemine if a single volume
-            constraint is applied to all mesh triangles regardless of mesh 
-            type (``True``) or a volume constraints per mesh type 
+            constraint is applied to all mesh triangles regardless of mesh
+            type (``True``) or a volume constraints per mesh type
             (``False``). Defaults to ``False``.
 
     Attributes:

From e3c0c72e6ced174cdce75757b27173e0f57ae16f Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Wed, 16 Oct 2024 13:03:13 -0400
Subject: [PATCH 49/50] fix typo

---
 hoomd/md/mesh/conservation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index ba48f9d11b..e099f93fd8 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -37,7 +37,7 @@ class Volume(MeshConvervationPotential):
         mesh (:py:mod:`hoomd.mesh.Mesh`): Mesh data structure constraint.
         ignore_type (`bool`, optional): Flag to detemine if a single volume
             constraint is applied to all mesh triangles regardless of mesh 
-            type (``True``) or a volume constraints per mesh type 
+            type (``True``) or a volume constraint per mesh type 
             (``False``). Defaults to ``False``.
 
     Attributes:

From 35ffca679d84f6198716b4b29509eeb6f503c37e Mon Sep 17 00:00:00 2001
From: SchoeniPhlippsn <pschoenh@cheme-hodges.engin.umich.edu>
Date: Wed, 16 Oct 2024 13:49:46 -0400
Subject: [PATCH 50/50] final rephrasing

---
 hoomd/md/mesh/conservation.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/hoomd/md/mesh/conservation.py b/hoomd/md/mesh/conservation.py
index 925d477911..fbff1cfc00 100644
--- a/hoomd/md/mesh/conservation.py
+++ b/hoomd/md/mesh/conservation.py
@@ -35,10 +35,9 @@ class Volume(MeshConvervationPotential):
 
     Args:
         mesh (:py:mod:`hoomd.mesh.Mesh`): Mesh data structure constraint.
-        ignore_type (`bool`, optional): Flag to detemine if a single volume
-            constraint is applied to all mesh triangles regardless of mesh
-            type (``True``) or a volume constraint per mesh type
-            (``False``). Defaults to ``False``.
+        ignore_type (`bool`, *optional*): When `True`, ignore the mesh type
+            and compute one conservation energy over all triangles in the
+            mesh instead of one per mesh type. Defaults to `False`.
 
     Attributes:
         parameter (TypeParameter[dict]):