From 8696247f9d6baa1a0feb9103640bd41c8a35379e Mon Sep 17 00:00:00 2001 From: Kyle Bayes Date: Sat, 18 Mar 2023 13:03:11 -0700 Subject: [PATCH] Add mj_stackAllocInt to get correct size for allocating ints on mjData stack. Reducing stack memory usage by 10% - 15%. PiperOrigin-RevId: 517672195 Change-Id: Ie14e6c9c99e8b9e1c5d3686b77631f2df0651c67 --- doc/APIreference/functions.rst | 11 +++++++- doc/changelog.rst | 2 ++ doc/includes/references.h | 1 + include/mujoco/mujoco.h | 5 +++- introspect/functions.py | 22 +++++++++++++++- src/engine/engine_collision_driver.c | 5 ++-- src/engine/engine_core_constraint.c | 26 +++++++++---------- src/engine/engine_core_smooth.c | 6 ++--- src/engine/engine_derivative.c | 6 ++--- src/engine/engine_derivative_fd.c | 2 +- src/engine/engine_io.c | 17 ++++++++++-- src/engine/engine_io.h | 5 +++- src/engine/engine_solver.c | 12 ++++----- src/engine/engine_support.c | 2 +- src/engine/engine_util_sparse.c | 6 ++--- .../engine_util_sparse_benchmark_test.cc | 20 +++++++------- unity/Runtime/Bindings/MjBindings.cs | 3 +++ 17 files changed, 103 insertions(+), 48 deletions(-) diff --git a/doc/APIreference/functions.rst b/doc/APIreference/functions.rst index de3e67cf40..3585e9612e 100644 --- a/doc/APIreference/functions.rst +++ b/doc/APIreference/functions.rst @@ -1187,7 +1187,16 @@ mj_stackAlloc .. mujoco-include:: mj_stackAlloc -Allocate array of specified size on :ref:`mjData` stack. Call mju_error on stack overflow. +Allocate array of mjtNums on :ref:`mjData` stack. Call mju_error on stack overflow. + +.. _mj_stackAllocInt: + +mj_stackAllocInt +~~~~~~~~~~~~~~~~ + +.. mujoco-include:: mj_stackAllocInt + +Allocate array of ints on :ref:`mjData` stack. Call mju_error on stack overflow. .. _mj_deleteData: diff --git a/doc/changelog.rst b/doc/changelog.rst index 1be00a5a14..09f40bccce 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -48,6 +48,8 @@ General - Modified :ref:`mju_error` and :ref:`mju_warning` to be variadic functions (support for printf-like arguments). The functions :ref:`mju_error_i`, :ref:`mju_error_s`, :ref:`mju_warning_i`, and :ref:`mju_warning_s` are now deprecated. - Implemented a performant ``mju_sqrMatTDSparse`` function that doesn't require dense memory allocation. +- Added ``mj_stackAllocInt`` to get correct size for allocating ints on mjData stack. Reducing stack memory usage + by 10% - 15%. Python bindings diff --git a/doc/includes/references.h b/doc/includes/references.h index 6c7c5c7e34..553560ec5d 100644 --- a/doc/includes/references.h +++ b/doc/includes/references.h @@ -1873,6 +1873,7 @@ void mj_resetData(const mjModel* m, mjData* d); void mj_resetDataDebug(const mjModel* m, mjData* d, unsigned char debug_value); void mj_resetDataKeyframe(const mjModel* m, mjData* d, int key); mjtNum* mj_stackAlloc(mjData* d, int size); +int* mj_stackAllocInt(mjData* d, int size); void mj_deleteData(mjData* d); void mj_resetCallbacks(void); void mj_setConst(mjModel* m, mjData* d); diff --git a/include/mujoco/mujoco.h b/include/mujoco/mujoco.h index c1d18ec281..587cf7b9db 100644 --- a/include/mujoco/mujoco.h +++ b/include/mujoco/mujoco.h @@ -201,9 +201,12 @@ MJAPI void mj_resetDataDebug(const mjModel* m, mjData* d, unsigned char debug_va // Reset data, set fields from specified keyframe. MJAPI void mj_resetDataKeyframe(const mjModel* m, mjData* d, int key); -// Allocate array of specified size on mjData stack. Call mju_error on stack overflow. +// Allocate array of mjtNums on mjData stack. Call mju_error on stack overflow. MJAPI mjtNum* mj_stackAlloc(mjData* d, int size); +// Allocate array of ints on mjData stack. Call mju_error on stack overflow. +MJAPI int* mj_stackAllocInt(mjData* d, int size); + // Free memory allocation in mjData. MJAPI void mj_deleteData(mjData* d); diff --git a/introspect/functions.py b/introspect/functions.py index 764cc07ef5..b60d81cbaa 100644 --- a/introspect/functions.py +++ b/introspect/functions.py @@ -695,7 +695,27 @@ type=ValueType(name='int'), ), ), - doc='Allocate array of specified size on mjData stack. Call mju_error on stack overflow.', # pylint: disable=line-too-long + doc='Allocate array of mjtNums on mjData stack. Call mju_error on stack overflow.', # pylint: disable=line-too-long + )), + ('mj_stackAllocInt', + FunctionDecl( + name='mj_stackAllocInt', + return_type=PointerType( + inner_type=ValueType(name='int'), + ), + parameters=( + FunctionParameterDecl( + name='d', + type=PointerType( + inner_type=ValueType(name='mjData'), + ), + ), + FunctionParameterDecl( + name='size', + type=ValueType(name='int'), + ), + ), + doc='Allocate array of ints on mjData stack. Call mju_error on stack overflow.', # pylint: disable=line-too-long )), ('mj_deleteData', FunctionDecl( diff --git a/src/engine/engine_collision_driver.c b/src/engine/engine_collision_driver.c index 1b1e9c4e4a..4ec9bb7e0f 100644 --- a/src/engine/engine_collision_driver.c +++ b/src/engine/engine_collision_driver.c @@ -388,8 +388,9 @@ void mj_collision(const mjModel* m, mjData* d) { // dynamic only or merge; apply exclude else { // call broadphase collision detector - broadphasepair = (int*)mj_stackAlloc(d, (m->nbody*(m->nbody-1))/2); - nbodypair = mj_broadphase(m, d, broadphasepair, (m->nbody*(m->nbody-1))/2); + int npairs = (m->nbody*(m->nbody - 1))/2; + broadphasepair = mj_stackAllocInt(d, npairs); + nbodypair = mj_broadphase(m, d, broadphasepair, npairs); unsigned int last_signature = -1; // loop over body pairs (broadphase or all) diff --git a/src/engine/engine_core_constraint.c b/src/engine/engine_core_constraint.c index 60e0937d1e..c83b228c62 100644 --- a/src/engine/engine_core_constraint.c +++ b/src/engine/engine_core_constraint.c @@ -374,9 +374,9 @@ void mj_instantiateEquality(const mjModel* m, mjData* d) { jac[1] = mj_stackAlloc(d, 6*nv); jacdif = mj_stackAlloc(d, 6*nv); if (issparse) { - chain = (int*)mj_stackAlloc(d, nv); - chain2 = (int*)mj_stackAlloc(d, nv); - buf_ind = (int*)mj_stackAlloc(d, nv); + chain = mj_stackAllocInt(d, nv); + chain2 = mj_stackAllocInt(d, nv); + buf_ind = mj_stackAllocInt(d, nv); sparse_buf = mj_stackAlloc(d, nv); } @@ -775,7 +775,7 @@ void mj_instantiateContact(const mjModel* m, mjData* d) { jac1r = mj_stackAlloc(d, 3*NV); jac2r = mj_stackAlloc(d, 3*NV); if (issparse) { - chain = (int*)mj_stackAlloc(d, NV); + chain = mj_stackAllocInt(d, NV); } // find contacts to be included @@ -1321,8 +1321,8 @@ static inline int mj_ne(const mjModel* m, mjData* d, int* nnz) { mjMARKSTACK; if (nnz) { - chain = (int*)mj_stackAlloc(d, nv); - chain2 = (int*)mj_stackAlloc(d, nv); + chain = mj_stackAllocInt(d, nv); + chain2 = mj_stackAllocInt(d, nv); } // find active equality constraints @@ -1692,13 +1692,13 @@ void mj_projectConstraint(const mjModel* m, mjData* d) { // sparse if (mj_isSparse(m)) { // space for JM2 and JM2T indices - int* rownnz = (int*)mj_stackAlloc(d, nefc); - int* rowadr = (int*)mj_stackAlloc(d, nefc); - int* colind = (int*)mj_stackAlloc(d, nefc*nv); - int* rowsuper = (int*)mj_stackAlloc(d, nefc); - int* rownnzT = (int*)mj_stackAlloc(d, nv); - int* rowadrT = (int*)mj_stackAlloc(d, nv); - int* colindT = (int*)mj_stackAlloc(d, nv*nefc); + int* rownnz = mj_stackAllocInt(d, nefc); + int* rowadr = mj_stackAllocInt(d, nefc); + int* colind = mj_stackAllocInt(d, nefc*nv); + int* rowsuper = mj_stackAllocInt(d, nefc); + int* rownnzT = mj_stackAllocInt(d, nv); + int* rowadrT = mj_stackAllocInt(d, nv); + int* colindT = mj_stackAllocInt(d, nv*nefc); // construct JM2 = backsubM2(J')' by rows for (int r=0; rbody_inertia + 3*i; diff --git a/src/engine/engine_derivative_fd.c b/src/engine/engine_derivative_fd.c index b08863a6d9..cf8049adea 100644 --- a/src/engine/engine_derivative_fd.c +++ b/src/engine/engine_derivative_fd.c @@ -185,7 +185,7 @@ void mjd_passive_velFD(const mjModel* m, mjData* d, mjtNum eps) { mjMARKSTACK; mjtNum* qfrc_passive = mj_stackAlloc(d, nv); mjtNum* fd = mj_stackAlloc(d, nv); - int* cnt = (int*)mj_stackAlloc(d, nv); + int* cnt = mj_stackAllocInt(d, nv); // clear row counters memset(cnt, 0, nv*sizeof(int)); diff --git a/src/engine/engine_io.c b/src/engine/engine_io.c index 11450a5eb6..087d22b185 100644 --- a/src/engine/engine_io.c +++ b/src/engine/engine_io.c @@ -819,7 +819,7 @@ static void makeDSparse(const mjModel* m, mjData* d) { int* colind = d->D_colind; mjMARKSTACK; - int* remaining = (int*)mj_stackAlloc(d, nv); + int* remaining = mj_stackAllocInt(d, nv); // compute rownnz memset(rownnz, 0, nv * sizeof(int)); @@ -912,7 +912,7 @@ static void makeBSparse(const mjModel* m, mjData* d) { // allocate and clear incremental row counts mjMARKSTACK; - int* cnt = (int*)mj_stackAlloc(d, nbody); + int* cnt = mj_stackAllocInt(d, nbody); memset(cnt, 0, sizeof(int) * nbody); // add subtree dofs to colind @@ -1232,6 +1232,19 @@ mjtNum* mj_stackAlloc(mjData* d, int size) { +int* mj_stackAllocInt(mjData* d, int size) { + // optimize for mjtNum being twice the size of int + if (2*sizeof(int) == sizeof(mjtNum)) { + return (int*)mj_stackAlloc(d, (size + 1) >> 1); + } + + // arbitrary bytes sizes + int new_size = (sizeof(int)*size + sizeof(mjtNum) - 1) / sizeof(mjtNum); + return (int*)mj_stackAlloc(d, new_size); +} + + + // clear data, set defaults static void _resetData(const mjModel* m, mjData* d, unsigned char debug_value) { //------------------------------ save plugin state and data diff --git a/src/engine/engine_io.h b/src/engine/engine_io.h index 3fe66c0f2b..09603059c5 100644 --- a/src/engine/engine_io.h +++ b/src/engine/engine_io.h @@ -102,9 +102,12 @@ MJAPI void mj_resetDataKeyframe(const mjModel* m, mjData* d, int key); // mjData arena allocate void* mj_arenaAlloc(mjData* d, int bytes, int alignment); -// mjData stack allocate +// mjData stack allocate for array of mjtNums MJAPI mjtNum* mj_stackAlloc(mjData* d, int size); +// mjData stack allocate for array of ints +MJAPI int* mj_stackAllocInt(mjData* d, int size); + // de-allocate data MJAPI void mj_deleteData(mjData* d); diff --git a/src/engine/engine_solver.c b/src/engine/engine_solver.c index e29c729bd9..afa806b6b6 100644 --- a/src/engine/engine_solver.c +++ b/src/engine/engine_solver.c @@ -309,7 +309,7 @@ void mj_solPGS(const mjModel* m, mjData* d, int maxiter) { mjContact* con; mjMARKSTACK; mjtNum* ARinv = mj_stackAlloc(d, nefc); - int* oldstate = (int*)mj_stackAlloc(d, nefc); + int* oldstate = mj_stackAllocInt(d, nefc); // precompute inverse diagonal of AR ARdiaginv(m, d, ARinv, 0); @@ -511,7 +511,7 @@ void mj_solNoSlip(const mjModel* m, mjData* d, int maxiter) { mjContact* con; mjMARKSTACK; mjtNum* ARinv = mj_stackAlloc(d, nefc); - int* oldstate = (int*)mj_stackAlloc(d, nefc); + int* oldstate = mj_stackAllocInt(d, nefc); // precompute inverse diagonal of A ARdiaginv(m, d, ARinv, 1); @@ -768,9 +768,9 @@ static void CGallocate(const mjModel* m, mjData* d, if (flg_Newton) { ctx->H = mj_stackAlloc(d, nv*nv); ctx->Hcone = mj_stackAlloc(d, nv*nv); - ctx->rownnz = (int*)mj_stackAlloc(d, nv); - ctx->rowadr = (int*)mj_stackAlloc(d, nv); - ctx->colind = (int*)mj_stackAlloc(d, nv*nv); + ctx->rownnz = mj_stackAllocInt(d, nv); + ctx->rowadr = mj_stackAllocInt(d, nv); + ctx->colind = mj_stackAllocInt(d, nv*nv); } } @@ -1507,7 +1507,7 @@ static void mj_solCGNewton(const mjModel* m, mjData* d, int maxiter, int flg_New Mgradold = mj_stackAlloc(d, nv); Mgraddif = mj_stackAlloc(d, nv); } - int* oldstate = (int*)mj_stackAlloc(d, nefc); + int* oldstate = mj_stackAllocInt(d, nefc); // initialize matrix-vector products mj_mulM(m, d, ctx.Ma, d->qacc); diff --git a/src/engine/engine_support.c b/src/engine/engine_support.c index bcc8978115..1e5a75c4c0 100644 --- a/src/engine/engine_support.c +++ b/src/engine/engine_support.c @@ -984,7 +984,7 @@ void mj_copyM2DSparse(const mjModel* m, mjData* d, mjtNum* dst, const mjtNum* sr mjMARKSTACK; // init remaining - int* remaining = (int*)mj_stackAlloc(d, nv); + int* remaining = mj_stackAllocInt(d, nv); memcpy(remaining, d->D_rownnz, nv * sizeof(int)); // copy data diff --git a/src/engine/engine_util_sparse.c b/src/engine/engine_util_sparse.c index 3308069854..770f75c3be 100644 --- a/src/engine/engine_util_sparse.c +++ b/src/engine/engine_util_sparse.c @@ -418,8 +418,8 @@ void mju_sqrMatTDSparse(mjtNum* res, const mjtNum* mat, const mjtNum* matT, int* res_rownnz, int* res_rowadr, int* res_colind, const int* rownnz, const int* rowadr, const int* colind, const int* rowsuper, - const int* rownnzT, const int* rowadrT, - const int* colindT, const int* rowsuperT, + const int* rownnzT, const int* rowadrT, + const int* colindT, const int* rowsuperT, mjData* d) { // allocate space for accumulation buffer and matT mjMARKSTACK; @@ -434,7 +434,7 @@ void mju_sqrMatTDSparse(mjtNum* res, const mjtNum* mat, const mjtNum* matT, // these mark the currently set columns in the dense row buffer, // used for when creating the resulting sparse row - int* markers = (int*) mj_stackAlloc(d, nc); + int* markers = mj_stackAllocInt(d, nc); for (int i=0; inv*m->nv); - int* rownnz = (int*)mj_stackAlloc(d, m->nv); - int* rowadr = (int*)mj_stackAlloc(d, m->nv); - int* colind = (int*)mj_stackAlloc(d, m->nv*m->nv); + int* rownnz = mj_stackAllocInt(d, m->nv); + int* rowadr = mj_stackAllocInt(d, m->nv); + int* colind = mj_stackAllocInt(d, m->nv*m->nv); // compute D corresponding to quad states mjtNum* D = mj_stackAlloc(d, d->nefc); @@ -511,9 +511,9 @@ static void BM_transposeSparse(benchmark::State& state, TransposeFuncPtr func) { // need uncompressed layout mjtNum* res = mj_stackAlloc(d, m->nv * d->nefc); - int* res_rownnz = (int*)mj_stackAlloc(d, m->nv); - int* res_rowadr = (int*)mj_stackAlloc(d, m->nv); - int* res_colind = (int*)mj_stackAlloc(d, m->nv * d->nefc); + int* res_rownnz = mj_stackAllocInt(d, m->nv); + int* res_rowadr = mj_stackAllocInt(d, m->nv); + int* res_colind = mj_stackAllocInt(d, m->nv * d->nefc); // time benchmark for (auto s : state) { @@ -555,9 +555,9 @@ static void BM_sqrMatTDSparse(benchmark::State& state, SqrMatTDFuncPtr func) { // allocate mjMARKSTACK; mjtNum* H = mj_stackAlloc(d, m->nv * m->nv); - int* rownnz = (int*)mj_stackAlloc(d, m->nv); - int* rowadr = (int*)mj_stackAlloc(d, m->nv); - int* colind = (int*)mj_stackAlloc(d, m->nv * m->nv); + int* rownnz = mj_stackAllocInt(d, m->nv); + int* rowadr = mj_stackAllocInt(d, m->nv); + int* colind = mj_stackAllocInt(d, m->nv * m->nv); // compute D corresponding to quad states mjtNum* D = mj_stackAlloc(d, d->nefc); diff --git a/unity/Runtime/Bindings/MjBindings.cs b/unity/Runtime/Bindings/MjBindings.cs index 31894b5d61..e7666ec326 100644 --- a/unity/Runtime/Bindings/MjBindings.cs +++ b/unity/Runtime/Bindings/MjBindings.cs @@ -2833,6 +2833,9 @@ public unsafe struct mjvFigure_ { [DllImport("mujoco", CallingConvention = CallingConvention.Cdecl)] public static unsafe extern double* mj_stackAlloc(mjData_* d, int size); +[DllImport("mujoco", CallingConvention = CallingConvention.Cdecl)] +public static unsafe extern int* mj_stackAllocInt(mjData_* d, int size); + [DllImport("mujoco", CallingConvention = CallingConvention.Cdecl)] public static unsafe extern void mj_deleteData(mjData_* d);