Skip to content

Commit

Permalink
Add mj_stackAllocInt to get correct size for allocating ints on mjDat…
Browse files Browse the repository at this point in the history
…a stack. Reducing stack memory usage by 10% - 15%.

PiperOrigin-RevId: 517672195
Change-Id: Ie14e6c9c99e8b9e1c5d3686b77631f2df0651c67
  • Loading branch information
kbayes authored and copybara-github committed Mar 18, 2023
1 parent fae5e82 commit 8696247
Show file tree
Hide file tree
Showing 17 changed files with 103 additions and 48 deletions.
11 changes: 10 additions & 1 deletion doc/APIreference/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1187,7 +1187,16 @@ mj_stackAlloc

.. mujoco-include:: mj_stackAlloc

Allocate array of specified size on :ref:`mjData` stack. Call mju_error on stack overflow.
Allocate array of mjtNums on :ref:`mjData` stack. Call mju_error on stack overflow.

.. _mj_stackAllocInt:

mj_stackAllocInt
~~~~~~~~~~~~~~~~

.. mujoco-include:: mj_stackAllocInt

Allocate array of ints on :ref:`mjData` stack. Call mju_error on stack overflow.

.. _mj_deleteData:

Expand Down
2 changes: 2 additions & 0 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ General
- Modified :ref:`mju_error` and :ref:`mju_warning` to be variadic functions (support for printf-like arguments). The
functions :ref:`mju_error_i`, :ref:`mju_error_s`, :ref:`mju_warning_i`, and :ref:`mju_warning_s` are now deprecated.
- Implemented a performant ``mju_sqrMatTDSparse`` function that doesn't require dense memory allocation.
- Added ``mj_stackAllocInt`` to get correct size for allocating ints on mjData stack. Reducing stack memory usage
by 10% - 15%.


Python bindings
Expand Down
1 change: 1 addition & 0 deletions doc/includes/references.h
Original file line number Diff line number Diff line change
Expand Up @@ -1873,6 +1873,7 @@ void mj_resetData(const mjModel* m, mjData* d);
void mj_resetDataDebug(const mjModel* m, mjData* d, unsigned char debug_value);
void mj_resetDataKeyframe(const mjModel* m, mjData* d, int key);
mjtNum* mj_stackAlloc(mjData* d, int size);
int* mj_stackAllocInt(mjData* d, int size);
void mj_deleteData(mjData* d);
void mj_resetCallbacks(void);
void mj_setConst(mjModel* m, mjData* d);
Expand Down
5 changes: 4 additions & 1 deletion include/mujoco/mujoco.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,12 @@ MJAPI void mj_resetDataDebug(const mjModel* m, mjData* d, unsigned char debug_va
// Reset data, set fields from specified keyframe.
MJAPI void mj_resetDataKeyframe(const mjModel* m, mjData* d, int key);

// Allocate array of specified size on mjData stack. Call mju_error on stack overflow.
// Allocate array of mjtNums on mjData stack. Call mju_error on stack overflow.
MJAPI mjtNum* mj_stackAlloc(mjData* d, int size);

// Allocate array of ints on mjData stack. Call mju_error on stack overflow.
MJAPI int* mj_stackAllocInt(mjData* d, int size);

// Free memory allocation in mjData.
MJAPI void mj_deleteData(mjData* d);

Expand Down
22 changes: 21 additions & 1 deletion introspect/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,27 @@
type=ValueType(name='int'),
),
),
doc='Allocate array of specified size on mjData stack. Call mju_error on stack overflow.', # pylint: disable=line-too-long
doc='Allocate array of mjtNums on mjData stack. Call mju_error on stack overflow.', # pylint: disable=line-too-long
)),
('mj_stackAllocInt',
FunctionDecl(
name='mj_stackAllocInt',
return_type=PointerType(
inner_type=ValueType(name='int'),
),
parameters=(
FunctionParameterDecl(
name='d',
type=PointerType(
inner_type=ValueType(name='mjData'),
),
),
FunctionParameterDecl(
name='size',
type=ValueType(name='int'),
),
),
doc='Allocate array of ints on mjData stack. Call mju_error on stack overflow.', # pylint: disable=line-too-long
)),
('mj_deleteData',
FunctionDecl(
Expand Down
5 changes: 3 additions & 2 deletions src/engine/engine_collision_driver.c
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,9 @@ void mj_collision(const mjModel* m, mjData* d) {
// dynamic only or merge; apply exclude
else {
// call broadphase collision detector
broadphasepair = (int*)mj_stackAlloc(d, (m->nbody*(m->nbody-1))/2);
nbodypair = mj_broadphase(m, d, broadphasepair, (m->nbody*(m->nbody-1))/2);
int npairs = (m->nbody*(m->nbody - 1))/2;
broadphasepair = mj_stackAllocInt(d, npairs);
nbodypair = mj_broadphase(m, d, broadphasepair, npairs);
unsigned int last_signature = -1;

// loop over body pairs (broadphase or all)
Expand Down
26 changes: 13 additions & 13 deletions src/engine/engine_core_constraint.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,9 @@ void mj_instantiateEquality(const mjModel* m, mjData* d) {
jac[1] = mj_stackAlloc(d, 6*nv);
jacdif = mj_stackAlloc(d, 6*nv);
if (issparse) {
chain = (int*)mj_stackAlloc(d, nv);
chain2 = (int*)mj_stackAlloc(d, nv);
buf_ind = (int*)mj_stackAlloc(d, nv);
chain = mj_stackAllocInt(d, nv);
chain2 = mj_stackAllocInt(d, nv);
buf_ind = mj_stackAllocInt(d, nv);
sparse_buf = mj_stackAlloc(d, nv);
}

Expand Down Expand Up @@ -775,7 +775,7 @@ void mj_instantiateContact(const mjModel* m, mjData* d) {
jac1r = mj_stackAlloc(d, 3*NV);
jac2r = mj_stackAlloc(d, 3*NV);
if (issparse) {
chain = (int*)mj_stackAlloc(d, NV);
chain = mj_stackAllocInt(d, NV);
}

// find contacts to be included
Expand Down Expand Up @@ -1321,8 +1321,8 @@ static inline int mj_ne(const mjModel* m, mjData* d, int* nnz) {
mjMARKSTACK;

if (nnz) {
chain = (int*)mj_stackAlloc(d, nv);
chain2 = (int*)mj_stackAlloc(d, nv);
chain = mj_stackAllocInt(d, nv);
chain2 = mj_stackAllocInt(d, nv);
}

// find active equality constraints
Expand Down Expand Up @@ -1692,13 +1692,13 @@ void mj_projectConstraint(const mjModel* m, mjData* d) {
// sparse
if (mj_isSparse(m)) {
// space for JM2 and JM2T indices
int* rownnz = (int*)mj_stackAlloc(d, nefc);
int* rowadr = (int*)mj_stackAlloc(d, nefc);
int* colind = (int*)mj_stackAlloc(d, nefc*nv);
int* rowsuper = (int*)mj_stackAlloc(d, nefc);
int* rownnzT = (int*)mj_stackAlloc(d, nv);
int* rowadrT = (int*)mj_stackAlloc(d, nv);
int* colindT = (int*)mj_stackAlloc(d, nv*nefc);
int* rownnz = mj_stackAllocInt(d, nefc);
int* rowadr = mj_stackAllocInt(d, nefc);
int* colind = mj_stackAllocInt(d, nefc*nv);
int* rowsuper = mj_stackAllocInt(d, nefc);
int* rownnzT = mj_stackAllocInt(d, nv);
int* rowadrT = mj_stackAllocInt(d, nv);
int* colindT = mj_stackAllocInt(d, nv*nefc);

// construct JM2 = backsubM2(J')' by rows
for (int r=0; r<nefc; r++) {
Expand Down
6 changes: 3 additions & 3 deletions src/engine/engine_core_smooth.c
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,8 @@ void mj_tendon(const mjModel* m, mjData* d) {
jacdif = mj_stackAlloc(d, 3*nv);
tmp = mj_stackAlloc(d, nv);
if (issparse) {
chain = (int*)mj_stackAlloc(d, nv);
buf_ind = (int*)mj_stackAlloc(d, nv);
chain = mj_stackAllocInt(d, nv);
buf_ind = mj_stackAllocInt(d, nv);
sparse_buf = mj_stackAlloc(d, nv);
}

Expand Down Expand Up @@ -869,7 +869,7 @@ void mj_transmission(const mjModel* m, mjData* d) {
jacdifp = mj_stackAlloc(d, 3*nv);
jac1p = mj_stackAlloc(d, 3*nv);
jac2p = mj_stackAlloc(d, 3*nv);
chain = issparse ? (int*)mj_stackAlloc(d, nv) : NULL;
chain = issparse ? mj_stackAllocInt(d, nv) : NULL;
}

// clear efc_force and moment_exclude
Expand Down
6 changes: 3 additions & 3 deletions src/engine/engine_derivative.c
Original file line number Diff line number Diff line change
Expand Up @@ -1122,8 +1122,8 @@ void mjd_ellipsoidFluid(const mjModel* m, mjData* d, int bodyid) {
int rownnz[6], rowadr[6];
mjtNum* J = mj_stackAlloc(d, 6*nv);
mjtNum* tmp = mj_stackAlloc(d, 3*nv);
int* colind = (int*) mj_stackAlloc(d, 6*nv);
int* colind_compressed = (int*) mj_stackAlloc(d, 6*nv);
int* colind = mj_stackAllocInt(d, 6*nv);
int* colind_compressed = mj_stackAllocInt(d, 6*nv);

mjtNum lvel[6], wind[6], lwind[6];
mjtNum geom_interaction_coef, magnus_lift_coef, kutta_lift_coef;
Expand Down Expand Up @@ -1239,7 +1239,7 @@ void mjd_inertiaBoxFluid(const mjModel* m, mjData* d, int i)
int rownnz[6], rowadr[6];
mjtNum* J = mj_stackAlloc(d, 6*nv);
mjtNum* tmp = mj_stackAlloc(d, 3*nv);
int* colind = (int*) mj_stackAlloc(d, 6*nv);
int* colind = mj_stackAllocInt(d, 6*nv);

mjtNum lvel[6], wind[6], lwind[6], box[3], B;
mjtNum* inertia = m->body_inertia + 3*i;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/engine_derivative_fd.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ void mjd_passive_velFD(const mjModel* m, mjData* d, mjtNum eps) {
mjMARKSTACK;
mjtNum* qfrc_passive = mj_stackAlloc(d, nv);
mjtNum* fd = mj_stackAlloc(d, nv);
int* cnt = (int*)mj_stackAlloc(d, nv);
int* cnt = mj_stackAllocInt(d, nv);

// clear row counters
memset(cnt, 0, nv*sizeof(int));
Expand Down
17 changes: 15 additions & 2 deletions src/engine/engine_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ static void makeDSparse(const mjModel* m, mjData* d) {
int* colind = d->D_colind;

mjMARKSTACK;
int* remaining = (int*)mj_stackAlloc(d, nv);
int* remaining = mj_stackAllocInt(d, nv);

// compute rownnz
memset(rownnz, 0, nv * sizeof(int));
Expand Down Expand Up @@ -912,7 +912,7 @@ static void makeBSparse(const mjModel* m, mjData* d) {

// allocate and clear incremental row counts
mjMARKSTACK;
int* cnt = (int*)mj_stackAlloc(d, nbody);
int* cnt = mj_stackAllocInt(d, nbody);
memset(cnt, 0, sizeof(int) * nbody);

// add subtree dofs to colind
Expand Down Expand Up @@ -1232,6 +1232,19 @@ mjtNum* mj_stackAlloc(mjData* d, int size) {



int* mj_stackAllocInt(mjData* d, int size) {
// optimize for mjtNum being twice the size of int
if (2*sizeof(int) == sizeof(mjtNum)) {
return (int*)mj_stackAlloc(d, (size + 1) >> 1);
}

// arbitrary bytes sizes
int new_size = (sizeof(int)*size + sizeof(mjtNum) - 1) / sizeof(mjtNum);
return (int*)mj_stackAlloc(d, new_size);
}



// clear data, set defaults
static void _resetData(const mjModel* m, mjData* d, unsigned char debug_value) {
//------------------------------ save plugin state and data
Expand Down
5 changes: 4 additions & 1 deletion src/engine/engine_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,12 @@ MJAPI void mj_resetDataKeyframe(const mjModel* m, mjData* d, int key);
// mjData arena allocate
void* mj_arenaAlloc(mjData* d, int bytes, int alignment);

// mjData stack allocate
// mjData stack allocate for array of mjtNums
MJAPI mjtNum* mj_stackAlloc(mjData* d, int size);

// mjData stack allocate for array of ints
MJAPI int* mj_stackAllocInt(mjData* d, int size);

// de-allocate data
MJAPI void mj_deleteData(mjData* d);

Expand Down
12 changes: 6 additions & 6 deletions src/engine/engine_solver.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ void mj_solPGS(const mjModel* m, mjData* d, int maxiter) {
mjContact* con;
mjMARKSTACK;
mjtNum* ARinv = mj_stackAlloc(d, nefc);
int* oldstate = (int*)mj_stackAlloc(d, nefc);
int* oldstate = mj_stackAllocInt(d, nefc);

// precompute inverse diagonal of AR
ARdiaginv(m, d, ARinv, 0);
Expand Down Expand Up @@ -511,7 +511,7 @@ void mj_solNoSlip(const mjModel* m, mjData* d, int maxiter) {
mjContact* con;
mjMARKSTACK;
mjtNum* ARinv = mj_stackAlloc(d, nefc);
int* oldstate = (int*)mj_stackAlloc(d, nefc);
int* oldstate = mj_stackAllocInt(d, nefc);

// precompute inverse diagonal of A
ARdiaginv(m, d, ARinv, 1);
Expand Down Expand Up @@ -768,9 +768,9 @@ static void CGallocate(const mjModel* m, mjData* d,
if (flg_Newton) {
ctx->H = mj_stackAlloc(d, nv*nv);
ctx->Hcone = mj_stackAlloc(d, nv*nv);
ctx->rownnz = (int*)mj_stackAlloc(d, nv);
ctx->rowadr = (int*)mj_stackAlloc(d, nv);
ctx->colind = (int*)mj_stackAlloc(d, nv*nv);
ctx->rownnz = mj_stackAllocInt(d, nv);
ctx->rowadr = mj_stackAllocInt(d, nv);
ctx->colind = mj_stackAllocInt(d, nv*nv);
}
}

Expand Down Expand Up @@ -1507,7 +1507,7 @@ static void mj_solCGNewton(const mjModel* m, mjData* d, int maxiter, int flg_New
Mgradold = mj_stackAlloc(d, nv);
Mgraddif = mj_stackAlloc(d, nv);
}
int* oldstate = (int*)mj_stackAlloc(d, nefc);
int* oldstate = mj_stackAllocInt(d, nefc);

// initialize matrix-vector products
mj_mulM(m, d, ctx.Ma, d->qacc);
Expand Down
2 changes: 1 addition & 1 deletion src/engine/engine_support.c
Original file line number Diff line number Diff line change
Expand Up @@ -984,7 +984,7 @@ void mj_copyM2DSparse(const mjModel* m, mjData* d, mjtNum* dst, const mjtNum* sr
mjMARKSTACK;

// init remaining
int* remaining = (int*)mj_stackAlloc(d, nv);
int* remaining = mj_stackAllocInt(d, nv);
memcpy(remaining, d->D_rownnz, nv * sizeof(int));

// copy data
Expand Down
6 changes: 3 additions & 3 deletions src/engine/engine_util_sparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,8 @@ void mju_sqrMatTDSparse(mjtNum* res, const mjtNum* mat, const mjtNum* matT,
int* res_rownnz, int* res_rowadr, int* res_colind,
const int* rownnz, const int* rowadr,
const int* colind, const int* rowsuper,
const int* rownnzT, const int* rowadrT,
const int* colindT, const int* rowsuperT,
const int* rownnzT, const int* rowadrT,
const int* colindT, const int* rowsuperT,
mjData* d) {
// allocate space for accumulation buffer and matT
mjMARKSTACK;
Expand All @@ -434,7 +434,7 @@ void mju_sqrMatTDSparse(mjtNum* res, const mjtNum* mat, const mjtNum* matT,

// these mark the currently set columns in the dense row buffer,
// used for when creating the resulting sparse row
int* markers = (int*) mj_stackAlloc(d, nc);
int* markers = mj_stackAllocInt(d, nc);

for (int i=0; i<nc; i++) {
int* cols = res_colind+res_rowadr[i];
Expand Down
20 changes: 10 additions & 10 deletions test/benchmark/engine_util_sparse_benchmark_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void ABSL_ATTRIBUTE_NOINLINE mju_sqrMatTDSparse_baseline(
const int* rowsuper, const int* rownnzT, const int* rowadrT,
const int* colindT, const int* rowsuperT, mjData* d) {
mjMARKSTACK;
int* chain = (int*)mj_stackAlloc(d, 2 * nc);
int* chain = mj_stackAllocInt(d, 2 * nc);
mjtNum* buffer = mj_stackAlloc(d, nc);

for (int r = 0; r < nc; r++) {
Expand Down Expand Up @@ -433,9 +433,9 @@ static void BM_combineSparse(benchmark::State& state, CombineFuncPtr func) {
// allocate
mjMARKSTACK;
mjtNum* H = mj_stackAlloc(d, m->nv*m->nv);
int* rownnz = (int*)mj_stackAlloc(d, m->nv);
int* rowadr = (int*)mj_stackAlloc(d, m->nv);
int* colind = (int*)mj_stackAlloc(d, m->nv*m->nv);
int* rownnz = mj_stackAllocInt(d, m->nv);
int* rowadr = mj_stackAllocInt(d, m->nv);
int* colind = mj_stackAllocInt(d, m->nv*m->nv);

// compute D corresponding to quad states
mjtNum* D = mj_stackAlloc(d, d->nefc);
Expand Down Expand Up @@ -511,9 +511,9 @@ static void BM_transposeSparse(benchmark::State& state, TransposeFuncPtr func) {

// need uncompressed layout
mjtNum* res = mj_stackAlloc(d, m->nv * d->nefc);
int* res_rownnz = (int*)mj_stackAlloc(d, m->nv);
int* res_rowadr = (int*)mj_stackAlloc(d, m->nv);
int* res_colind = (int*)mj_stackAlloc(d, m->nv * d->nefc);
int* res_rownnz = mj_stackAllocInt(d, m->nv);
int* res_rowadr = mj_stackAllocInt(d, m->nv);
int* res_colind = mj_stackAllocInt(d, m->nv * d->nefc);

// time benchmark
for (auto s : state) {
Expand Down Expand Up @@ -555,9 +555,9 @@ static void BM_sqrMatTDSparse(benchmark::State& state, SqrMatTDFuncPtr func) {
// allocate
mjMARKSTACK;
mjtNum* H = mj_stackAlloc(d, m->nv * m->nv);
int* rownnz = (int*)mj_stackAlloc(d, m->nv);
int* rowadr = (int*)mj_stackAlloc(d, m->nv);
int* colind = (int*)mj_stackAlloc(d, m->nv * m->nv);
int* rownnz = mj_stackAllocInt(d, m->nv);
int* rowadr = mj_stackAllocInt(d, m->nv);
int* colind = mj_stackAllocInt(d, m->nv * m->nv);

// compute D corresponding to quad states
mjtNum* D = mj_stackAlloc(d, d->nefc);
Expand Down
3 changes: 3 additions & 0 deletions unity/Runtime/Bindings/MjBindings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2833,6 +2833,9 @@ public unsafe struct mjvFigure_ {
[DllImport("mujoco", CallingConvention = CallingConvention.Cdecl)]
public static unsafe extern double* mj_stackAlloc(mjData_* d, int size);

[DllImport("mujoco", CallingConvention = CallingConvention.Cdecl)]
public static unsafe extern int* mj_stackAllocInt(mjData_* d, int size);

[DllImport("mujoco", CallingConvention = CallingConvention.Cdecl)]
public static unsafe extern void mj_deleteData(mjData_* d);

Expand Down

0 comments on commit 8696247

Please sign in to comment.