Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Recycle actor heap chunks after GC instead of returning to pool #4531

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .release-notes/4531.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## Make actor heap allocations more efficient by recycling freed memory

Prior to this change, the actor heap garbage collection process would return freed memory back to the pony runtime at the end of a garbage collection run. The returning of memory to the pony runtime and allocating of new memory from the pony runtime are both expensive operations. This change makes it so that the actor garbage collection process keeps old freed memory chunks around with the expectation that the actor will very likely need memory again as it continues to run behaviors. This avoids the expensive return to and reallocation of memory from the pony runtime. It is possible that the overall application might end up using more memory as any freed memory chunks can only be reused by the actor that owns them and the runtime and other actors can no longer reuse that memory as they previously might have been able to.
128 changes: 99 additions & 29 deletions src/libponyrt/mem/heap.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ typedef struct small_chunk_t
struct small_chunk_t* next;
} small_chunk_t;

// include it after defining `large_chunk_t`
#include "heap_chunk_sorting.h"

#define CHUNK_TYPE_BITMASK (uintptr_t)0x1
#define CHUNK_NEEDS_TO_BE_CLEARED_BITMASK (uintptr_t)0x2
Expand Down Expand Up @@ -547,8 +549,11 @@ void ponyint_heap_init(heap_t* heap)

void ponyint_heap_destroy(heap_t* heap)
{
large_chunk_list(destroy_large, heap->large_recyclable, 0);
large_chunk_list(destroy_large, heap->large, 0);

small_chunk_list(destroy_small, heap->small_recyclable, 0);

for(int i = 0; i < HEAP_SIZECLASSES; i++)
{
small_chunk_list(destroy_small, heap->small_free[i], 0);
Expand Down Expand Up @@ -622,9 +627,29 @@ void* ponyint_heap_alloc_small(pony_actor_t* actor, heap_t* heap,
heap->small_full[sizeclass] = chunk;
}
} else {
small_chunk_t* n = (small_chunk_t*) POOL_ALLOC(small_chunk_t);
n->base.m = (char*) POOL_ALLOC(block_t);
small_chunk_t* n = NULL;

// recycle a small chunk if available because it avoids setting the pagemap
if (NULL != heap->small_recyclable)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

our normal pattern in the codebase is to compare to NULL as the second item. Is there a reason for the variance in this patch?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no reason except that i accidentally forgot an = and didn't notice and that's much easier to track down with the condition written this way..

{
n = heap->small_recyclable;
heap->small_recyclable = n->next;
}

// If there are none in the recyclable list, get a new one.
if (NULL == n)
{
n = (small_chunk_t*) POOL_ALLOC(small_chunk_t);
n->base.m = (char*) POOL_ALLOC(block_t);
ponyint_pagemap_set(get_m((chunk_t*)n), (chunk_t*)n, actor);
}

// we should always have a chunk and m at this point
pony_assert(n != NULL);
pony_assert(n->base.m != NULL);

set_small_chunk_size(n, sizeclass);

#ifdef USE_RUNTIMESTATS
actor->actorstats.heap_mem_used += sizeof(small_chunk_t);
actor->actorstats.heap_mem_allocated += POOL_ALLOC_SIZE(small_chunk_t);
Expand All @@ -640,10 +665,10 @@ void* ponyint_heap_alloc_small(pony_actor_t* actor, heap_t* heap,
n->slots = sizeclass_init[sizeclass];
n->next = NULL;

// this is required for the heap garbage collection as it needs to know that
// this chunk needs to be cleared
set_chunk_needs_clearing((chunk_t*)n);

ponyint_pagemap_set(get_m((chunk_t*)n), (chunk_t*)n, actor);

heap->small_free[sizeclass] = n;
chunk = n;

Expand Down Expand Up @@ -672,9 +697,49 @@ void* ponyint_heap_alloc_large(pony_actor_t* actor, heap_t* heap, size_t size,

size = ponyint_pool_adjust_size(size);

large_chunk_t* chunk = (large_chunk_t*) POOL_ALLOC(large_chunk_t);
chunk->size = size;
chunk->base.m = (char*) ponyint_pool_alloc_size(size);
large_chunk_t* chunk = NULL;

// recycle a large chunk if available because it avoids setting the pagemap
if (NULL != heap->large_recyclable)
{
large_chunk_t* recycle = heap->large_recyclable;
large_chunk_t** prev = &recycle;

// short circuit as soon as we see a chunk that is too big because this list is sorted by size
while (NULL != recycle && recycle->size <= size)
{
// we only recycle if the size is exactly what is required
if (recycle->size == size)
{
if (*prev == heap->large_recyclable)
heap->large_recyclable = recycle->next;
else
(*prev)->next = recycle->next;

chunk = recycle;
chunk->next = NULL;
break;
} else {
prev = &recycle;
recycle = recycle->next;
}
}
}

// If there is no suitable one in the recyclable list, get a new one.
if (NULL == chunk)
{
chunk = (large_chunk_t*) POOL_ALLOC(large_chunk_t);
chunk->size = size;
chunk->base.m = (char*) ponyint_pool_alloc_size(size);
large_pagemap(get_m((chunk_t*)chunk), size, (chunk_t*)chunk, actor);
}

// we should always have a chunk and m of the right size at this point
pony_assert(chunk != NULL);
pony_assert(chunk->base.m != NULL);
pony_assert(chunk->size == size);

#ifdef USE_RUNTIMESTATS
actor->actorstats.heap_mem_used += sizeof(large_chunk_t);
actor->actorstats.heap_mem_used += chunk->size;
Expand All @@ -685,13 +750,13 @@ void* ponyint_heap_alloc_large(pony_actor_t* actor, heap_t* heap, size_t size,
#endif
set_large_chunk_slot(chunk, 0);

// this is required for the heap garbage collection as it needs to know that
// this chunk needs to be cleared
set_chunk_needs_clearing((chunk_t*)chunk);

// note if a finaliser needs to run or not
set_large_chunk_finaliser(chunk, (track_finalisers_mask & 1));

large_pagemap(get_m((chunk_t*)chunk), size, (chunk_t*)chunk, actor);

chunk->next = heap->large;
heap->large = chunk;
heap->used += chunk->size;
Expand Down Expand Up @@ -886,18 +951,18 @@ void ponyint_heap_endgc(heap_t* heap
// make a copy of all the heap list pointers into a temporary heap
heap_t theap = {};
heap_t* temp_heap = &theap;
memcpy(temp_heap, heap, offsetof(heap_t, used));
memcpy(temp_heap, heap, offsetof(heap_t, small_recyclable));

// set all the heap list pointers to NULL in the real heap to ensure that
// any new allocations by finalisers during the GC process don't reuse memory
// freed during the GC process
memset(heap, 0, offsetof(heap_t, used));

// lists of chunks to destroy
small_chunk_t* to_destroy_small = NULL;
large_chunk_t* to_destroy_large = NULL;
// lists of chunks to recycle
small_chunk_t** to_recycle_small = &temp_heap->small_recyclable;
large_chunk_t** to_recycle_large = &temp_heap->large_recyclable;

// sweep all the chunks in the temporary heap while accumulating chunks to destroy
// sweep all the chunks in the temporary heap while accumulating chunks to recycle
// NOTE: the real heap will get new chunks allocated and added to it during this process
// if a finaliser allocates memory... we have to make sure that finalisers don't
// reuse memory being freed or we'll end up with a use-after-free bug due to
Expand All @@ -917,21 +982,21 @@ void ponyint_heap_endgc(heap_t* heap
uint32_t empty = sizeclass_empty[i];

#ifdef USE_RUNTIMESTATS
used += sweep_small(list1, avail, full, &to_destroy_small, empty, size,
used += sweep_small(list1, avail, full, to_recycle_small, empty, size,
&mem_allocated, &mem_used, &num_allocated);
used += sweep_small(list2, avail, full, &to_destroy_small, empty, size,
used += sweep_small(list2, avail, full, to_recycle_small, empty, size,
&mem_allocated, &mem_used, &num_allocated);
#else
used += sweep_small(list1, avail, full, &to_destroy_small, empty, size);
used += sweep_small(list2, avail, full, &to_destroy_small, empty, size);
used += sweep_small(list1, avail, full, to_recycle_small, empty, size);
used += sweep_small(list2, avail, full, to_recycle_small, empty, size);
#endif
}

#ifdef USE_RUNTIMESTATS
temp_heap->large = sweep_large(temp_heap->large, &to_destroy_large, &used, &mem_allocated, &mem_used,
temp_heap->large = sweep_large(temp_heap->large, to_recycle_large, &used, &mem_allocated, &mem_used,
&num_allocated);
#else
temp_heap->large = sweep_large(temp_heap->large, &to_destroy_large, &used);
temp_heap->large = sweep_large(temp_heap->large, to_recycle_large, &used);
#endif

// we need to combine the temporary heap lists with the real heap lists
Expand All @@ -951,8 +1016,8 @@ void ponyint_heap_endgc(heap_t* heap
{
small_chunk_t* temp = *sc_temp;
*sc_temp = temp->next;
temp->next = to_destroy_small;
to_destroy_small = temp;
temp->next = *to_recycle_small;
*to_recycle_small = temp;
} else {
sc_temp = &(*sc_temp)->next;
}
Expand All @@ -969,8 +1034,8 @@ void ponyint_heap_endgc(heap_t* heap
{
small_chunk_t* temp = *sc_temp;
*sc_temp = temp->next;
temp->next = to_destroy_small;
to_destroy_small = temp;
temp->next = *to_recycle_small;
*to_recycle_small = temp;
} else {
sc_temp = &(*sc_temp)->next;
}
Expand All @@ -988,18 +1053,23 @@ void ponyint_heap_endgc(heap_t* heap
{
large_chunk_t* temp = *lc_temp;
*lc_temp = temp->next;
temp->next = to_destroy_large;
to_destroy_large = temp;
temp->next = *to_recycle_large;
*to_recycle_large = temp;
} else {
lc_temp = &(*lc_temp)->next;
}
}
*lc_temp = temp_heap->large;

// destroy all the chunks that need to be destroyed
small_chunk_list(destroy_small, to_destroy_small, 0);
large_chunk_list(destroy_large, to_destroy_large, 0);
// destroy all the chunks that didn't get re-used since the last GC run so
// that other actors can re-use the memory from the pool
small_chunk_list(destroy_small, heap->small_recyclable, 0);
large_chunk_list(destroy_large, heap->large_recyclable, 0);

// save any chunks that can be recycled from this GC run
// sort large chunks by the size of the chunks
heap->large_recyclable = sort_large_chunk_list_by_size(*to_recycle_large);
heap->small_recyclable = *to_recycle_small;

// Foreign object sizes will have been added to heap->used already. Here we
// add local object sizes as well and set the next gc point for when memory
Expand Down
2 changes: 2 additions & 0 deletions src/libponyrt/mem/heap.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ typedef struct heap_t
small_chunk_t* small_free[HEAP_SIZECLASSES];
small_chunk_t* small_full[HEAP_SIZECLASSES];
large_chunk_t* large;
small_chunk_t* small_recyclable;
large_chunk_t* large_recyclable;

size_t used;
size_t next_gc;
Expand Down
124 changes: 124 additions & 0 deletions src/libponyrt/mem/heap_chunk_sorting.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#ifndef mem_heap_chunk_sorting_h
#define mem_heap_chunk_sorting_h

#include "heap.h"

/*
* Shamelessly stolen/adapted from Simon Tatham from:
* https://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.c
*
* This file is copyright 2001 Simon Tatham.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
static large_chunk_t* sort_large_chunk_list_by_size(large_chunk_t *list)
{
large_chunk_t *p, *q, *e, *tail;
int32_t insize, nmerges, psize, qsize, i;

/*
* Silly special case: if `list' was passed in as NULL, return
* NULL immediately.
*/
if (!list)
return NULL;

insize = 1;

while (true)
{
p = list;
list = NULL;
tail = NULL;

nmerges = 0; /* count number of merges we do in this pass */

while (NULL != p)
{
nmerges++; /* there exists a merge to be done */
/* step `insize' places along from p */
q = p;
psize = 0;
for (i = 0; i < insize; i++)
{
psize++;
q = q->next;
if (NULL == q)
break;
}

/* if q hasn't fallen off end, we have two lists to merge */
qsize = insize;

/* now we have two lists; merge them */
while (psize > 0 || (qsize > 0 && (NULL != q)))
{
/* decide whether next element of merge comes from p or q */
if (psize == 0)
{
/* p is empty; e must come from q. */
e = q;
q = q->next;
qsize--;
} else if (qsize == 0 || !q) {
/* q is empty; e must come from p. */
e = p;
p = p->next;
psize--;
} else if (p->size <= q->size) {
/* First element of p is lower (or same);
* e must come from p. */
e = p;
p = p->next;
psize--;
} else {
/* First element of q is lower; e must come from q. */
e = q;
q = q->next;
qsize--;
}

/* add the next element to the merged list */
if (NULL != tail)
tail->next = e;
else
list = e;

tail = e;
}

/* now p has stepped `insize' places along, and q has too */
p = q;
}

tail->next = NULL;

/* If we have done only one merge, we're finished. */
if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
return list;

/* Otherwise repeat, merging lists twice the size */
insize *= 2;
}
}

#endif