diff --git a/CMakeLists.txt b/CMakeLists.txt index e4392f53..a19e5d61 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,6 +55,12 @@ set( CACHE STRING "Git commit hash (used by CI)" ) +set(PSN00BSDK_LIBC_ALLOCATORS CUSTOM AFF TLSF) +set( + PSN00BSDK_LIBC_ALLOCATOR AFF + CACHE STRING "Allocator to use in libc implementation (one of ${PSN00BSDK_LIBC_ALLOCATORS})" +) + # Attempt to automatically select a suitable CMake generator to build libpsn00b # and the examples. Only Ninja and makefile-based generators can be used, as # other generators (VS and Xcode) do not allow custom toolchains to be used. @@ -73,6 +79,12 @@ if(NOT LIBPSN00B_GENERATOR MATCHES ".*(Make|Makefiles|Ninja)( Multi-Config)?$") endif() endif() +list(FIND PSN00BSDK_LIBC_ALLOCATORS ${PSN00BSDK_LIBC_ALLOCATOR} index) +if (index EQUAL -1) + message(FATAL_ERROR "Invalid allocator: ${PSN00BSDK_LIBC_ALLOCATOR} (must be CUSTOM, AFF or TLSF)") +endif() +set(PSN00BSDK_LIBC_ALLOCATOR ${index}) + string(TIMESTAMP PSN00BSDK_BUILD_DATE UTC) # Forward some important variables to mkpsxiso and to the subprojects (they are @@ -89,6 +101,8 @@ set( -DPSN00BSDK_BUILD_DATE:STRING=${PSN00BSDK_BUILD_DATE} -DPSN00BSDK_GIT_TAG:STRING=${PSN00BSDK_GIT_TAG} -DPSN00BSDK_GIT_COMMIT:STRING=${PSN00BSDK_GIT_COMMIT} + -DPSN00BSDK_LIBC_ALLOCATOR:STRING=${PSN00BSDK_LIBC_ALLOCATOR} + -DPSN00BSDK_LIBC_ALLOCATORS:STRING=${PSN00BSDK_LIBC_ALLOCATORS} ) set( _tools_args diff --git a/README.md b/README.md index 50c50a00..4368f770 100644 --- a/README.md +++ b/README.md @@ -96,8 +96,7 @@ the basics of the graphics and controller APIs. * `libpsxpress`: Add an API for SPU-ADPCM audio encoding at runtime. -* `libc`: Improve the memory allocation framework with multiple allocators, - replace the string functions with optimized implementations and maybe add +* `libc`: Replace the string functions with optimized implementations and maybe add helpers to manage swapping between main RAM and VRAM/SPU RAM. 
* Add a full controller and memory card API that does not depend on the BIOS diff --git a/doc/allocator_implementation.md b/doc/allocator_implementation.md new file mode 100644 index 00000000..ab09c870 --- /dev/null +++ b/doc/allocator_implementation.md @@ -0,0 +1,41 @@ +# Allocator Implementation + +Supplying a custom allocator implementation can be done via the +`CUSTOM` value supplied to the `PSN00BSDK_LIBC_ALLOCATOR` cmake +parameter. + +## Functions + +Using this variant requires the user to implement several calls +that are bound as weak functions (via `__attribute__((weak))`) +within the libc implementation. You are required to implement +the following to satisfy the `stdlib.h` header: + +```c +void InitHeap(void* addr, size_t size); +void TrackHeapUsage(ptrdiff_t alloc_incr); +void GetHeapUsage(HeapUsage* usage); + +void* malloc(size_t size); +void* calloc(size_t num, size_t size); +void* realloc(void* ptr, size_t size); +void free(void* ptr); +``` + +For reference implementations, see the [aff.c](../libpsn00b/libc/aff.c) and [tlsf.c](../libpsn00b/libc/tlsf.c) +sources within the SDK. + +## Placeholder Details + +Internally, the default placeholder implementations that are bound when +you provide the `CUSTOM` variant via cmake each log an error and then call +`abort()`, so the application terminates immediately upon invocation +of any allocator function. For example: + +```c +void* malloc(size_t size) { + _sdk_log("[ERROR] Unimplemented custom allocator handle: void* malloc(size_t)\n"); + abort(); + return NULL; +} +``` diff --git a/doc/cmake_reference.md b/doc/cmake_reference.md index 0d693c76..b90734e4 100644 --- a/doc/cmake_reference.md +++ b/doc/cmake_reference.md @@ -331,6 +331,23 @@ rather than setting this variable. `PSN00BSDK_TARGET` must be set regardless of whether or not `PSN00BSDK_TC` is also set. +### `PSN00BSDK_LIBC_ALLOCATOR` (`STRING`) + +Allocator implementation to use for the libc memory allocation API functions.
+This MUST be one of the following: + +1. `AFF`: Allocated block first-fit, similar to that of the standard glibc allocator. +2. `TLSF`: Two-level segregated-fit designed for latency-sensitive applications. + It has a bounded response time, performs faster than glibc-like allocators + and has a lower base overhead and per-allocation overhead. +3. `CUSTOM`: User-defined implementation of the allocation primitives. + +For the specific functions that must be implemented, as well as any +additional details, see the documentation on [allocator implementation](./allocator_implementation.md). + +**IMPORTANT**: If an allocator is not specified when compiling the SDK, the `AFF` allocator will +be used by default. + ## Internal settings These settings are not stored in CMake's cache and can only be set from within diff --git a/libpsn00b/CMakeLists.txt b/libpsn00b/CMakeLists.txt index a6b6df39..496bc71d 100644 --- a/libpsn00b/CMakeLists.txt +++ b/libpsn00b/CMakeLists.txt @@ -50,6 +50,7 @@ foreach(_library IN LISTS PSN00BSDK_LIBRARIES) ) target_compile_definitions(${_name} PRIVATE SDK_LIBRARY_NAME="${_library}") + target_compile_definitions(${_name} PRIVATE SDK_ALLOC_IMPL=${PSN00BSDK_LIBC_ALLOCATOR}) endforeach() endforeach() diff --git a/libpsn00b/cmake/internal_setup.cmake b/libpsn00b/cmake/internal_setup.cmake index c3bb2dc6..a3d954ef 100644 --- a/libpsn00b/cmake/internal_setup.cmake +++ b/libpsn00b/cmake/internal_setup.cmake @@ -40,6 +40,9 @@ if(TARGET psn00bsdk) endif() link_libraries(-lgcc) +# Guaranteed to not be -1 here +set(PSN00BSDK_LIBC_ALLOCATOR ${index}) + # DON'T CHANGE THE ORDER or you'll break the libraries' internal dependencies.
set( PSN00BSDK_LIBRARIES diff --git a/libpsn00b/include/assert.h b/libpsn00b/include/assert.h index 8f8df74e..9c329984 100644 --- a/libpsn00b/include/assert.h +++ b/libpsn00b/include/assert.h @@ -14,7 +14,7 @@ extern "C" { #endif -void _assert_abort(const char *file, int line, const char *expr); +__attribute__((noreturn)) void _assert_abort(const char *file, int line, const char *expr); #ifdef __cplusplus } @@ -25,6 +25,7 @@ void _assert_abort(const char *file, int line, const char *expr); #define assert(expr) #define _sdk_log(fmt, ...) #define _sdk_assert(expr, fmt, ...) +#define _sdk_assert_abort(expr, fmt, ...) #define _sdk_validate_args_void(expr) #define _sdk_validate_args(expr, ret) @@ -46,6 +47,11 @@ void _assert_abort(const char *file, int line, const char *expr); _sdk_log(fmt, __VA_ARGS__); \ return ret; \ } +#define _sdk_assert_abort(expr, fmt, ...) \ + if (!(expr)) { \ + _sdk_log(fmt, __VA_ARGS__); \ + assert(expr); \ + } #define _sdk_validate_args_void(expr) \ if (!(expr)) { \ _sdk_log("invalid args to %s() (%s)\n", __func__, #expr); \ diff --git a/libpsn00b/include/stdlib.h b/libpsn00b/include/stdlib.h index c754c298..ceafafe2 100644 --- a/libpsn00b/include/stdlib.h +++ b/libpsn00b/include/stdlib.h @@ -30,7 +30,7 @@ extern "C" { extern int __argc; extern const char **__argv; -void abort(void); +__attribute__((noreturn)) void abort(void); int abs(int value); int rand(void); @@ -42,9 +42,9 @@ long long strtoll(const char *str, char **str_end, int base); //double strtod(const char *str, char **str_end); //long double strtold(const char *str, char **str_end); -void InitHeap(void *addr, size_t size); void *sbrk(ptrdiff_t incr); +void InitHeap(void *addr, size_t size); void TrackHeapUsage(ptrdiff_t alloc_incr); void GetHeapUsage(HeapUsage *usage); diff --git a/libpsn00b/libc/aff.c b/libpsn00b/libc/aff.c new file mode 100644 index 00000000..8908ba9d --- /dev/null +++ b/libpsn00b/libc/aff.c @@ -0,0 +1,332 @@ +/* + * PSn00bSDK default memory allocator + * (C) 
2022 Nicolas Noble, spicyjpeg + * + * This code is based on psyqo's malloc implementation, available here: + * https://github.com/grumpycoders/pcsx-redux/blob/main/src/mips/psyqo/src/alloc.c + * + * Heap management and memory allocation are completely separate, with the + * latter being built on top of the former. This makes it possible to override + * only InitHeap() and sbrk() while still using the default allocator, or + * override malloc()/realloc()/free() while using the default heap manager. + * Custom allocators should call TrackHeapUsage() to let the heap manager know + * how much memory is allocated at a given time. + */ +#include +#include +#include +#include + +#define ALIGN_SIZE 8 +#define _align(x, n) (((x) + ((n) - 1)) & ~((n) - 1)) + +/* Private types */ + +typedef struct __attribute__((aligned(ALIGN_SIZE))) _BlockHeader { + struct _BlockHeader *prev, *next; + void *ptr; + size_t size; +} BlockHeader; + +/* Internal globals */ + +static void *_heap_start, *_heap_end, *_heap_limit; +static size_t _heap_alloc, _heap_alloc_max; + +static void *_alloc_start; +static BlockHeader *_alloc_head, *_alloc_tail; + +/* Heap management API */ + +void affInitHeap(void *addr, size_t size) { + _heap_start = addr; + _heap_end = addr; + _heap_limit = (void *) ((uintptr_t) addr + size); + + _heap_alloc = 0; + _heap_alloc_max = 0; + + _alloc_start = addr; + _alloc_head = 0; + _alloc_tail = 0; + _sdk_log("Initialised AFF allocator\n"); +} + +void *sbrk(ptrdiff_t incr) { + void *old_end = _heap_end; + void *new_end = (void *) _align((uintptr_t) old_end + incr, ALIGN_SIZE); + + if (new_end > _heap_limit) + return 0; + + _heap_end = new_end; + return old_end; +} + +void affTrackHeapUsage(ptrdiff_t alloc_incr) { + _heap_alloc += alloc_incr; + + if (_heap_alloc > _heap_alloc_max) + _heap_alloc_max = _heap_alloc; +} + +void affGetHeapUsage(HeapUsage *usage) { + usage->total = _heap_limit - _heap_start; + usage->heap = _heap_end - _heap_start; + usage->stack = _heap_limit - 
_heap_end; + + usage->alloc = _heap_alloc; + usage->alloc_max = _heap_alloc_max; +} + +/* Memory allocator */ + +static BlockHeader *_find_fit(BlockHeader *head, size_t size) { + BlockHeader *prev = head; + for (; prev; prev = prev->next) { + if (prev->next) { + uintptr_t next_bot = (uintptr_t) prev->next; + next_bot -= (uintptr_t) prev->ptr + prev->size; + + if (next_bot >= size) + return prev; + } + } + return prev; +} + +void* affMalloc(size_t size) { + if (!size) + return 0; + + size_t _size = _align(size + sizeof(BlockHeader), ALIGN_SIZE); + size_t _size_nh = _size - sizeof(BlockHeader); + + // Nothing's initialized yet? Let's just initialize the bottom of our heap, + // flag it as allocated. + if (!_alloc_head) { + //if (!_alloc_start) + //_alloc_start = sbrk(0); + BlockHeader *new = (BlockHeader *) sbrk(_size); + if (!new) + return 0; + + void *ptr = (void *) &new[1]; + new->ptr = ptr; + new->size = _size_nh; + new->prev = 0; + new->next = 0; + + _alloc_head = new; + _alloc_tail = new; + + affTrackHeapUsage(_size); + return ptr; + } + + // We *may* have the bottom of our heap that has shifted, because of a free. + // So let's check first if we have free space there, because I'm nervous + // about having an incomplete data structure. + if (((uintptr_t) _alloc_start + _size) < ((uintptr_t) _alloc_head)) { + BlockHeader *new = (BlockHeader *) _alloc_start; + + void *ptr = (void *) &new[1]; + new->ptr = ptr; + new->size = _size_nh; + new->prev = 0; + new->next = _alloc_head; + + _alloc_head->prev = new; + _alloc_head = new; + + affTrackHeapUsage(_size); + return ptr; + } + + // No luck at the beginning of the heap, let's walk the heap to find a fit. 
+ BlockHeader *prev = _find_fit(_alloc_head, _size); + if (prev) { + BlockHeader *new = (BlockHeader *) ((uintptr_t) prev->ptr + prev->size); + + void *ptr = (void *) &new[1]; + new->ptr = ptr; + new->size = _size_nh; + new->prev = prev; + new->next = prev->next; + + (new->next)->prev = new; + prev->next = new; + + affTrackHeapUsage(_size); + return ptr; + } + + // Time to extend the size of the heap. + BlockHeader *new = (BlockHeader *) sbrk(_size); + if (!new) + return 0; + + void *ptr = (void *) &new[1]; + new->ptr = ptr; + new->size = _size_nh; + new->prev = _alloc_tail; + new->next = 0; + + _alloc_tail->next = new; + _alloc_tail = new; + + affTrackHeapUsage(_size); + return ptr; +} + +/* Defined further down; declared here because affRealloc() releases blocks through it. */ +void affFree(void *ptr); + +/* + * Allocates zero-initialised storage for num elements of size bytes each. + * Returns 0 if num * size overflows size_t, if the total is zero, or if the + * heap is exhausted. + */ +void* affCalloc(size_t num, size_t size) { + // Reject requests whose byte count would overflow size_t. + if (size && num > ((size_t) -1) / size) + return 0; + + // Allocate through the AFF allocator itself rather than the global malloc(), + // which belongs to another allocator when SDK_ALLOC_IMPL is not AFF. + size_t total = num * size; + void *ptr = affMalloc(total); + + // calloc() is required to hand back zero-filled storage. + if (ptr) + __builtin_memset(ptr, 0, total); + + return ptr; +} + +/* + * Resizes the block at ptr to size bytes, in place when possible. Behaves like + * affMalloc() for a null ptr and like affFree() for a zero size. Returns 0 on + * failure, in which case the original block is left untouched. + */ +void* affRealloc(void *ptr, size_t size) { + if (!size) { + affFree(ptr); + return 0; + } + if (!ptr) + return affMalloc(size); + + size_t _size = _align(size + sizeof(BlockHeader), ALIGN_SIZE); + size_t _size_nh = _size - sizeof(BlockHeader); + BlockHeader *prev = (BlockHeader *) ((uintptr_t) ptr - sizeof(BlockHeader)); + + // New memory block shorter? + if (prev->size >= _size_nh) { + affTrackHeapUsage(_size_nh - prev->size); + prev->size = _size_nh; + + if (!prev->next) + sbrk((ptr - sbrk(0)) + _size_nh); + + return ptr; + } + + // New memory block larger; is it the last one? + if (!prev->next) { + void *new = sbrk(_size_nh - prev->size); + if (!new) + return 0; + affTrackHeapUsage(_size_nh - prev->size); + prev->size = _size_nh; + return ptr; + } + + // Do we have free memory after it? + if ((uintptr_t) prev->next - (uintptr_t) ptr >= _size_nh) { + affTrackHeapUsage(_size_nh - prev->size); + prev->size = _size_nh; + return ptr; + } + + // No luck: move the data into a fresh AFF block and release the old one. + void *new = affMalloc(size); + if (!new) + return 0; + + __builtin_memcpy(new, ptr, prev->size); + affFree(ptr); + return new; +} + +void affFree(void *ptr) { + if (!ptr || !_alloc_head) + return; + // First block; bumping head ahead. 
+ if (ptr == _alloc_head->ptr) { + size_t size = _alloc_head->size; + size += (uintptr_t) _alloc_head->ptr - (uintptr_t) _alloc_head; + _alloc_head = _alloc_head->next; + + if (_alloc_head) { + _alloc_head->prev = 0; + } else { + _alloc_tail = 0; + sbrk(-size); + } + + affTrackHeapUsage(-size); + return; + } + + // Finding the proper block + BlockHeader* cur = (BlockHeader*) (ptr - sizeof(BlockHeader)); + if (cur->next) { + // In the middle, just unlink it + (cur->next)->prev = cur->prev; + } else { + // At the end, shrink heap + void *top = sbrk(0); + size_t size = (top - (cur->prev)->ptr) - (cur->prev)->size; + _alloc_tail = cur->prev; + + sbrk(-size); + } + affTrackHeapUsage(-(cur->size + sizeof(BlockHeader))); + (cur->prev)->next = cur->next; +} + +#include "malloc_impl.h" + +// ==== API ==== + +#if SDK_ALLOC_IMPL == SDK_ALLOC_IMPL_AFF + +void InitHeap(void* addr, size_t size) { + affInitHeap(addr, size); +} + +void TrackHeapUsage(ptrdiff_t alloc_incr) { + affTrackHeapUsage(alloc_incr); +} + +void GetHeapUsage(HeapUsage* usage) { + affGetHeapUsage(usage); +} + +__attribute__((hot)) +void free(void* ptr) { + affFree(ptr); +} + +__attribute__(( + hot, + malloc, + alloc_size(1) +#ifdef gnu_version_10 + , malloc(free, 1) +#endif +)) +void* malloc(size_t size) { + return affMalloc(size); +} + +__attribute__(( + hot, + malloc, + alloc_size(1, 2) +#ifdef gnu_version_10 + , malloc(free, 1) +#endif +)) +void* calloc(size_t num, size_t size) { + return affCalloc(num, size); +} + +__attribute__(( + hot, + /* plain `malloc` attribute omitted: realloc may return storage that still holds live pointers */ + alloc_size(2) +#ifdef gnu_version_10 + , malloc(free, 1) +#endif +)) +void* realloc(void* ptr, size_t size) { + return affRealloc(ptr, size); +} + +#endif diff --git a/libpsn00b/libc/gnu_version.h b/libpsn00b/libc/gnu_version.h new file mode 100644 index 00000000..e25c8333 --- /dev/null +++ b/libpsn00b/libc/gnu_version.h @@ -0,0 +1,13 @@ +#pragma once + +#ifndef _H_GNU_VERSION_ +#define _H_GNU_VERSION_ + +/* The malloc(deallocator, index) attribute form only exists in GCC 11 and newer; the macro keeps its historical name for existing users. */ +#if defined(__GNUC__) \ + && __GNUC__ >= 11 \ + && 
defined(__GNUC_PATCHLEVEL__) +#define gnu_version_10 +#endif + +#endif // _H_GNU_VERSION_ diff --git a/libpsn00b/libc/malloc.c b/libpsn00b/libc/malloc.c index d3425d0c..ea192b81 100644 --- a/libpsn00b/libc/malloc.c +++ b/libpsn00b/libc/malloc.c @@ -1,279 +1,82 @@ -/* - * PSn00bSDK default memory allocator - * (C) 2022 Nicolas Noble, spicyjpeg - * - * This code is based on psyqo's malloc implementation, available here: - * https://github.com/grumpycoders/pcsx-redux/blob/main/src/mips/psyqo/src/alloc.c - * - * Heap management and memory allocation are completely separate, with the - * latter being built on top of the former. This makes it possible to override - * only InitHeap() and sbrk() while still using the default allocator, or - * override malloc()/realloc()/free() while using the default heap manager. - * Custom allocators should call TrackHeapUsage() to let the heap manager know - * how much memory is allocated at a given time. 
- */ +#include "malloc_impl.h" -#include -#include +#if SDK_ALLOC_IMPL == SDK_ALLOC_IMPL_CUSTOM #include +#include -#define _align(x, n) (((x) + ((n) - 1)) & ~((n) - 1)) - -/* Private types */ - -typedef struct _BlockHeader { - struct _BlockHeader *prev, *next; - void *ptr; - size_t size; -} BlockHeader; - -/* Internal globals */ - -static void *_heap_start, *_heap_end, *_heap_limit; -static size_t _heap_alloc, _heap_alloc_max; - -static void *_alloc_start; -static BlockHeader *_alloc_head, *_alloc_tail; - -/* Heap management API */ - -__attribute__((weak)) void InitHeap(void *addr, size_t size) { - _heap_start = addr; - _heap_end = addr; - _heap_limit = (void *) ((uintptr_t) addr + size); - - _heap_alloc = 0; - _heap_alloc_max = 0; - - _alloc_start = addr; - _alloc_head = 0; - _alloc_tail = 0; +__attribute__((weak)) +void InitHeap(void* addr, size_t size) { + _sdk_log("[ERROR] Unimplemented custom allocator handle: void InitHeap(void* addr, size_t size)\n"); + abort(); } -__attribute__((weak)) void *sbrk(ptrdiff_t incr) { - void *old_end = _heap_end; - void *new_end = (void *) _align((uintptr_t) old_end + incr, 8); - - if (new_end > _heap_limit) - return 0; - - _heap_end = new_end; - return old_end; +__attribute__((weak)) +void TrackHeapUsage(ptrdiff_t alloc_incr) { + _sdk_log("[ERROR] Unimplemented custom allocator handle: void TrackHeapUsage(ptrdiff_t alloc_incr)\n"); + abort(); } -__attribute__((weak)) void TrackHeapUsage(ptrdiff_t alloc_incr) { - _heap_alloc += alloc_incr; - - if (_heap_alloc > _heap_alloc_max) - _heap_alloc_max = _heap_alloc; +__attribute__((weak)) +void GetHeapUsage(HeapUsage* usage) { + _sdk_log("[ERROR] Unimplemented custom allocator handle: void GetHeapUsage(HeapUsage* usage)\n"); + abort(); } -__attribute__((weak)) void GetHeapUsage(HeapUsage *usage) { - usage->total = _heap_limit - _heap_start; - usage->heap = _heap_end - _heap_start; - usage->stack = _heap_limit - _heap_end; - - usage->alloc = _heap_alloc; - usage->alloc_max = 
_heap_alloc_max; +__attribute__((weak)) +void* sbrk(ptrdiff_t incr) { + _sdk_log("[ERROR] Unimplemented custom allocator handle: void* sbrk(ptrdiff_t)\n"); + abort(); + return NULL; } -/* Memory allocator */ - -static BlockHeader *_find_fit(BlockHeader *head, size_t size) { - BlockHeader *prev = head; - - for (; prev; prev = prev->next) { - if (prev->next) { - uintptr_t next_bot = (uintptr_t) prev->next; - next_bot -= (uintptr_t) prev->ptr + prev->size; - - if (next_bot >= size) - return prev; - } - } - - return prev; -} - -__attribute__((weak)) void *malloc(size_t size) { - if (!size) - return 0; - - size_t _size = _align(size + sizeof(BlockHeader), 8); - - // Nothing's initialized yet? Let's just initialize the bottom of our heap, - // flag it as allocated. - if (!_alloc_head) { - //if (!_alloc_start) - //_alloc_start = sbrk(0); - - BlockHeader *new = (BlockHeader *) sbrk(_size); - if (!new) - return 0; - - void *ptr = (void *) &new[1]; - new->ptr = ptr; - new->size = _size - sizeof(BlockHeader); - new->prev = 0; - new->next = 0; - - _alloc_head = new; - _alloc_tail = new; - - TrackHeapUsage(size); - return ptr; - } - - // We *may* have the bottom of our heap that has shifted, because of a free. - // So let's check first if we have free space there, because I'm nervous - // about having an incomplete data structure. - if (((uintptr_t) _alloc_start + _size) < ((uintptr_t) _alloc_head)) { - BlockHeader *new = (BlockHeader *) _alloc_start; - - void *ptr = (void *) &new[1]; - new->ptr = ptr; - new->size = _size - sizeof(BlockHeader); - new->prev = 0; - new->next = _alloc_head; - - _alloc_head->prev = new; - _alloc_head = new; - - TrackHeapUsage(size); - return ptr; - } - - // No luck at the beginning of the heap, let's walk the heap to find a fit. 
- BlockHeader *prev = _find_fit(_alloc_head, _size); - if (prev) { - BlockHeader *new = (BlockHeader *) ((uintptr_t) prev->ptr + prev->size); - - void *ptr = (void *)((uintptr_t) new + sizeof(BlockHeader)); - new->ptr = ptr; - new->size = _size - sizeof(BlockHeader); - new->prev = prev; - new->next = prev->next; - - (new->next)->prev = new; - prev->next = new; - - TrackHeapUsage(size); - return ptr; - } - - // Time to extend the size of the heap. - BlockHeader *new = (BlockHeader *) sbrk(_size); - if (!new) - return 0; - - void *ptr = (void *) &new[1]; - new->ptr = ptr; - new->size = _size - sizeof(BlockHeader); - new->prev = _alloc_tail; - new->next = 0; - - _alloc_tail->next = new; - _alloc_tail = new; - - TrackHeapUsage(size); - return ptr; +__attribute__((weak, hot)) +void free(void* ptr) { + _sdk_log("[ERROR] Unimplemented custom allocator handle: void free(void* ptr)\n"); + abort(); } -__attribute__((weak)) void *calloc(size_t num, size_t size) { - return malloc(num * size); +__attribute__(( + weak, + hot, + malloc, + alloc_size(1) +#ifdef gnu_version_10 + , malloc(free, 1) +#endif +)) +void* malloc(size_t size) { + _sdk_log("[ERROR] Unimplemented custom allocator handle: void* malloc(size_t)\n"); + abort(); + return NULL; } -__attribute__((weak)) void *realloc(void *ptr, size_t size) { - if (!size) { - free(ptr); - return 0; - } - if (!ptr) - return malloc(size); - - size_t _size = _align(size + sizeof(BlockHeader), 8); - BlockHeader *prev = (BlockHeader *) ((uintptr_t) ptr - sizeof(BlockHeader)); - - // New memory block shorter? - if (prev->size >= _size) { - TrackHeapUsage(size - prev->size); - prev->size = _size; - - if (!prev->next) - sbrk((ptr - sbrk(0)) + _size); - - return ptr; - } - - // New memory block larger; is it the last one? - if (!prev->next) { - void *new = sbrk(_size - prev->size); - if (!new) - return 0; - - TrackHeapUsage(size - prev->size); - prev->size = _size; - return ptr; - } - - // Do we have free memory after it? 
- if (((prev->next)->ptr - ptr) > _size) { - TrackHeapUsage(size - prev->size); - prev->size = _size; - return ptr; - } - - // No luck. - void *new = malloc(size); - if (!new) - return 0; - - __builtin_memcpy(new, ptr, prev->size); - free(ptr); - return new; +__attribute__(( + weak, + hot, + malloc, + alloc_size(1, 2) +#ifdef gnu_version_10 + , malloc(free, 1) +#endif +)) +void* calloc(size_t num, size_t size) { + _sdk_log("[ERROR] Unimplemented custom allocator handle: void* calloc(size_t num, size_t size)\n"); + abort(); + return NULL; } -__attribute__((weak)) void free(void *ptr) { - if (!ptr || !_alloc_head) - return; - - // First block; bumping head ahead. - if (ptr == _alloc_head->ptr) { - size_t size = _alloc_head->size; - size += (uintptr_t) _alloc_head->ptr - (uintptr_t) _alloc_head; - _alloc_head = _alloc_head->next; - - if (_alloc_head) { - _alloc_head->prev = 0; - } else { - _alloc_tail = 0; - sbrk(-size); - } - - TrackHeapUsage(-(_alloc_head->size)); - return; - } - - // Finding the proper block - BlockHeader *cur = _alloc_head; - - for (cur = _alloc_head; ptr != cur->ptr; cur = cur->next) { - if (!cur->next) - return; - } - - if (cur->next) { - // In the middle, just unlink it - (cur->next)->prev = cur->prev; - } else { - // At the end, shrink heap - void *top = sbrk(0); - size_t size = (top - (cur->prev)->ptr) - (cur->prev)->size; - _alloc_tail = cur->prev; - - sbrk(-size); - } - - TrackHeapUsage(-(cur->size)); - (cur->prev)->next = cur->next; +__attribute__(( + weak, + hot, + malloc, + alloc_size(2) +#ifdef gnu_version_10 + , malloc(free, 1) +#endif +)) +void* realloc(void* ptr, size_t size) { + _sdk_log("[ERROR] Unimplemented custom allocator handle: void* realloc(void* ptr, size_t size)\n"); + abort(); + return NULL; } +#endif diff --git a/libpsn00b/libc/malloc_impl.h b/libpsn00b/libc/malloc_impl.h new file mode 100644 index 00000000..2a0cd851 --- /dev/null +++ b/libpsn00b/libc/malloc_impl.h @@ -0,0 +1,17 @@ +#pragma once + +#ifndef 
_H_SDK_ALLOC_IMPL_ +#define _H_SDK_ALLOC_IMPL_ + +// Custom allocator overriding default implementation +#define SDK_ALLOC_IMPL_CUSTOM 0 +// Allocated block first-fit +#define SDK_ALLOC_IMPL_AFF 1 +// Two-level segregated fit +#define SDK_ALLOC_IMPL_TLSF 2 + +#ifndef SDK_ALLOC_IMPL +#define SDK_ALLOC_IMPL SDK_ALLOC_IMPL_AFF +#endif + +#endif // _H_SDK_ALLOC_IMPL_ diff --git a/libpsn00b/libc/tlsf.c b/libpsn00b/libc/tlsf.c new file mode 100644 index 00000000..8beab961 --- /dev/null +++ b/libpsn00b/libc/tlsf.c @@ -0,0 +1,1450 @@ +/* +** Two Level Segregated Fit memory allocator, version 3.1. +** Written by Matthew Conte +** http://tlsf.baisoku.org +** +** Based on the original documentation by Miguel Masmano: +** http://www.gii.upv.es/tlsf/main/docs +** +** This implementation was written to the specification +** of the document, therefore no GPL restrictions apply. +** +** Copyright (c) 2006-2016, Matthew Conte +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions are met: +** * Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** * Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** * Neither the name of the copyright holder nor the +** names of its contributors may be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. 
IN NO EVENT SHALL MATTHEW CONTE BE LIABLE FOR ANY +** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include +#include +#include +#include +#include +#include + +/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */ +/* pool_t: a block of memory that TLSF can manage. */ +typedef void* tlsf_t; +typedef void* pool_t; + +tlsf_t __tlsf_allocator = NULL; + +/* Create/destroy a memory pool. */ +tlsf_t tlsf_create(void* mem); +tlsf_t tlsf_create_with_pool(void* mem, size_t bytes); +void tlsf_destroy(tlsf_t tlsf); +pool_t tlsf_get_pool(tlsf_t tlsf); + +/* Add/remove memory pools. */ +pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes); +void tlsf_remove_pool(tlsf_t tlsf, pool_t pool); + +/* malloc/memalign/realloc/free replacements. */ +void* tlsf_malloc(tlsf_t tlsf, size_t bytes); +void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t bytes); +void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size); +void tlsf_free(tlsf_t tlsf, void* ptr); + +/* Returns internal block size, not original request size */ +size_t tlsf_block_size(void* ptr); + +/* Overheads/limits of internal structures. */ +size_t tlsf_size(void); +size_t tlsf_align_size(void); +size_t tlsf_block_size_min(void); +size_t tlsf_block_size_max(void); +size_t tlsf_pool_overhead(void); +size_t tlsf_alloc_overhead(void); + +/* Tracking */ +void tlsf_init_heap(void* addr, size_t size); +void tlsf_track_heap_usage(void* alloc_addr, ptrdiff_t alloc_incr); +void tlsf_get_heap_usage(HeapUsage* usage); + +/* Debugging. 
*/ +typedef void (*tlsf_walker)(void* ptr, size_t size, int used, void* user); +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user); +/* Returns nonzero if any internal consistency check fails. */ +int tlsf_check(tlsf_t tlsf); +int tlsf_check_pool(pool_t pool); + +#if defined(__cplusplus) +#define tlsf_decl inline +#else +#define tlsf_decl static +#endif + +/* +** Architecture-specific bit manipulation routines. +** +** TLSF achieves O(1) cost for malloc and free operations by limiting +** the search for a free block to a free list of guaranteed size +** adequate to fulfill the request, combined with efficient free list +** queries using bitmasks and architecture-specific bit-manipulation +** routines. +** +** Most modern processors provide instructions to count leading zeroes +** in a word, find the lowest and highest set bit, etc. These +** specific implementations will be used when available, falling back +** to a reasonably efficient generic implementation. +** +** NOTE: TLSF spec relies on ffs/fls returning value 0..31. +** ffs/fls return 1-32 by default, returning 0 for error. +*/ + +/* +** Detect whether or not we are building for a 32- or 64-bit (LP/LLP) +** architecture. There is no reliable portable method at compile-time. +*/ +#if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) \ + || defined (_WIN64) || defined (__LP64__) || defined (__LLP64__) +#define TLSF_64BIT +#endif + +/* +** gcc 3.4 and above have builtin support, specialized for architecture. +** Some compilers masquerade as gcc; patchlevel test filters them out. +*/ +#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \ + && defined (__GNUC_PATCHLEVEL__) + +#if defined (__SNC__) +/* SNC for Playstation 3. 
*/ + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __builtin_clz(reverse); + return bit - 1; +} + +#else + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + return __builtin_ffs(word) - 1; +} + +#endif + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __builtin_clz(word) : 0; + return bit - 1; +} + +#elif defined (_MSC_VER) && (_MSC_VER >= 1400) && (defined (_M_IX86) || defined (_M_X64)) +/* Microsoft Visual C++ support on x86/X64 architectures. */ + +#include + +#pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) + +tlsf_decl int tlsf_fls(unsigned int word) +{ + unsigned long index; + return _BitScanReverse(&index, word) ? index : -1; +} + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + unsigned long index; + return _BitScanForward(&index, word) ? index : -1; +} + +#elif defined (_MSC_VER) && defined (_M_PPC) +/* Microsoft Visual C++ support on PowerPC architectures. */ + +#include + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = 32 - _CountLeadingZeros(word); + return bit - 1; +} + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - _CountLeadingZeros(reverse); + return bit - 1; +} + +#elif defined (__ARMCC_VERSION) +/* RealView Compilation Tools for ARM */ + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __clz(reverse); + return bit - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __clz(word) : 0; + return bit - 1; +} + +#elif defined (__ghs__) +/* Green Hills support for PowerPC */ + +#include + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __CLZ32(reverse); + return bit - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 
32 - __CLZ32(word) : 0; + return bit - 1; +} + +#else +/* Fall back to generic implementation. */ + +tlsf_decl int tlsf_fls_generic(unsigned int word) +{ + int bit = 32; + + if (!word) bit -= 1; + if (!(word & 0xffff0000)) { word <<= 16; bit -= 16; } + if (!(word & 0xff000000)) { word <<= 8; bit -= 8; } + if (!(word & 0xf0000000)) { word <<= 4; bit -= 4; } + if (!(word & 0xc0000000)) { word <<= 2; bit -= 2; } + if (!(word & 0x80000000)) { word <<= 1; bit -= 1; } + + return bit; +} + +/* Implement ffs in terms of fls. */ +tlsf_decl int tlsf_ffs(unsigned int word) +{ + return tlsf_fls_generic(word & (~word + 1)) - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + return tlsf_fls_generic(word) - 1; +} + +#endif + +/* Possibly 64-bit version of tlsf_fls. */ +#if defined (TLSF_64BIT) +tlsf_decl int tlsf_fls_sizet(size_t size) +{ + int high = (int)(size >> 32); + int bits = 0; + if (high) + { + bits = 32 + tlsf_fls(high); + } + else + { + bits = tlsf_fls((int)size & 0xffffffff); + + } + return bits; +} +#else +#define tlsf_fls_sizet tlsf_fls +#endif + +#undef tlsf_decl + +/* +** Constants. +*/ + +/* Public constants: may be modified. */ +enum tlsf_public +{ + /* log2 of number of linear subdivisions of block sizes. Larger + ** values require more memory in the control structure. Values of + ** 4 or 5 are typical. + */ + SL_INDEX_COUNT_LOG2 = 5, +}; + +/* Private constants: do not modify. */ +enum tlsf_private +{ +#if defined (TLSF_64BIT) + /* All allocation sizes and addresses are aligned to 8 bytes. */ + ALIGN_SIZE_LOG2 = 3, +#else + /* All allocation sizes and addresses are aligned to 4 bytes. */ + ALIGN_SIZE_LOG2 = 2, +#endif + ALIGN_SIZE = (1 << ALIGN_SIZE_LOG2), + + /* + ** We support allocations of sizes up to (1 << FL_INDEX_MAX) bits. 
+ ** However, because we linearly subdivide the second-level lists, and + ** our minimum size granularity is 4 bytes, it doesn't make sense to + ** create first-level lists for sizes smaller than SL_INDEX_COUNT * 4, + ** or (1 << (SL_INDEX_COUNT_LOG2 + 2)) bytes, as there we will be + ** trying to split size ranges into more slots than we have available. + ** Instead, we calculate the minimum threshold size, and place all + ** blocks below that size into the 0th first-level list. + */ + +#if defined (TLSF_64BIT) + /* + ** TODO: We can increase this to support larger sizes, at the expense + ** of more overhead in the TLSF structure. + */ + FL_INDEX_MAX = 32, +#else + FL_INDEX_MAX = 30, +#endif + SL_INDEX_COUNT = (1 << SL_INDEX_COUNT_LOG2), + FL_INDEX_SHIFT = (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2), + FL_INDEX_COUNT = (FL_INDEX_MAX - FL_INDEX_SHIFT + 1), + + SMALL_BLOCK_SIZE = (1 << FL_INDEX_SHIFT), +}; + +/* +** Cast and min/max macros. +*/ + +#define tlsf_cast(t, exp) ((t) (exp)) +#define tlsf_min(a, b) ((a) < (b) ? (a) : (b)) +#define tlsf_max(a, b) ((a) > (b) ? (a) : (b)) + +/* +** Set assert macro, if it has not been provided by the user. +*/ +#if !defined (tlsf_assert) +#define tlsf_assert assert +#endif + +/* +** Static assertion mechanism. +*/ + +#define _tlsf_glue2(x, y) x ## y +#define _tlsf_glue(x, y) _tlsf_glue2(x, y) +#define tlsf_static_assert(exp) \ + typedef char _tlsf_glue(static_assert, __LINE__) [(exp) ? 1 : -1] + +/* This code has been tested on 32- and 64-bit (LP/LLP) architectures. */ +tlsf_static_assert(sizeof(int) * CHAR_BIT == 32); +tlsf_static_assert(sizeof(size_t) * CHAR_BIT >= 32); +tlsf_static_assert(sizeof(size_t) * CHAR_BIT <= 64); + +/* SL_INDEX_COUNT must be <= number of bits in sl_bitmap's storage type. */ +tlsf_static_assert(sizeof(unsigned int) * CHAR_BIT >= SL_INDEX_COUNT); + +/* Ensure we've properly tuned our sizes. 
*/ +tlsf_static_assert(ALIGN_SIZE == SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + +/* +** Data structures and associated constants. +*/ + +/* +** Block header structure. +** +** There are several implementation subtleties involved: +** - The prev_phys_block field is only valid if the previous block is free. +** - The prev_phys_block field is actually stored at the end of the +** previous block. It appears at the beginning of this structure only to +** simplify the implementation. +** - The next_free / prev_free fields are only valid if the block is free. +*/ +typedef struct block_header_t +{ + /* Points to the previous physical block. */ + struct block_header_t* prev_phys_block; + + /* The size of this block, excluding the block header. */ + size_t size; + + /* Next and previous free blocks. */ + struct block_header_t* next_free; + struct block_header_t* prev_free; +} block_header_t; + +/* +** Since block sizes are always at least a multiple of 4, the two least +** significant bits of the size field are used to store the block status: +** - bit 0: whether block is busy or free +** - bit 1: whether previous block is busy or free +*/ +static const size_t block_header_free_bit = 1 << 0; +static const size_t block_header_prev_free_bit = 1 << 1; + +/* +** The size of the block header exposed to used blocks is the size field. +** The prev_phys_block field is stored *inside* the previous free block. +*/ +static const size_t block_header_overhead = sizeof(size_t); + +/* User data starts directly after the size field in a used block. */ +static const size_t block_start_offset = + offsetof(block_header_t, size) + sizeof(size_t); + +/* +** A free block must be large enough to store its header minus the size of +** the prev_phys_block field, and no larger than the number of addressable +** bits for FL_INDEX. 
+*/ +static const size_t block_size_min = + sizeof(block_header_t) - sizeof(block_header_t*); +static const size_t block_size_max = tlsf_cast(size_t, 1) << FL_INDEX_MAX; + + +/* The TLSF control structure. */ +typedef struct control_t +{ + /* Empty lists point at this block to indicate they are free. */ + block_header_t block_null; + + /* Bitmaps for free lists. */ + unsigned int fl_bitmap; + unsigned int sl_bitmap[FL_INDEX_COUNT]; + + /* Head of free lists. */ + block_header_t* blocks[FL_INDEX_COUNT][SL_INDEX_COUNT]; +} control_t; + +/* A type used for casting when doing pointer arithmetic. */ +typedef ptrdiff_t tlsfptr_t; + +/* +** block_header_t member functions. +*/ + +static size_t block_size(const block_header_t* block) +{ + return block->size & ~(block_header_free_bit | block_header_prev_free_bit); +} + +static void block_set_size(block_header_t* block, size_t size) +{ + const size_t oldsize = block->size; + block->size = size | (oldsize & (block_header_free_bit | block_header_prev_free_bit)); +} + +static int block_is_last(const block_header_t* block) +{ + return block_size(block) == 0; +} + +static int block_is_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_free_bit); +} + +static void block_set_free(block_header_t* block) +{ + block->size |= block_header_free_bit; +} + +static void block_set_used(block_header_t* block) +{ + block->size &= ~block_header_free_bit; +} + +static int block_is_prev_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_prev_free_bit); +} + +static void block_set_prev_free(block_header_t* block) +{ + block->size |= block_header_prev_free_bit; +} + +static void block_set_prev_used(block_header_t* block) +{ + block->size &= ~block_header_prev_free_bit; +} + +static block_header_t* block_from_ptr(const void* ptr) +{ + return tlsf_cast(block_header_t*, + tlsf_cast(unsigned char*, ptr) - block_start_offset); +} + +static void* block_to_ptr(const block_header_t* 
block) +{ + return tlsf_cast(void*, + tlsf_cast(unsigned char*, block) + block_start_offset); +} + +/* Return location of next block after block of given size. */ +static block_header_t* offset_to_block(const void* ptr, size_t size) +{ + return tlsf_cast(block_header_t*, tlsf_cast(tlsfptr_t, ptr) + size); +} + +/* Return location of previous block. */ +static block_header_t* block_prev(const block_header_t* block) +{ + tlsf_assert(block_is_prev_free(block) && "previous block must be free"); + return block->prev_phys_block; +} + +/* Return location of next existing block. */ +static block_header_t* block_next(const block_header_t* block) +{ + block_header_t* next = offset_to_block(block_to_ptr(block), + block_size(block) - block_header_overhead); + tlsf_assert(!block_is_last(block)); + return next; +} + +/* Link a new block with its physical neighbor, return the neighbor. */ +static block_header_t* block_link_next(block_header_t* block) +{ + block_header_t* next = block_next(block); + next->prev_phys_block = block; + return next; +} + +static void block_mark_as_free(block_header_t* block) +{ + /* Link the block to the next block, first. 
*/ + block_header_t* next = block_link_next(block); + block_set_prev_free(next); + block_set_free(block); +} + +static void block_mark_as_used(block_header_t* block) +{ + block_header_t* next = block_next(block); + block_set_prev_used(next); + block_set_used(block); +} + +static size_t align_up(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return (x + (align - 1)) & ~(align - 1); +} + +static size_t align_down(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return x - (x & (align - 1)); +} + +static void* align_ptr(const void* ptr, size_t align) +{ + const tlsfptr_t aligned = + (tlsf_cast(tlsfptr_t, ptr) + (align - 1)) & ~(align - 1); + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return tlsf_cast(void*, aligned); +} + +/* +** Adjust an allocation size to be aligned to word size, and no smaller +** than internal minimum. +*/ +static size_t adjust_request_size(size_t size, size_t align) +{ + size_t adjust = 0; + if (size) + { + const size_t aligned = align_up(size, align); + + /* aligned sized must not exceed block_size_max or we'll go out of bounds on sl_bitmap */ + if (aligned < block_size_max) + { + adjust = tlsf_max(aligned, block_size_min); + } + } + return adjust; +} + +/* +** TLSF utility functions. In most cases, these are direct translations of +** the documentation found in the white paper. +*/ + +static void mapping_insert(size_t size, int* fli, int* sli) +{ + int fl, sl; + if (size < SMALL_BLOCK_SIZE) + { + /* Store small blocks in first list. 
*/ + fl = 0; + sl = tlsf_cast(int, size) / (SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + } + else + { + fl = tlsf_fls_sizet(size); + sl = tlsf_cast(int, size >> (fl - SL_INDEX_COUNT_LOG2)) ^ (1 << SL_INDEX_COUNT_LOG2); + fl -= (FL_INDEX_SHIFT - 1); + } + *fli = fl; + *sli = sl; +} + +/* This version rounds up to the next block size (for allocations) */ +static void mapping_search(size_t size, int* fli, int* sli) +{ + if (size >= SMALL_BLOCK_SIZE) + { + const size_t round = (1 << (tlsf_fls_sizet(size) - SL_INDEX_COUNT_LOG2)) - 1; + size += round; + } + mapping_insert(size, fli, sli); +} + +static block_header_t* search_suitable_block(control_t* control, int* fli, int* sli) +{ + int fl = *fli; + int sl = *sli; + + /* + ** First, search for a block in the list associated with the given + ** fl/sl index. + */ + unsigned int sl_map = control->sl_bitmap[fl] & (~0U << sl); + if (!sl_map) + { + /* No block exists. Search in the next largest first-level list. */ + const unsigned int fl_map = control->fl_bitmap & (~0U << (fl + 1)); + if (!fl_map) + { + /* No free blocks available, memory has been exhausted. */ + return 0; + } + + fl = tlsf_ffs(fl_map); + *fli = fl; + sl_map = control->sl_bitmap[fl]; + } + tlsf_assert(sl_map && "internal error - second level bitmap is null"); + sl = tlsf_ffs(sl_map); + *sli = sl; + + /* Return the first block in the free list. */ + return control->blocks[fl][sl]; +} + +/* Remove a free block from the free list.*/ +static void remove_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* prev = block->prev_free; + block_header_t* next = block->next_free; + tlsf_assert(prev && "prev_free field can not be null"); + tlsf_assert(next && "next_free field can not be null"); + next->prev_free = prev; + prev->next_free = next; + + /* If this block is the head of the free list, set new head. */ + if (control->blocks[fl][sl] == block) + { + control->blocks[fl][sl] = next; + + /* If the new head is null, clear the bitmap. 
*/ + if (next == &control->block_null) + { + control->sl_bitmap[fl] &= ~(1U << sl); + + /* If the second bitmap is now empty, clear the fl bitmap. */ + if (!control->sl_bitmap[fl]) + { + control->fl_bitmap &= ~(1U << fl); + } + } + } +} + +/* Insert a free block into the free block list. */ +static void insert_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* current = control->blocks[fl][sl]; + tlsf_assert(current && "free list cannot have a null entry"); + tlsf_assert(block && "cannot insert a null entry into the free list"); + block->next_free = current; + block->prev_free = &control->block_null; + current->prev_free = block; + + tlsf_assert(block_to_ptr(block) == align_ptr(block_to_ptr(block), ALIGN_SIZE) + && "block not aligned properly"); + /* + ** Insert the new block at the head of the list, and mark the first- + ** and second-level bitmaps appropriately. + */ + control->blocks[fl][sl] = block; + control->fl_bitmap |= (1U << fl); + control->sl_bitmap[fl] |= (1U << sl); +} + +/* Remove a given block from the free list. */ +static void block_remove(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* Insert a given block into the free list. */ +static void block_insert(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + insert_free_block(control, block, fl, sl); +} + +static int block_can_split(block_header_t* block, size_t size) +{ + return block_size(block) >= sizeof(block_header_t) + size; +} + +/* Split a block into two, the second of which is free. */ +static block_header_t* block_split(block_header_t* block, size_t size) +{ + /* Calculate the amount of space left in the remaining block. 
*/ + block_header_t* remaining = + offset_to_block(block_to_ptr(block), size - block_header_overhead); + + const size_t remain_size = block_size(block) - (size + block_header_overhead); + + tlsf_assert(block_to_ptr(remaining) == align_ptr(block_to_ptr(remaining), ALIGN_SIZE) + && "remaining block not aligned properly"); + + tlsf_assert(block_size(block) == remain_size + size + block_header_overhead); + block_set_size(remaining, remain_size); + tlsf_assert(block_size(remaining) >= block_size_min && "block split with invalid size"); + + block_set_size(block, size); + block_mark_as_free(remaining); + + return remaining; +} + +/* Absorb a free block's storage into an adjacent previous free block. */ +static block_header_t* block_absorb(block_header_t* prev, block_header_t* block) +{ + tlsf_assert(!block_is_last(prev) && "previous block can't be last"); + /* Note: Leaves flags untouched. */ + prev->size += block_size(block) + block_header_overhead; + block_link_next(prev); + return prev; +} + +/* Merge a just-freed block with an adjacent previous free block. */ +static block_header_t* block_merge_prev(control_t* control, block_header_t* block) +{ + if (block_is_prev_free(block)) + { + block_header_t* prev = block_prev(block); + tlsf_assert(prev && "prev physical block can't be null"); + tlsf_assert(block_is_free(prev) && "prev block is not free though marked as such"); + block_remove(control, prev); + block = block_absorb(prev, block); + } + + return block; +} + +/* Merge a just-freed block with an adjacent free block. 
*/ +static block_header_t* block_merge_next(control_t* control, block_header_t* block) +{ + block_header_t* next = block_next(block); + tlsf_assert(next && "next physical block can't be null"); + + if (block_is_free(next)) + { + tlsf_assert(!block_is_last(block) && "previous block can't be last"); + block_remove(control, next); + block = block_absorb(block, next); + } + + return block; +} + +/* Trim any trailing block space off the end of a block, return to pool. */ +static void block_trim_free(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(block_is_free(block) && "block must be free"); + if (block_can_split(block, size)) + { + block_header_t* remaining_block = block_split(block, size); + block_link_next(block); + block_set_prev_free(remaining_block); + block_insert(control, remaining_block); + } +} + +/* Trim any trailing block space off the end of a used block, return to pool. */ +static void block_trim_used(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(!block_is_free(block) && "block must be used"); + if (block_can_split(block, size)) + { + /* If the next block is free, we must coalesce. */ + block_header_t* remaining_block = block_split(block, size); + block_set_prev_used(remaining_block); + + remaining_block = block_merge_next(control, remaining_block); + block_insert(control, remaining_block); + } +} + +static block_header_t* block_trim_free_leading(control_t* control, block_header_t* block, size_t size) +{ + block_header_t* remaining_block = block; + if (block_can_split(block, size)) + { + /* We want the 2nd block. 
*/ + remaining_block = block_split(block, size - block_header_overhead); + block_set_prev_free(remaining_block); + + block_link_next(block); + block_insert(control, block); + } + + return remaining_block; +} + +static block_header_t* block_locate_free(control_t* control, size_t size) +{ + int fl = 0, sl = 0; + block_header_t* block = 0; + + if (size) + { + mapping_search(size, &fl, &sl); + + /* + ** mapping_search can futz with the size, so for excessively large sizes it can sometimes wind up + ** with indices that are off the end of the block array. + ** So, we protect against that here, since this is the only callsite of mapping_search. + ** Note that we don't need to check sl, since it comes from a modulo operation that guarantees it's always in range. + */ + if (fl < FL_INDEX_COUNT) + { + block = search_suitable_block(control, &fl, &sl); + } + } + + if (block) + { + tlsf_assert(block_size(block) >= size); + remove_free_block(control, block, fl, sl); + } + + return block; +} + +static void* block_prepare_used(control_t* control, block_header_t* block, size_t size) +{ + void* p = 0; + if (block) + { + tlsf_assert(size && "size must be non-zero"); + block_trim_free(control, block, size); + block_mark_as_used(block); + p = block_to_ptr(block); + } + return p; +} + +/* Clear structure and point all empty lists at the null block. */ +static void control_construct(control_t* control) +{ + int i, j; + + control->block_null.next_free = &control->block_null; + control->block_null.prev_free = &control->block_null; + + control->fl_bitmap = 0; + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + control->sl_bitmap[i] = 0; + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + control->blocks[i][j] = &control->block_null; + } + } +} + +/* +** Debugging utilities. 
+*/ + +typedef struct integrity_t +{ + int prev_status; + int status; +} integrity_t; + +#define tlsf_insist(x) { tlsf_assert(x); if (!(x)) { status--; } } + +static void integrity_walker(void* ptr, size_t size, int used, void* user) +{ + block_header_t* block = block_from_ptr(ptr); + integrity_t* integ = tlsf_cast(integrity_t*, user); + const int this_prev_status = block_is_prev_free(block) ? 1 : 0; + const int this_status = block_is_free(block) ? 1 : 0; + const size_t this_block_size = block_size(block); + + int status = 0; + (void)used; + tlsf_insist(integ->prev_status == this_prev_status && "prev status incorrect"); + tlsf_insist(size == this_block_size && "block size incorrect"); + + integ->prev_status = this_status; + integ->status += status; +} + +int tlsf_check(tlsf_t tlsf) +{ + int i, j; + + control_t* control = tlsf_cast(control_t*, tlsf); + int status = 0; + + /* Check that the free lists and bitmaps are accurate. */ + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + const int fl_map = control->fl_bitmap & (1U << i); + const int sl_list = control->sl_bitmap[i]; + const int sl_map = sl_list & (1U << j); + const block_header_t* block = control->blocks[i][j]; + + /* Check that first- and second-level lists agree. */ + if (!fl_map) + { + tlsf_insist(!sl_map && "second-level map must be null"); + } + + if (!sl_map) + { + tlsf_insist(block == &control->block_null && "block list must be null"); + continue; + } + + /* Check that there is at least one free block. 
*/ + tlsf_insist(sl_list && "no free blocks in second-level map"); + tlsf_insist(block != &control->block_null && "block should not be null"); + + while (block != &control->block_null) + { + int fli, sli; + tlsf_insist(block_is_free(block) && "block should be free"); + tlsf_insist(!block_is_prev_free(block) && "blocks should have coalesced"); + tlsf_insist(!block_is_free(block_next(block)) && "blocks should have coalesced"); + tlsf_insist(block_is_prev_free(block_next(block)) && "block should be free"); + tlsf_insist(block_size(block) >= block_size_min && "block not minimum size"); + + mapping_insert(block_size(block), &fli, &sli); + tlsf_insist(fli == i && sli == j && "block size indexed in wrong list"); + block = block->next_free; + } + } + } + + return status; +} + +#undef tlsf_insist + +static void default_walker(void* ptr, size_t size, int used, void* user) +{ + (void)user; + printf("\t%p %s size: %x (%p)\n", ptr, used ? "used" : "free", (unsigned int)size, block_from_ptr(ptr)); +} + +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user) +{ + tlsf_walker pool_walker = walker ? walker : default_walker; + block_header_t* block = + offset_to_block(pool, -(int)block_header_overhead); + + while (block && !block_is_last(block)) + { + pool_walker( + block_to_ptr(block), + block_size(block), + !block_is_free(block), + user); + block = block_next(block); + } +} + +size_t tlsf_block_size(void* ptr) +{ + size_t size = 0; + if (ptr) + { + const block_header_t* block = block_from_ptr(ptr); + size = block_size(block); + } + return size; +} + +int tlsf_check_pool(pool_t pool) +{ + /* Check that the blocks are physically correct. 
*/ + integrity_t integ = { 0, 0 }; + tlsf_walk_pool(pool, integrity_walker, &integ); + + return integ.status; +} + +/* +** Size of the TLSF structures in a given memory block passed to +** tlsf_create, equal to the size of a control_t +*/ +size_t tlsf_size(void) +{ + return sizeof(control_t); +} + +size_t tlsf_align_size(void) +{ + return ALIGN_SIZE; +} + +size_t tlsf_block_size_min(void) +{ + return block_size_min; +} + +size_t tlsf_block_size_max(void) +{ + return block_size_max; +} + +/* +** Overhead of the TLSF structures in a given memory block passed to +** tlsf_add_pool, equal to the overhead of a free block and the +** sentinel block. +*/ +size_t tlsf_pool_overhead(void) +{ + return 2 * block_header_overhead; +} + +size_t tlsf_alloc_overhead(void) +{ + return block_header_overhead; +} + +pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes) +{ + block_header_t* block; + block_header_t* next; + + const size_t pool_overhead = tlsf_pool_overhead(); + const size_t pool_bytes = align_down(bytes - pool_overhead, ALIGN_SIZE); + + if (((ptrdiff_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_add_pool: Memory must be aligned by %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + if (pool_bytes < block_size_min || pool_bytes > block_size_max) + { +#if defined (TLSF_64BIT) + printf("tlsf_add_pool: Memory size must be between 0x%x and 0x%x00 bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)((pool_overhead + block_size_max) / 256)); +#else + printf("tlsf_add_pool: Memory size must be between %u and %u bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)(pool_overhead + block_size_max)); +#endif + return 0; + } + + /* + ** Create the main free block. Offset the start of the block slightly + ** so that the prev_phys_block field falls outside of the pool - + ** it will never be used. 
+ */ + block = offset_to_block(mem, -(tlsfptr_t)block_header_overhead); + block_set_size(block, pool_bytes); + block_set_free(block); + block_set_prev_used(block); + block_insert(tlsf_cast(control_t*, tlsf), block); + + /* Split the block to create a zero-size sentinel block. */ + next = block_link_next(block); + block_set_size(next, 0); + block_set_used(next); + block_set_prev_free(next); + + return mem; +} + +void tlsf_remove_pool(tlsf_t tlsf, pool_t pool) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = offset_to_block(pool, -(int)block_header_overhead); + + int fl = 0, sl = 0; + + tlsf_assert(block_is_free(block) && "block should be free"); + tlsf_assert(!block_is_free(block_next(block)) && "next block should not be free"); + tlsf_assert(block_size(block_next(block)) == 0 && "next block size should be zero"); + + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* +** TLSF main interface. +*/ + +#if _DEBUG +int test_ffs_fls() +{ + /* Verify ffs/fls work properly. */ + int rv = 0; + rv += (tlsf_ffs(0) == -1) ? 0 : 0x1; + rv += (tlsf_fls(0) == -1) ? 0 : 0x2; + rv += (tlsf_ffs(1) == 0) ? 0 : 0x4; + rv += (tlsf_fls(1) == 0) ? 0 : 0x8; + rv += (tlsf_ffs(0x80000000) == 31) ? 0 : 0x10; + rv += (tlsf_ffs(0x80008000) == 15) ? 0 : 0x20; + rv += (tlsf_fls(0x80000008) == 31) ? 0 : 0x40; + rv += (tlsf_fls(0x7FFFFFFF) == 30) ? 0 : 0x80; + +#if defined (TLSF_64BIT) + rv += (tlsf_fls_sizet(0x80000000) == 31) ? 0 : 0x100; + rv += (tlsf_fls_sizet(0x100000000) == 32) ? 0 : 0x200; + rv += (tlsf_fls_sizet(0xffffffffffffffff) == 63) ? 
0 : 0x400; +#endif + + if (rv) + { + printf("test_ffs_fls: %x ffs/fls tests failed.\n", rv); + } + return rv; +} +#endif + +tlsf_t tlsf_create(void* mem) +{ +#if _DEBUG + if (test_ffs_fls()) + { + return 0; + } +#endif + + if (((tlsfptr_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_create: Memory must be aligned to %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + control_construct(tlsf_cast(control_t*, mem)); + + return tlsf_cast(tlsf_t, mem); +} + +tlsf_t tlsf_create_with_pool(void* mem, size_t bytes) +{ + tlsf_t tlsf = tlsf_create(mem); + tlsf_add_pool(tlsf, (char*)mem + tlsf_size(), bytes - tlsf_size()); + return tlsf; +} + +void tlsf_destroy(tlsf_t tlsf) +{ + /* Nothing to do. */ + (void)tlsf; +} + +pool_t tlsf_get_pool(tlsf_t tlsf) +{ + return tlsf_cast(pool_t, (char*)tlsf + tlsf_size()); +} + +void* tlsf_malloc(tlsf_t tlsf, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + block_header_t* block = block_locate_free(control, adjust); + void* mem = block_prepare_used(control, block, adjust); + tlsf_track_heap_usage(mem, block_size(block)); + return mem; +} + +void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + /* + ** We must allocate an additional minimum block size bytes so that if + ** our free block will leave an alignment gap which is smaller, we can + ** trim a leading free block and release it back to the pool. We must + ** do this because the previous physical block is in use, therefore + ** the prev_phys_block field is not valid, and we can't simply adjust + ** the size of that block. + */ + const size_t gap_minimum = sizeof(block_header_t); + const size_t size_with_gap = adjust_request_size(adjust + align + gap_minimum, align); + + /* + ** If alignment is less than or equals base alignment, we're done. 
+ ** If we requested 0 bytes, return null, as tlsf_malloc(0) does. + */ + const size_t aligned_size = (adjust && align > ALIGN_SIZE) ? size_with_gap : adjust; + + block_header_t* block = block_locate_free(control, aligned_size); + + /* This can't be a static assert. */ + tlsf_assert(sizeof(block_header_t) == block_size_min + block_header_overhead); + + if (block) + { + void* ptr = block_to_ptr(block); + void* aligned = align_ptr(ptr, align); + size_t gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + + /* If gap size is too small, offset to next aligned boundary. */ + if (gap && gap < gap_minimum) + { + const size_t gap_remain = gap_minimum - gap; + const size_t offset = tlsf_max(gap_remain, align); + const void* next_aligned = tlsf_cast(void*, + tlsf_cast(tlsfptr_t, aligned) + offset); + + aligned = align_ptr(next_aligned, align); + gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + } + + if (gap) + { + tlsf_assert(gap >= gap_minimum && "gap size too small"); + block = block_trim_free_leading(control, block, gap); + } + } + + void* mem = block_prepare_used(control, block, adjust); + tlsf_track_heap_usage(mem, block_size(block)); + return mem; +} + +void tlsf_free(tlsf_t tlsf, void* ptr) +{ + /* Don't attempt to free a NULL pointer. */ + if (ptr) + { + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = block_from_ptr(ptr); + tlsf_assert(!block_is_free(block) && "block already marked as free"); + tlsf_track_heap_usage(NULL, -block_size(block)); + block_mark_as_free(block); + block = block_merge_prev(control, block); + block = block_merge_next(control, block); + block_insert(control, block); + } +} + +/* +** The TLSF block information provides us with enough information to +** provide a reasonably intelligent implementation of realloc, growing or +** shrinking the currently allocated block as required. 
+** +** This routine handles the somewhat esoteric edge cases of realloc: +** - a non-zero size with a null pointer will behave like malloc +** - a zero size with a non-null pointer will behave like free +** - a request that cannot be satisfied will leave the original buffer +** untouched +** - an extended buffer size will leave the newly-allocated area with +** contents undefined +*/ +void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + void* p = 0; + + /* Zero-size requests are treated as free. */ + if (ptr && size == 0) + { + tlsf_free(tlsf, ptr); + } + /* Requests with NULL pointers are treated as malloc. */ + else if (!ptr) + { + p = tlsf_malloc(tlsf, size); + } + else + { + block_header_t* block = block_from_ptr(ptr); + block_header_t* next = block_next(block); + + const size_t cursize = block_size(block); + const size_t combined = cursize + block_size(next) + block_header_overhead; + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + tlsf_assert(!block_is_free(block) && "block already marked as free"); + + /* + ** If the next block is used, or when combined with the current + ** block, does not offer enough space, we must reallocate and copy. + */ + if (adjust > cursize && (!block_is_free(next) || adjust > combined)) + { + p = tlsf_malloc(tlsf, size); + if (p) + { + const size_t minsize = tlsf_min(cursize, size); + memcpy(p, ptr, minsize); + tlsf_free(tlsf, ptr); + } + } + else + { + /* Do we need to expand to the next block? */ + if (adjust > cursize) + { + block_merge_next(control, block); + block_mark_as_used(block); + } + + /* Trim the resulting block and return the original pointer. 
*/ + block_trim_used(control, block, adjust); + const size_t newsize = block_size(block); + tlsf_track_heap_usage(ptr, newsize - cursize); + p = ptr; + } + } + + return p; +} + +#include "malloc_impl.h" + +// ==== API ==== + +static void *_heap_start, *_heap_end, *_heap_limit; +static size_t _heap_alloc, _heap_alloc_max; + +void tlsf_init_heap(void* addr, size_t size) { + _sdk_assert_abort(__tlsf_allocator == NULL, "[ERROR] Heap already initialised\n"); + __tlsf_allocator = tlsf_create_with_pool(addr, size); + _sdk_assert_abort(__tlsf_allocator != NULL, "[ERROR] Unable to initialise allocator\n"); + _heap_start = addr; + _heap_end = addr; + _heap_limit = (void*)((uintptr_t) addr + size); + _heap_alloc = 0; + _heap_alloc_max = 0; + _sdk_log("Initialised TLSF allocator\n"); +} + +void tlsf_track_heap_usage(void* alloc_addr, ptrdiff_t alloc_size) { + if (alloc_addr != NULL) { + void* alloc_end = alloc_addr + alloc_size; + if (alloc_end > _heap_end) { + _heap_end = alloc_end; + } + } + _heap_alloc += alloc_size; + if (_heap_alloc > _heap_alloc_max) { + _heap_alloc_max = _heap_alloc; + } +} + +void tlsf_get_heap_usage(HeapUsage* usage) { + usage->total = _heap_limit - _heap_start; + usage->heap = _heap_end - _heap_start; + usage->stack = _heap_limit - _heap_end; + usage->alloc = _heap_alloc; + usage->alloc_max = _heap_alloc_max; +} + +#if SDK_ALLOC_IMPL == SDK_ALLOC_IMPL_TLSF + +void InitHeap(void* addr, size_t size) { + tlsf_init_heap(addr, size); +} + +void TrackHeapUsage(ptrdiff_t alloc_incr) { + tlsf_track_heap_usage((void*) 1, alloc_incr); +} + +void GetHeapUsage(HeapUsage* usage) { + tlsf_get_heap_usage(usage); +} + +__attribute__((hot)) +void free(void* ptr) { + tlsf_free(__tlsf_allocator, ptr); +} + +__attribute__(( + hot, + malloc, + alloc_size(1) +#ifdef gnu_version_10 + , malloc(free, 1) +#endif +)) +void* malloc(size_t size) { + return tlsf_malloc(__tlsf_allocator, size); +} + +__attribute__(( + hot, + malloc, + alloc_size(1, 2) +#ifdef gnu_version_10 + , 
malloc(free, 1) +#endif +)) +void* calloc(size_t num, size_t size) { + return tlsf_malloc(__tlsf_allocator, num * size); +} + +__attribute__(( + hot, + malloc, + alloc_size(2) +#ifdef gnu_version_10 + , malloc(free, 1) +#endif +)) +void* realloc(void* ptr, size_t size) { + return tlsf_realloc(__tlsf_allocator, ptr, size); +} + +#endif