diff --git a/src/runtime/internal/block_allocator.h b/src/runtime/internal/block_allocator.h index feee56a4e531..89b1a929e79b 100644 --- a/src/runtime/internal/block_allocator.h +++ b/src/runtime/internal/block_allocator.h @@ -55,10 +55,11 @@ class BlockAllocator { // Public interface methods MemoryRegion *reserve(void *user_context, const MemoryRequest &request); - int release(void *user_context, MemoryRegion *region); //< unmark and cache the region for reuse - int reclaim(void *user_context, MemoryRegion *region); //< free the region and consolidate - int retain(void *user_context, MemoryRegion *region); //< retain the region and increase the usage count - bool collect(void *user_context); //< returns true if any blocks were removed + int conform(void *user_context, MemoryRequest *request) const; //< conform the given request into a suitable allocation + int release(void *user_context, MemoryRegion *region); //< unmark and cache the region for reuse + int reclaim(void *user_context, MemoryRegion *region); //< free the region and consolidate + int retain(void *user_context, MemoryRegion *region); //< retain the region and increase the usage count + bool collect(void *user_context); //< returns true if any blocks were removed int release(void *user_context); int destroy(void *user_context); @@ -86,13 +87,13 @@ class BlockAllocator { int destroy_region_allocator(void *user_context, RegionAllocator *region_allocator); // Reserves a block of memory for the requested size and returns the corresponding block entry, or nullptr on failure - BlockEntry *reserve_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated); + BlockEntry *reserve_block_entry(void *user_context, const MemoryRequest &request); // Locates the "best-fit" block entry for the requested size, or nullptr if none was found - BlockEntry *find_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated); + BlockEntry 
*find_block_entry(void *user_context, const MemoryRequest &request); - // Creates a new block entry and int the list - BlockEntry *create_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated); + // Creates a new block entry and adds it to the list + BlockEntry *create_block_entry(void *user_context, const MemoryRequest &request); // Releases the block entry from being used, and makes it available for further allocations int release_block_entry(void *user_context, BlockEntry *block_entry); @@ -113,7 +114,7 @@ class BlockAllocator { bool is_compatible_block(const BlockResource *block, const MemoryProperties &properties) const; // Returns true if the given block is suitable for the request allocation - bool is_block_suitable_for_request(void *user_context, const BlockResource *block, const MemoryProperties &properties, size_t size, bool dedicated) const; + bool is_block_suitable_for_request(void *user_context, const BlockResource *block, const MemoryRequest &request) const; Config config; LinkedList block_list; @@ -162,7 +163,8 @@ MemoryRegion *BlockAllocator::reserve(void *user_context, const MemoryRequest &r << "caching=" << halide_memory_caching_name(request.properties.caching) << " " << "visibility=" << halide_memory_visibility_name(request.properties.visibility) << ") ..."; #endif - BlockEntry *block_entry = reserve_block_entry(user_context, request.properties, request.size, request.dedicated); + // Reserve a block entry for use + BlockEntry *block_entry = reserve_block_entry(user_context, request); if (block_entry == nullptr) { error(user_context) << "BlockAllocator: Failed to allocate new empty block of requested size (" << (int32_t)(request.size) << " bytes)\n"; @@ -173,11 +175,12 @@ MemoryRegion *BlockAllocator::reserve(void *user_context, const MemoryRequest &r halide_abort_if_false(user_context, block != nullptr); halide_abort_if_false(user_context, block->allocator != nullptr); + // Reserve an initial memory
region for the block MemoryRegion *result = reserve_memory_region(user_context, block->allocator, request); if (result == nullptr) { // Unable to reserve region in an existing block ... create a new block and try again. - block_entry = create_block_entry(user_context, request.properties, request.size, request.dedicated); + block_entry = create_block_entry(user_context, request); if (block_entry == nullptr) { error(user_context) << "BlockAllocator: Out of memory! Failed to allocate empty block of size (" << (int32_t)(request.size) << " bytes)\n"; @@ -299,8 +302,8 @@ MemoryRegion *BlockAllocator::reserve_memory_region(void *user_context, RegionAl return result; } -bool BlockAllocator::is_block_suitable_for_request(void *user_context, const BlockResource *block, const MemoryProperties &properties, size_t size, bool dedicated) const { - if (!is_compatible_block(block, properties)) { +bool BlockAllocator::is_block_suitable_for_request(void *user_context, const BlockResource *block, const MemoryRequest &request) const { + if (!is_compatible_block(block, request.properties)) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "BlockAllocator: skipping block ... incompatible properties! 
(" << "block_resource=" << (void *)block << " " @@ -309,16 +312,16 @@ bool BlockAllocator::is_block_suitable_for_request(void *user_context, const Blo << "block_usage=" << halide_memory_usage_name(block->memory.properties.usage) << " " << "block_caching=" << halide_memory_caching_name(block->memory.properties.caching) << " " << "block_visibility=" << halide_memory_visibility_name(block->memory.properties.visibility) << " " - << "request_size=" << (uint32_t)size << " " - << "request_usage=" << halide_memory_usage_name(properties.usage) << " " - << "request_caching=" << halide_memory_caching_name(properties.caching) << " " - << "request_visibility=" << halide_memory_visibility_name(properties.visibility) << ")"; + << "request_size=" << (uint32_t)request.size << " " + << "request_usage=" << halide_memory_usage_name(request.properties.usage) << " " + << "request_caching=" << halide_memory_caching_name(request.properties.caching) << " " + << "request_visibility=" << halide_memory_visibility_name(request.properties.visibility) << ")"; #endif // skip blocks that are using incompatible memory return false; } - if (dedicated && (block->reserved > 0)) { + if (request.dedicated && (block->reserved > 0)) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "BlockAllocator: skipping block ... can be used for dedicated allocation! 
(" << "block_resource=" << (void *)block << " " @@ -340,7 +343,7 @@ bool BlockAllocator::is_block_suitable_for_request(void *user_context, const Blo } size_t available = (block->memory.size - block->reserved); - if (available >= size) { + if (available >= request.size) { return true; } @@ -348,23 +351,23 @@ bool BlockAllocator::is_block_suitable_for_request(void *user_context, const Blo } BlockAllocator::BlockEntry * -BlockAllocator::find_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated) { +BlockAllocator::find_block_entry(void *user_context, const MemoryRequest &request) { BlockEntry *block_entry = block_list.back(); while (block_entry != nullptr) { BlockEntry *prev_entry = block_entry->prev_ptr; const BlockResource *block = static_cast(block_entry->value); - if (is_block_suitable_for_request(user_context, block, properties, size, dedicated)) { + if (is_block_suitable_for_request(user_context, block, request)) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "BlockAllocator: found suitable block (" << "user_context=" << (void *)(user_context) << " " << "block_resource=" << (void *)block << " " << "block_size=" << (uint32_t)block->memory.size << " " << "block_reserved=" << (uint32_t)block->reserved << " " - << "request_size=" << (uint32_t)size << " " - << "dedicated=" << (dedicated ? "true" : "false") << " " - << "usage=" << halide_memory_usage_name(properties.usage) << " " - << "caching=" << halide_memory_caching_name(properties.caching) << " " - << "visibility=" << halide_memory_visibility_name(properties.visibility) << ")"; + << "request_size=" << (uint32_t)request.size << " " + << "request_dedicated=" << (request.dedicated ? 
"true" : "false") << " " + << "request_usage=" << halide_memory_usage_name(request.properties.usage) << " " + << "request_caching=" << halide_memory_caching_name(request.properties.caching) << " " + << "request_visibility=" << halide_memory_visibility_name(request.properties.visibility) << ")"; #endif return block_entry; } @@ -375,37 +378,37 @@ BlockAllocator::find_block_entry(void *user_context, const MemoryProperties &pro #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "BlockAllocator: couldn't find suitable block! (" << "user_context=" << (void *)(user_context) << " " - << "request_size=" << (uint32_t)size << " " - << "dedicated=" << (dedicated ? "true" : "false") << " " - << "usage=" << halide_memory_usage_name(properties.usage) << " " - << "caching=" << halide_memory_caching_name(properties.caching) << " " - << "visibility=" << halide_memory_visibility_name(properties.visibility) << ")"; + << "request_size=" << (uint32_t)request.size << " " + << "request_dedicated=" << (request.dedicated ? "true" : "false") << " " + << "request_usage=" << halide_memory_usage_name(request.properties.usage) << " " + << "request_caching=" << halide_memory_caching_name(request.properties.caching) << " " + << "request_visibility=" << halide_memory_visibility_name(request.properties.visibility) << ")"; #endif } return block_entry; } BlockAllocator::BlockEntry * -BlockAllocator::reserve_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated) { +BlockAllocator::reserve_block_entry(void *user_context, const MemoryRequest &request) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "BlockAllocator: reserving block ... ! (" - << "requested_size=" << (uint32_t)size << " " - << "requested_is_dedicated=" << (dedicated ? 
"true" : "false") << " " - << "requested_usage=" << halide_memory_usage_name(properties.usage) << " " - << "requested_caching=" << halide_memory_caching_name(properties.caching) << " " - << "requested_visibility=" << halide_memory_visibility_name(properties.visibility) << ")"; + << "requested_size=" << (uint32_t)request.size << " " + << "requested_is_dedicated=" << (request.dedicated ? "true" : "false") << " " + << "requested_usage=" << halide_memory_usage_name(request.properties.usage) << " " + << "requested_caching=" << halide_memory_caching_name(request.properties.caching) << " " + << "requested_visibility=" << halide_memory_visibility_name(request.properties.visibility) << ")"; #endif - BlockEntry *block_entry = find_block_entry(user_context, properties, size, dedicated); + BlockEntry *block_entry = find_block_entry(user_context, request); if (block_entry == nullptr) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "BlockAllocator: creating block ... ! (" - << "requested_size=" << (uint32_t)size << " " - << "requested_is_dedicated=" << (dedicated ? "true" : "false") << " " - << "requested_usage=" << halide_memory_usage_name(properties.usage) << " " - << "requested_caching=" << halide_memory_caching_name(properties.caching) << " " - << "requested_visibility=" << halide_memory_visibility_name(properties.visibility) << ")"; + << "requested_size=" << (uint32_t)request.size << " " + << "requested_is_dedicated=" << (request.dedicated ? 
"true" : "false") << " " + << "requested_usage=" << halide_memory_usage_name(request.properties.usage) << " " + << "requested_caching=" << halide_memory_caching_name(request.properties.caching) << " " + << "requested_visibility=" << halide_memory_visibility_name(request.properties.visibility) << ")"; #endif - block_entry = create_block_entry(user_context, properties, size, dedicated); + block_entry = create_block_entry(user_context, request); } if (block_entry) { @@ -449,7 +452,7 @@ int BlockAllocator::destroy_region_allocator(void *user_context, RegionAllocator } BlockAllocator::BlockEntry * -BlockAllocator::create_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated) { +BlockAllocator::create_block_entry(void *user_context, const MemoryRequest &request) { if (config.maximum_pool_size && (pool_size() >= config.maximum_pool_size)) { error(user_context) << "BlockAllocator: No free blocks found! Maximum pool size reached (" << (int32_t)(config.maximum_pool_size) << " bytes or " @@ -476,12 +479,16 @@ BlockAllocator::create_block_entry(void *user_context, const MemoryProperties &p << "allocator=" << (void *)(allocators.block.allocate) << ")..."; #endif + // Constrain the request to the a valid block allocation + MemoryRequest block_request = request; + conform(user_context, &block_request); + + // Create the block resource itself BlockResource *block = static_cast(block_entry->value); - block->memory.size = constrain_requested_size(size); + block->memory.size = block_request.size; block->memory.handle = nullptr; - block->memory.properties = properties; - block->memory.properties.nearest_multiple = max(config.nearest_multiple, properties.nearest_multiple); - block->memory.dedicated = dedicated; + block->memory.properties = block_request.properties; + block->memory.dedicated = block_request.dedicated; block->reserved = 0; block->allocator = create_region_allocator(user_context, block); alloc_memory_block(user_context, block); @@ 
-561,6 +568,33 @@ size_t BlockAllocator::constrain_requested_size(size_t size) const { return actual_size; } +int BlockAllocator::conform(void *user_context, MemoryRequest *request) const { + + request->properties.nearest_multiple = max(config.nearest_multiple, request->properties.nearest_multiple); + + if (request->properties.nearest_multiple) { + size_t nm = request->properties.nearest_multiple; + request->size = (((request->size + nm - 1) / nm) * nm); // round up to nearest multiple + } + + if (config.minimum_block_size) { + request->size = ((request->size < config.minimum_block_size) ? + config.minimum_block_size : + request->size); + } + if (config.maximum_block_size) { + request->size = ((request->size > config.maximum_block_size) ? + config.maximum_block_size : + request->size); + } + + if (allocators.block.conform) { + return allocators.block.conform(user_context, request); + } + + return 0; +} + bool BlockAllocator::is_compatible_block(const BlockResource *block, const MemoryProperties &properties) const { if (properties.caching != MemoryCaching::DefaultCaching) { if (properties.caching != block->memory.properties.caching) { diff --git a/src/runtime/internal/memory_resources.h b/src/runtime/internal/memory_resources.h index d41fa57304fb..0be6041519a1 100644 --- a/src/runtime/internal/memory_resources.h +++ b/src/runtime/internal/memory_resources.h @@ -202,18 +202,22 @@ struct HalideSystemAllocatorFns { typedef int (*AllocateBlockFn)(void *, MemoryBlock *); typedef int (*DeallocateBlockFn)(void *, MemoryBlock *); +typedef int (*ConformBlockRequestFn)(void *, MemoryRequest *); struct MemoryBlockAllocatorFns { AllocateBlockFn allocate = nullptr; DeallocateBlockFn deallocate = nullptr; + ConformBlockRequestFn conform = nullptr; }; typedef int (*AllocateRegionFn)(void *, MemoryRegion *); typedef int (*DeallocateRegionFn)(void *, MemoryRegion *); +typedef int (*ConformBlockRegionFn)(void *, MemoryRequest *); struct MemoryRegionAllocatorFns { AllocateRegionFn 
allocate = nullptr; DeallocateRegionFn deallocate = nullptr; + ConformBlockRegionFn conform = nullptr; }; // -- diff --git a/src/runtime/internal/region_allocator.h b/src/runtime/internal/region_allocator.h index 02c2cd7e6aa0..3588389c3747 100644 --- a/src/runtime/internal/region_allocator.h +++ b/src/runtime/internal/region_allocator.h @@ -46,10 +46,11 @@ class RegionAllocator { // Public interface methods MemoryRegion *reserve(void *user_context, const MemoryRequest &request); - int release(void *user_context, MemoryRegion *memory_region); //< unmark and cache the region for reuse - int reclaim(void *user_context, MemoryRegion *memory_region); //< free the region and consolidate - int retain(void *user_context, MemoryRegion *memory_region); //< retain the region and increase usage count - bool collect(void *user_context); //< returns true if any blocks were removed + int conform(void *user_context, MemoryRequest *request) const; //< conform the given request into a suitable allocation + int release(void *user_context, MemoryRegion *memory_region); //< unmark and cache the region for reuse + int reclaim(void *user_context, MemoryRegion *memory_region); //< free the region and consolidate + int retain(void *user_context, MemoryRegion *memory_region); //< retain the region and increase usage count + bool collect(void *user_context); //< returns true if any blocks were removed int release(void *user_context); int destroy(void *user_context); @@ -73,13 +74,13 @@ class RegionAllocator { BlockRegion *coalesce_block_regions(void *user_context, BlockRegion *region); // Returns true if the given region can be split to accomodate the given size - bool can_split(const BlockRegion *region, size_t size, size_t alignment) const; + bool can_split(const BlockRegion *region, const MemoryRequest &request) const; // Splits the given block region into a smaller region to accomodate the given size, followed by empty space for the remaining - BlockRegion *split_block_region(void 
*user_context, BlockRegion *region, size_t size, size_t alignment); + BlockRegion *split_block_region(void *user_context, BlockRegion *region, const MemoryRequest &request); // Creates a new block region and adds it to the region list - BlockRegion *create_block_region(void *user_context, const MemoryProperties &properties, size_t offset, size_t size, bool dedicated); + BlockRegion *create_block_region(void *user_context, const MemoryRequest &request); // Creates a new block region and adds it to the region list int destroy_block_region(void *user_context, BlockRegion *region); @@ -137,30 +138,55 @@ int RegionAllocator::initialize(void *user_context, BlockResource *mb, const Mem allocators = ma; arena = MemoryArena::create(user_context, {sizeof(BlockRegion), MemoryArena::default_capacity, 0}, allocators.system); halide_abort_if_false(user_context, arena != nullptr); + MemoryRequest block_request = {}; + block_request.size = block->memory.size; + block_request.offset = 0; + block_request.alignment = block->memory.properties.alignment; + block_request.properties = block->memory.properties; + block_request.dedicated = block->memory.dedicated; block->allocator = this; - block->regions = create_block_region( - user_context, - block->memory.properties, - 0, block->memory.size, - block->memory.dedicated); + block->regions = create_block_region(user_context, block_request); + return 0; +} + +int RegionAllocator::conform(void *user_context, MemoryRequest *request) const { + if (allocators.region.conform) { + return allocators.region.conform(user_context, request); + } else { + size_t actual_alignment = conform_alignment(request->alignment, block->memory.properties.alignment); + size_t actual_offset = aligned_offset(request->offset, actual_alignment); + size_t actual_size = conform_size(actual_offset, request->size, actual_alignment, block->memory.properties.nearest_multiple); + request->alignment = actual_alignment; + request->offset = actual_offset; + request->size = 
actual_size; + } return 0; } MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest &request) { halide_abort_if_false(user_context, request.size > 0); - size_t actual_alignment = conform_alignment(request.alignment, block->memory.properties.alignment); - size_t actual_size = conform_size(request.offset, request.size, actual_alignment, block->memory.properties.nearest_multiple); + + MemoryRequest region_request = request; + + int error_code = conform(user_context, &region_request); + if (error_code) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: Failed to conform region request! Unable to reserve memory ...\n"; +#endif + return nullptr; + } + size_t remaining = block->memory.size - block->reserved; - if (remaining < actual_size) { + if (remaining < region_request.size) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Unable to reserve more memory from block " - << "-- requested size (" << (int32_t)(request.size) << " bytes) " + << "-- requested size (" << (int32_t)(region_request.size) << " bytes) " << "greater than available (" << (int32_t)(remaining) << " bytes)"; #endif return nullptr; } - BlockRegion *block_region = find_block_region(user_context, request); + BlockRegion *block_region = find_block_region(user_context, region_request); if (block_region == nullptr) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Failed to locate region for requested size (" @@ -169,12 +195,12 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest & return nullptr; } - if (can_split(block_region, request.size, request.alignment)) { + if (can_split(block_region, region_request)) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.size) << ") " - << "to accomodate requested size (" << (int32_t)(request.size) << " bytes)"; + << "to accomodate requested size (" <<
(int32_t)(region_request.size) << " bytes)"; #endif - split_block_region(user_context, block_region, request.size, request.alignment); + split_block_region(user_context, block_region, region_request); } alloc_block_region(user_context, block_region); @@ -237,8 +263,17 @@ bool RegionAllocator::is_block_region_suitable_for_request(void *user_context, c return false; } + MemoryRequest region_request = request; + int error_code = conform(user_context, &region_request); + if (error_code) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: Failed to conform region request! Unable to reserve memory ...\n"; +#endif + return false; + } + // skip incompatible block regions for this request - if (!is_compatible_block_region(region, request.properties)) { + if (!is_compatible_block_region(region, region_request.properties)) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << " skipping block region ... incompatible properties! (" << " block_region=" << (void *)region @@ -248,16 +283,13 @@ bool RegionAllocator::is_block_region_suitable_for_request(void *user_context, c return false; } - size_t actual_alignment = conform_alignment(request.alignment, block->memory.properties.alignment); - size_t actual_size = conform_size(region->memory.offset, request.size, actual_alignment, block->memory.properties.nearest_multiple); - // is the adjusted size larger than the current region? - if (actual_size > region->memory.size) { + if (region_request.size > region->memory.size) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << " skipping block region ... not enough space for adjusted size!
(" << " block_region=" << (void *)region << " request_size=" << (uint32_t)(request.size) - << " actual_size=" << (uint32_t)(actual_size) + << " actual_size=" << (uint32_t)(region_request.size) << " region_size=" << (uint32_t)(region->memory.size) << ")"; #endif @@ -265,12 +297,12 @@ bool RegionAllocator::is_block_region_suitable_for_request(void *user_context, c } // will the adjusted size fit within the remaining unallocated space? - if ((actual_size + block->reserved) <= block->memory.size) { + if ((region_request.size + block->reserved) <= block->memory.size) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << " found suitable block region! (" << " block_region=" << (void *)region << " request_size=" << (uint32_t)(request.size) - << " actual_size=" << (uint32_t)(actual_size) + << " actual_size=" << (uint32_t)(region_request.size) << " region_size=" << (uint32_t)(region->memory.size) << ")"; #endif @@ -411,13 +443,11 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe return block_region; } -bool RegionAllocator::can_split(const BlockRegion *block_region, size_t size, size_t alignment) const { - size_t actual_alignment = conform_alignment(alignment, block->memory.properties.alignment); - size_t split_size = conform_size(block_region->memory.offset, size, actual_alignment, block->memory.properties.nearest_multiple); - return (block_region && (block_region->memory.size > split_size) && (block_region->usage_count == 0)); +bool RegionAllocator::can_split(const BlockRegion *block_region, const MemoryRequest &split_request) const { + return (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0)); } -BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion *block_region, size_t size, size_t alignment) { +BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion *block_region, const MemoryRequest &request) { if ((block_region->usage_count == 0) 
&& (block_region->memory.handle != nullptr)) { #ifdef DEBUG_RUNTIME_INTERNAL @@ -434,33 +464,17 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion block_region->memory.handle = nullptr; } - size_t actual_alignment = conform_alignment(alignment, block->memory.properties.alignment); - size_t split_size = conform_size(block_region->memory.offset, size, actual_alignment, block->memory.properties.nearest_multiple); - size_t split_offset = aligned_offset(block_region->memory.offset + size, actual_alignment); - size_t empty_size = block_region->memory.size - split_size; - -#ifdef DEBUG_RUNTIME_INTERNAL - debug(user_context) << "RegionAllocator: Conforming size and alignment (" - << "requested_size=" << (uint32_t)size << " " - << "split_size=" << (uint32_t)split_size << " " - << "split_offset=" << (uint32_t)split_size << " " - << "empty_size=" << (uint32_t)empty_size << " " - << "requested_alignment=" << (uint32_t)alignment << " " - << "required_alignment=" << (uint32_t)block->memory.properties.alignment << " " - << "actual_alignment=" << (uint32_t)actual_alignment << ")"; -#endif + MemoryRequest split_request = request; + split_request.size = block_region->memory.size - request.size; + split_request.offset = block_region->memory.offset + request.size; #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Splitting " << "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) " - << "to create empty region (offset=" << (int32_t)split_offset << " size=" << (int32_t)(empty_size) << " bytes)"; + << "to create empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)"; #endif - BlockRegion *next_region = block_region->next_ptr; - BlockRegion *empty_region = create_block_region(user_context, - block_region->memory.properties, - split_offset, empty_size, - block_region->memory.dedicated); + BlockRegion 
*empty_region = create_block_region(user_context, split_request); halide_abort_if_false(user_context, empty_region != nullptr); empty_region->next_ptr = next_region; @@ -469,42 +483,52 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion } empty_region->prev_ptr = block_region; block_region->next_ptr = empty_region; - block_region->memory.size -= empty_size; + block_region->memory.size -= empty_region->memory.size; return empty_region; } -BlockRegion *RegionAllocator::create_block_region(void *user_context, const MemoryProperties &properties, size_t offset, size_t size, bool dedicated) { +BlockRegion *RegionAllocator::create_block_region(void *user_context, const MemoryRequest &request) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Creating block region request (" << "user_context=" << (void *)(user_context) << " " - << "offset=" << (uint32_t)offset << " " - << "size=" << (uint32_t)size << " " - << "alignment=" << (uint32_t)properties.alignment << " " - << "dedicated=" << (dedicated ? "true" : "false") << " " - << "usage=" << halide_memory_usage_name(properties.usage) << " " - << "caching=" << halide_memory_caching_name(properties.caching) << " " - << "visibility=" << halide_memory_visibility_name(properties.visibility) << ") ..."; -#endif - size_t actual_alignment = conform_alignment(properties.alignment, block->memory.properties.alignment); - size_t actual_size = conform_size(offset, size, actual_alignment, block->memory.properties.nearest_multiple); - size_t actual_offset = aligned_offset(offset, actual_alignment); - - if (actual_size == 0) { - error(user_context) << "RegionAllocator: Failed to allocate new block region ... region size was zero!\n"; + << "offset=" << (uint32_t)request.offset << " " + << "size=" << (uint32_t)request.size << " " + << "alignment=" << (uint32_t)request.properties.alignment << " " + << "dedicated=" << (request.dedicated ?
"true" : "false") << " " + << "usage=" << halide_memory_usage_name(request.properties.usage) << " " + << "caching=" << halide_memory_caching_name(request.properties.caching) << " " + << "visibility=" << halide_memory_visibility_name(request.properties.visibility) << ") ..."; +#endif + + MemoryRequest region_request = request; + int error_code = conform(user_context, &region_request); + if (error_code) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: Failed to conform request for new block region!\n"; +#endif + return nullptr; + } + + if (region_request.size == 0) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: Failed to allocate new block region ... region size was zero!\n"; +#endif return nullptr; } BlockRegion *block_region = static_cast<BlockRegion *>(arena->reserve(user_context, true)); if (block_region == nullptr) { - error(user_context) << "RegionAllocator: Failed to allocate new block region!\n"; +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: Failed to allocate new block region!\n"; +#endif return nullptr; } block_region->memory.handle = nullptr; - block_region->memory.offset = actual_offset; - block_region->memory.size = actual_size; - block_region->memory.properties = properties; - block_region->memory.dedicated = dedicated; + block_region->memory.offset = region_request.offset; + block_region->memory.size = region_request.size; + block_region->memory.properties = region_request.properties; + block_region->memory.dedicated = region_request.dedicated; block_region->status = AllocationStatus::Available; block_region->block_ptr = block; block_region->usage_count = 0; @@ -669,6 +693,8 @@ bool RegionAllocator::collect(void *user_context) { uint32_t collected_count = 0; uint32_t remaining_count = 0; + uint64_t available_bytes = 0; + uint64_t scanned_bytes = 0; uint64_t reserved = block->reserved; debug(user_context) << " collecting unused regions (" << "block_ptr=" << (void *)block << " " @@ -679,6
+705,8 @@ bool RegionAllocator::collect(void *user_context) { bool has_collected = false; BlockRegion *block_region = block->regions; while (block_region != nullptr) { +#ifdef DEBUG_RUNTIME_INTERNAL + scanned_bytes += block_region->memory.size; debug(user_context) << " checking region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " @@ -687,6 +715,7 @@ bool RegionAllocator::collect(void *user_context) { << "memory_size=" << (uint32_t)(block_region->memory.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; +#endif if (can_coalesce(block_region)) { #ifdef DEBUG_RUNTIME_INTERNAL @@ -705,6 +734,9 @@ bool RegionAllocator::collect(void *user_context) { remaining_count++; #endif } +#ifdef DEBUG_RUNTIME_INTERNAL + available_bytes += is_available(block_region) ? block_region->memory.size : 0; +#endif if (is_last_block_region(user_context, block_region)) { break; } @@ -715,6 +747,8 @@ bool RegionAllocator::collect(void *user_context) { << "block_ptr=" << (void *)block << " " << "total_count=" << (uint32_t)(collected_count + remaining_count) << " " << "block_reserved=" << (uint32_t)(block->reserved) << " " + << "scanned_bytes=" << (uint32_t)(scanned_bytes) << " " + << "available_bytes=" << (uint32_t)(available_bytes) << " " << ")"; #endif diff --git a/src/runtime/vulkan_memory.h b/src/runtime/vulkan_memory.h index 96535f3446ba..055fbef72277 100644 --- a/src/runtime/vulkan_memory.h +++ b/src/runtime/vulkan_memory.h @@ -58,11 +58,12 @@ class VulkanMemoryAllocator { static int destroy(void *user_context, VulkanMemoryAllocator *allocator); // Public interface methods - MemoryRegion *reserve(void *user_context, MemoryRequest &request); - int release(void *user_context, MemoryRegion *region); //< unmark and cache the region for reuse - int reclaim(void *user_context, MemoryRegion *region); //< free the region and consolidate - int retain(void *user_context, MemoryRegion *region); //< retain the 
region and increase its use count - bool collect(void *user_context); //< returns true if any blocks were removed + MemoryRegion *reserve(void *user_context, const MemoryRequest &request); + int conform(void *user_context, MemoryRequest *request); //< conforms the given memory request into one that can be allocated + int release(void *user_context, MemoryRegion *region); //< unmark and cache the region for reuse + int reclaim(void *user_context, MemoryRegion *region); //< free the region and consolidate + int retain(void *user_context, MemoryRegion *region); //< retain the region and increase its use count + bool collect(void *user_context); //< returns true if any blocks were removed int release(void *user_context); int destroy(void *user_context); @@ -86,9 +87,11 @@ class VulkanMemoryAllocator { static int allocate_block(void *instance_ptr, MemoryBlock *block); static int deallocate_block(void *instance_ptr, MemoryBlock *block); + static int conform_block_request(void *instance_ptr, MemoryRequest *request); static int allocate_region(void *instance_ptr, MemoryRegion *region); static int deallocate_region(void *instance_ptr, MemoryRegion *region); + static int conform_region_request(void *instance_ptr, MemoryRequest *request); size_t bytes_allocated_for_blocks() const; size_t blocks_allocated() const; @@ -113,6 +116,8 @@ class VulkanMemoryAllocator { MemoryProperties properties, uint32_t required_flags) const; + int lookup_requirements(void *user_context, size_t size, uint32_t usage_flags, VkMemoryRequirements *memory_requirements); + size_t block_byte_count = 0; size_t block_count = 0; size_t region_byte_count = 0; @@ -180,8 +185,8 @@ int VulkanMemoryAllocator::initialize(void *user_context, block_byte_count = 0; BlockAllocator::MemoryAllocators allocators; allocators.system = system_allocator; - allocators.block = {VulkanMemoryAllocator::allocate_block, VulkanMemoryAllocator::deallocate_block}; - allocators.region = {VulkanMemoryAllocator::allocate_region, 
VulkanMemoryAllocator::deallocate_region}; + allocators.block = {VulkanMemoryAllocator::allocate_block, VulkanMemoryAllocator::deallocate_block, VulkanMemoryAllocator::conform_block_request}; + allocators.region = {VulkanMemoryAllocator::allocate_region, VulkanMemoryAllocator::deallocate_region, VulkanMemoryAllocator::conform_region_request}; BlockAllocator::Config block_allocator_config = {0}; block_allocator_config.maximum_pool_size = cfg.maximum_pool_size; block_allocator_config.maximum_block_count = cfg.maximum_block_count; @@ -202,7 +207,7 @@ int VulkanMemoryAllocator::initialize(void *user_context, return halide_error_code_success; } -MemoryRegion *VulkanMemoryAllocator::reserve(void *user_context, MemoryRequest &request) { +MemoryRegion *VulkanMemoryAllocator::reserve(void *user_context, const MemoryRequest &request) { #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Reserving memory (" << "user_context=" << user_context << " " @@ -272,6 +277,7 @@ void *VulkanMemoryAllocator::map(void *user_context, MemoryRegion *region) { error(user_context) << "VulkanMemoryAllocator: Unable to map region! 
Invalid memory range !\n"; return nullptr; } +#if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: MapMemory (" << "user_context=" << user_context << "\n" << " region_size=" << (uint32_t)region->size << "\n" @@ -279,8 +285,8 @@ void *VulkanMemoryAllocator::map(void *user_context, MemoryRegion *region) { << " region_range.head_offset=" << (uint32_t)region->range.head_offset << "\n" << " region_range.tail_offset=" << (uint32_t)region->range.tail_offset << "\n" << " memory_offset=" << (uint32_t)memory_offset << "\n" - << " memory_size=" << (uint32_t)memory_size << ") ...\n"; - + << " memory_size=" << (uint32_t)memory_size << "\n)\n"; +#endif VkResult result = vkMapMemory(device, *device_memory, memory_offset, memory_size, 0, (void **)(&mapped_ptr)); if (result != VK_SUCCESS) { error(user_context) << "VulkanMemoryAllocator: Mapping region failed! vkMapMemory returned error code: " << vk_get_error_name(result) << "\n"; @@ -528,6 +534,79 @@ VulkanMemoryAllocator::default_config() { } // -- +int VulkanMemoryAllocator::lookup_requirements(void *user_context, size_t size, uint32_t usage_flags, VkMemoryRequirements *memory_requirements) { +#if defined(HL_VK_DEBUG_MEM) + debug(nullptr) << "VulkanMemoryAllocator: Looking up requirements (" + << "user_context=" << user_context << " " + << "size=" << (uint32_t)size << ", " + << "usage_flags=" << usage_flags << ") ... 
\n"; +#endif + VkBufferCreateInfo create_info = { + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // struct type + nullptr, // struct extending this + 0, // create flags + size, // buffer size (in bytes) + usage_flags, // buffer usage flags + VK_SHARING_MODE_EXCLUSIVE, // sharing mode + 0, nullptr}; + + // Create a buffer to determine alignment requirements + VkBuffer buffer = {0}; + VkResult result = vkCreateBuffer(this->device, &create_info, this->alloc_callbacks, &buffer); + if (result != VK_SUCCESS) { +#if defined(HL_VK_DEBUG_MEM) + debug(nullptr) << "VulkanMemoryAllocator: Failed to create buffer to find requirements!\n\t" + << "vkCreateBuffer returned: " << vk_get_error_name(result) << "\n"; +#endif + return halide_error_code_device_malloc_failed; + } + + vkGetBufferMemoryRequirements(this->device, buffer, memory_requirements); + vkDestroyBuffer(this->device, buffer, this->alloc_callbacks); + return halide_error_code_success; +} + +int VulkanMemoryAllocator::conform_block_request(void *instance_ptr, MemoryRequest *request) { + + VulkanMemoryAllocator *instance = reinterpret_cast<VulkanMemoryAllocator *>(instance_ptr); + if (instance == nullptr) { + return halide_error_code_internal_error; + } + + void *user_context = instance->owner_context; +#if defined(HL_VK_DEBUG_MEM) + debug(nullptr) << "VulkanMemoryAllocator: Conforming block request (" + << "user_context=" << user_context << " " + << "request=" << (void *)(request) << ") ... \n"; +#endif + + if ((instance->device == nullptr) || (instance->physical_device == nullptr)) { + error(user_context) << "VulkanRegionAllocator: Unable to conform block request! 
Invalid device handle!\n"; + return halide_error_code_internal_error; + } + + VkMemoryRequirements memory_requirements = {0}; + uint32_t usage_flags = instance->select_memory_usage(user_context, request->properties); + int error_code = instance->lookup_requirements(user_context, request->size, usage_flags, &memory_requirements); + if (error_code != halide_error_code_success) { + error(user_context) << "VulkanRegionAllocator: Failed to conform block request! Unable to lookup requirements!\n"; + return error_code; + } + +#if defined(HL_VK_DEBUG_MEM) + debug(nullptr) << "VulkanMemoryAllocator: Block allocated (" + << "size=" << (uint32_t)request->size << ", " + << "required_alignment=" << (uint32_t)memory_requirements.alignment << ", " + << "required_size=" << (uint32_t)memory_requirements.size << ", " + << "uniform_buffer_offset_alignment=" << (uint32_t)instance->physical_device_limits.minUniformBufferOffsetAlignment << ", " + << "storage_buffer_offset_alignment=" << (uint32_t)instance->physical_device_limits.minStorageBufferOffsetAlignment << ", " + << "dedicated=" << (request->dedicated ? 
"true" : "false") << ")\n"; +#endif + + request->size = memory_requirements.size; + request->properties.alignment = memory_requirements.alignment; + return halide_error_code_success; +} int VulkanMemoryAllocator::allocate_block(void *instance_ptr, MemoryBlock *block) { VulkanMemoryAllocator *instance = reinterpret_cast<VulkanMemoryAllocator *>(instance_ptr); @@ -587,53 +666,6 @@ int VulkanMemoryAllocator::allocate_block(void *instance_ptr, MemoryBlock *block debug(nullptr) << "vkAllocateMemory: Allocated memory for device region (" << (uint64_t)block->size << " bytes) ...\n"; #endif - uint32_t usage_flags = instance->select_memory_usage(user_context, block->properties); - - VkBufferCreateInfo create_info = { - VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // struct type - nullptr, // struct extending this - 0, // create flags - sizeof(uint32_t), // buffer size (in bytes) - usage_flags, // buffer usage flags - VK_SHARING_MODE_EXCLUSIVE, // sharing mode - 0, nullptr}; - - // Create a buffer to determine alignment requirements - VkBuffer buffer = {0}; - result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, &buffer); - if (result != VK_SUCCESS) { - debug(nullptr) << "VulkanMemoryAllocator: Failed to create buffer!\n\t" - << "vkCreateBuffer returned: " << vk_get_error_name(result) << "\n"; - return halide_error_code_device_malloc_failed; - } - - VkMemoryRequirements memory_requirements = {0}; - vkGetBufferMemoryRequirements(instance->device, buffer, &memory_requirements); - vkDestroyBuffer(instance->device, buffer, instance->alloc_callbacks); - -#if defined(HL_VK_DEBUG_MEM) - debug(nullptr) << "VulkanMemoryAllocator: Block allocated (" - << "size=" << (uint32_t)block->size << ", " - << "required_alignment=" << (uint32_t)memory_requirements.alignment << ", " - << "required_size=" << (uint32_t)memory_requirements.size << ", " - << "uniform_buffer_offset_alignment=" << (uint32_t)instance->physical_device_limits.minUniformBufferOffsetAlignment << ", " - << 
"storage_buffer_offset_alignment=" << (uint32_t)instance->physical_device_limits.minStorageBufferOffsetAlignment << ", " - << "dedicated=" << (block->dedicated ? "true" : "false") << ")\n"; -#endif - - // Enforce any alignment constrainst reported by the device limits for each usage type - if (usage_flags & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) { - block->properties.alignment = instance->physical_device_limits.minStorageBufferOffsetAlignment; - } else if (usage_flags & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { - block->properties.alignment = instance->physical_device_limits.minUniformBufferOffsetAlignment; - } - // Some drivers appear to report a buffer alignment constraint (regardless of usage) that can be larger than either of the above - if (memory_requirements.alignment > block->properties.alignment) { - block->properties.alignment = memory_requirements.alignment; - } - if (memory_requirements.alignment > block->properties.nearest_multiple) { - block->properties.nearest_multiple = memory_requirements.alignment; - } block->handle = (void *)device_memory; instance->block_byte_count += block->size; instance->block_count++; @@ -814,6 +846,98 @@ uint32_t VulkanMemoryAllocator::select_memory_type(void *user_context, // -- +int VulkanMemoryAllocator::conform(void *user_context, MemoryRequest *request) { + + // NOTE: Vulkan will only allow us to bind device memory to a buffer if the memory requirements are met. + // So now we have to check those (on every allocation) and potentially recreate the buffer if the requirements + // don't match the requested VkBuffer's properties. 
Note that this is the internal storage for the driver, + // whose size may be required to be larger than our requested size (even though we will only ever touch the + // size of the region we're managing as within our block) + + VkMemoryRequirements memory_requirements = {0}; + uint32_t usage_flags = select_memory_usage(user_context, request->properties); + int error_code = lookup_requirements(user_context, request->size, usage_flags, &memory_requirements); + if (error_code != halide_error_code_success) { + error(user_context) << "VulkanRegionAllocator: Failed to conform block request! Unable to lookup requirements!\n"; + return error_code; + } + +#if defined(HL_VK_DEBUG_MEM) + debug(nullptr) << "VulkanMemoryAllocator: Buffer requirements (" + << "requested_size=" << (uint32_t)request->size << ", " + << "required_alignment=" << (uint32_t)memory_requirements.alignment << ", " + << "required_size=" << (uint32_t)memory_requirements.size << ")\n"; +#endif + + // Enforce any alignment constraints reported by the device limits for each usage type + if (usage_flags & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) { + if ((request->alignment % this->physical_device_limits.minStorageBufferOffsetAlignment) != 0) { + request->alignment = this->physical_device_limits.minStorageBufferOffsetAlignment; + } + } else if (usage_flags & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { + if ((request->alignment % this->physical_device_limits.minUniformBufferOffsetAlignment) != 0) { + request->alignment = this->physical_device_limits.minUniformBufferOffsetAlignment; + } + } + + // Ensure the request ends on an aligned address + if (request->alignment > config.nearest_multiple) { + request->properties.nearest_multiple = request->alignment; + } + + size_t actual_alignment = conform_alignment(request->alignment, memory_requirements.alignment); + size_t actual_offset = aligned_offset(request->offset, actual_alignment); + size_t actual_size = conform_size(actual_offset, memory_requirements.size, actual_alignment, 
request->properties.nearest_multiple); + +#if defined(HL_VK_DEBUG_MEM) + if ((request->size != actual_size) || (request->alignment != actual_alignment) || (request->offset != actual_offset)) { + debug(nullptr) << "VulkanMemoryAllocator: Adjusting request to match requirements (\n" + << " size = " << (uint64_t)request->size << " => " << (uint64_t)actual_size << ",\n" + << " alignment = " << (uint64_t)request->alignment << " => " << (uint64_t)actual_alignment << ",\n" + << " offset = " << (uint64_t)request->offset << " => " << (uint64_t)actual_offset << ",\n" + << " required.size = " << (uint64_t)memory_requirements.size << ",\n" + << " required.alignment = " << (uint64_t)memory_requirements.alignment << "\n)\n"; + } +#endif + request->size = actual_size; + request->alignment = actual_alignment; + request->offset = actual_offset; + + return halide_error_code_success; +} + +int VulkanMemoryAllocator::conform_region_request(void *instance_ptr, MemoryRequest *request) { + + VulkanMemoryAllocator *instance = reinterpret_cast<VulkanMemoryAllocator *>(instance_ptr); + if (instance == nullptr) { + return halide_error_code_internal_error; + } + + void *user_context = instance->owner_context; +#if defined(HL_VK_DEBUG_MEM) + debug(nullptr) << "VulkanMemoryAllocator: Conforming region request (" + << "user_context=" << user_context << " " + << "request=" << (void *)(request) << ") ... \n"; +#endif + + if ((instance->device == nullptr) || (instance->physical_device == nullptr)) { + error(user_context) << "VulkanRegionAllocator: Unable to conform region request! Invalid device handle!\n"; + return halide_error_code_internal_error; + } + +#if defined(HL_VK_DEBUG_MEM) + debug(nullptr) << "VulkanRegionAllocator: Conforming region request (" + << "size=" << (uint32_t)request->size << ", " + << "offset=" << (uint32_t)request->offset << ", " + << "dedicated=" << (request->dedicated ? 
"true" : "false") << " " + << "usage=" << halide_memory_usage_name(request->properties.usage) << " " + << "caching=" << halide_memory_caching_name(request->properties.caching) << " " + << "visibility=" << halide_memory_visibility_name(request->properties.visibility) << ")\n"; +#endif + + return instance->conform(user_context, request); +} + int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *region) { VulkanMemoryAllocator *instance = reinterpret_cast<VulkanMemoryAllocator *>(instance_ptr); @@ -890,7 +1014,8 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg if (memory_requirements.size > region->size) { vkDestroyBuffer(instance->device, *buffer, instance->alloc_callbacks); #ifdef DEBUG_RUNTIME - debug(nullptr) << "VulkanMemoryAllocator: Reallocating buffer to match required size (" << (uint64_t)memory_requirements.size << " bytes) ...\n"; + debug(nullptr) << "VulkanMemoryAllocator: Reallocating buffer to match required size (" + << (uint64_t)region->size << " => " << (uint64_t)memory_requirements.size << " bytes) ...\n"; #endif create_info.size = memory_requirements.size; VkResult result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, buffer); diff --git a/test/runtime/block_allocator.cpp b/test/runtime/block_allocator.cpp index b2190f63b592..26ce8066e118 100644 --- a/test/runtime/block_allocator.cpp +++ b/test/runtime/block_allocator.cpp @@ -1,3 +1,7 @@ +// NOTE: Uncomment the following two defines to enable debug output +// #define DEBUG_RUNTIME +// #define DEBUG_RUNTIME_INTERNAL + #include "HalideRuntime.h" #include "common.h" @@ -39,6 +43,17 @@ int deallocate_block(void *user_context, MemoryBlock *block) { return halide_error_code_success; } +int conform_block(void *user_context, MemoryRequest *request) { + + debug(user_context) << "Test : conform_block (" + << "request_size=" << int32_t(request->size) << " " + << "request_offset=" << int32_t(request->offset) << " " + << "request_alignment=" << 
int32_t(request->alignment) << " " + << ") ..."; + + return halide_error_code_success; +} + int allocate_region(void *user_context, MemoryRegion *region) { region->handle = (void *)1; allocated_region_memory += region->size; @@ -65,17 +80,38 @@ int deallocate_region(void *user_context, MemoryRegion *region) { return halide_error_code_success; } +int conform_region(void *user_context, MemoryRequest *request) { + size_t actual_alignment = conform_alignment(request->alignment, 0); + size_t actual_offset = aligned_offset(request->offset, actual_alignment); + size_t actual_size = conform_size(actual_offset, request->size, actual_alignment, actual_alignment); + + debug(user_context) << "Test : conform_region (\n " + << "request_size=" << int32_t(request->size) << "\n " + << "request_offset=" << int32_t(request->offset) << "\n " + << "request_alignment=" << int32_t(request->alignment) << "\n " + << "actual_size=" << int32_t(actual_size) << "\n " + << "actual_offset=" << int32_t(actual_offset) << "\n " + << "actual_alignment=" << int32_t(actual_alignment) << "\n" + << ") ..."; + + request->alignment = actual_alignment; + request->offset = actual_offset; + request->size = actual_size; + return halide_error_code_success; +} + } // end namespace int main(int argc, char **argv) { void *user_context = (void *)1; SystemMemoryAllocatorFns system_allocator = {allocate_system, deallocate_system}; - MemoryBlockAllocatorFns block_allocator = {allocate_block, deallocate_block}; - MemoryRegionAllocatorFns region_allocator = {allocate_region, deallocate_region}; // test region allocator class interface { + // Use custom conform allocation request callbacks + MemoryRegionAllocatorFns region_allocator = {allocate_region, deallocate_region, conform_region}; + // Manually create a block resource and allocate memory size_t block_size = 4 * 1024 * 1024; BlockResource block_resource = {}; @@ -164,8 +200,104 @@ int main(int argc, char **argv) { HALIDE_CHECK(user_context, 
get_allocated_system_memory() == 0); } + // test region allocator conform request + { + // Use default conform allocation request callbacks + MemoryRegionAllocatorFns region_allocator = {allocate_region, deallocate_region, nullptr}; + + // Manually create a block resource and allocate memory + size_t block_size = 4 * 1024 * 1024; + size_t padded_size = 32; + BlockResource block_resource = {}; + MemoryBlock *memory_block = &(block_resource.memory); + memory_block->size = block_size; + memory_block->properties.nearest_multiple = padded_size; + allocate_block(user_context, memory_block); + + // Create a region allocator to manage the block resource + RegionAllocator::MemoryAllocators allocators = {system_allocator, region_allocator}; + RegionAllocator *instance = RegionAllocator::create(user_context, &block_resource, allocators); + + // test zero size request + MemoryRequest request = {0}; + instance->conform(user_context, &request); + + debug(user_context) << "Test : region_allocator::conform (" + << "request.size=" << int32_t(request.size) << " " + << "request.alignment=" << int32_t(request.alignment) << " " + << ") ..."; + + halide_abort_if_false(user_context, request.size == size_t(0)); + + // test round up size to alignment + request.size = 1; + request.alignment = 0; + request.properties.alignment = 4; + instance->conform(user_context, &request); + halide_abort_if_false(user_context, request.size != 4); + halide_abort_if_false(user_context, request.alignment != 4); + + size_t nm = padded_size; + for (uint32_t sz = 1; sz < 256; ++sz) { + for (uint32_t a = 2; a < sz; a *= 2) { + request.size = sz; + request.alignment = a; + instance->conform(user_context, &request); + + debug(user_context) << "Test : region_allocator::conform (" + << "request.size=(" << sz << " => " << int32_t(request.size) << ") " + << "request.alignment=(" << a << " => " << int32_t(request.alignment) << ") " + << "..."; + + halide_abort_if_false(user_context, request.size == max(nm, (((sz + nm - 
1) / nm) * nm))); + halide_abort_if_false(user_context, request.alignment == a); + } + } + + // test round up size and offset to alignment + request.size = 1; + request.offset = 1; + request.alignment = 32; + instance->conform(user_context, &request); + halide_abort_if_false(user_context, request.size == 32); + halide_abort_if_false(user_context, request.offset == 32); + halide_abort_if_false(user_context, request.alignment == 32); + + for (uint32_t sz = 1; sz < 256; ++sz) { + for (uint32_t os = 1; os < sz; ++os) { + for (uint32_t a = 2; a < sz; a *= 2) { + request.size = sz; + request.offset = os; + request.alignment = a; + instance->conform(user_context, &request); + + debug(user_context) << "Test : region_allocator::conform (" + << "request.size=(" << sz << " => " << int32_t(request.size) << ") " + << "request.offset=(" << os << " => " << int32_t(request.offset) << ") " + << "request.alignment=(" << a << " => " << int32_t(request.alignment) << ") " + << "..."; + + halide_abort_if_false(user_context, request.size == max(nm, (((sz + nm - 1) / nm) * nm))); + halide_abort_if_false(user_context, request.offset == aligned_offset(os, a)); + halide_abort_if_false(user_context, request.alignment == a); + } + } + } + + instance->destroy(user_context); + deallocate_block(user_context, memory_block); + HALIDE_CHECK(user_context, allocated_block_memory == 0); + HALIDE_CHECK(user_context, allocated_region_memory == 0); + + RegionAllocator::destroy(user_context, instance); + HALIDE_CHECK(user_context, get_allocated_system_memory() == 0); + } + // test region allocator nearest_multiple padding { + // Use default conform allocation request callbacks + MemoryRegionAllocatorFns region_allocator = {allocate_region, deallocate_region, nullptr}; + // Manually create a block resource and allocate memory size_t block_size = 4 * 1024 * 1024; size_t padded_size = 32; @@ -245,6 +377,9 @@ int main(int argc, char **argv) { BlockAllocator::Config config = {0}; config.minimum_block_size = 
1024; + // Use default conform allocation request callbacks + MemoryBlockAllocatorFns block_allocator = {allocate_block, deallocate_block, nullptr}; + MemoryRegionAllocatorFns region_allocator = {allocate_region, deallocate_region, nullptr}; BlockAllocator::MemoryAllocators allocators = {system_allocator, block_allocator, region_allocator}; BlockAllocator *instance = BlockAllocator::create(user_context, config, allocators); @@ -296,11 +431,58 @@ int main(int argc, char **argv) { HALIDE_CHECK(user_context, get_allocated_system_memory() == 0); } + // test conform request + { + uint32_t mbs = 1024; // min block size + BlockAllocator::Config config = {0}; + config.minimum_block_size = mbs; + + // Use default conform allocation request callbacks + MemoryBlockAllocatorFns block_allocator = {allocate_block, deallocate_block, nullptr}; + MemoryRegionAllocatorFns region_allocator = {allocate_region, deallocate_region, nullptr}; + BlockAllocator::MemoryAllocators allocators = {system_allocator, block_allocator, region_allocator}; + BlockAllocator *instance = BlockAllocator::create(user_context, config, allocators); + + MemoryRequest request = {0}; + instance->conform(user_context, &request); + halide_abort_if_false(user_context, request.size != 0); + + // test round up size to alignment + request.size = 1; + request.alignment = 0; + request.properties.alignment = 4; + instance->conform(user_context, &request); + halide_abort_if_false(user_context, request.size != 4); + halide_abort_if_false(user_context, request.alignment != 4); + + for (uint32_t sz = 1; sz < 256; ++sz) { + for (uint32_t a = 2; a < sz; a *= 2) { + request.size = sz; + request.alignment = a; + instance->conform(user_context, &request); + + debug(user_context) << "Test : block_allocator::conform (" + << "request.size=(" << sz << " => " << int32_t(request.size) << ") " + << "request.alignment=(" << a << " => " << int32_t(request.alignment) << ") " + << "..."; + + halide_abort_if_false(user_context, request.size 
== max(mbs, (((sz + a - 1) / a) * a))); + halide_abort_if_false(user_context, request.alignment == a); + } + } + + BlockAllocator::destroy(user_context, instance); + HALIDE_CHECK(user_context, get_allocated_system_memory() == 0); + } + // allocation stress test { BlockAllocator::Config config = {0}; config.minimum_block_size = 1024; + // Use default conform allocation request callbacks + MemoryBlockAllocatorFns block_allocator = {allocate_block, deallocate_block, nullptr}; + MemoryRegionAllocatorFns region_allocator = {allocate_region, deallocate_region, nullptr}; BlockAllocator::MemoryAllocators allocators = {system_allocator, block_allocator, region_allocator}; BlockAllocator *instance = BlockAllocator::create(user_context, config, allocators); @@ -340,6 +522,9 @@ int main(int argc, char **argv) { BlockAllocator::Config config = {0}; config.minimum_block_size = 1024; + // Use default conform allocation request callbacks + MemoryBlockAllocatorFns block_allocator = {allocate_block, deallocate_block, nullptr}; + MemoryRegionAllocatorFns region_allocator = {allocate_region, deallocate_region, nullptr}; BlockAllocator::MemoryAllocators allocators = {system_allocator, block_allocator, region_allocator}; BlockAllocator *instance = BlockAllocator::create(user_context, config, allocators);