Skip to content

Commit

Permalink
Backport #8259 to release/17.x (#8270)
Browse files Browse the repository at this point in the history
* [vulkan] Fix Vulkan SIMT mappings for GPU loop vars.  (#8259)

* Fix Vulkan SIMT mappings for GPU loop vars. Previous refactoring
accidentally used the fully qualified var name rather than the
categorized vulkan intrinsic name.

* Avoid formatting the GPU kernel to a string for Vulkan (since it's binary SPIR-V needs to remain intact).

---------

Co-authored-by: Derek Gerstmann <[email protected]>
Co-authored-by: Steven Johnson <[email protected]>

* Update CodeGen_Vulkan_Dev.cpp

* [vulkan] Add conform API methods to memory allocator to fix block allocations (#8130)

* Add conform API methods to block and region allocator classes
Override conform requests for Vulkan memory allocator
Cleanup memory requirement constraints for Vulkan
Add conform test cases to block_allocator runtime test.

* Clang format/tidy pas

* Fix unsigned int comparisons

* Clang format pass

* Fix other unsigned int comparisons

* Fix mismatched template types for max()

* Fix whitespace for clang format

---------

Co-authored-by: Derek Gerstmann <[email protected]>

* Backport fixes for Vulkan in src/runtime/internal for allocations.

---------

Co-authored-by: Derek Gerstmann <[email protected]>
Co-authored-by: Derek Gerstmann <[email protected]>
  • Loading branch information
3 people authored Jun 24, 2024
1 parent 54a7f1d commit c4dbb06
Show file tree
Hide file tree
Showing 8 changed files with 945 additions and 296 deletions.
5 changes: 3 additions & 2 deletions src/CodeGen_C.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1159,8 +1159,9 @@ void CodeGen_C::compile(const Buffer<> &buffer) {
bool is_constant = buffer.dimensions() != 0;

// If it is an GPU source kernel, we would like to see the actual output, not the
// uint8 representation. We use a string literal for this.
if (ends_with(name, "gpu_source_kernels")) {
// uint8 representation. We use a string literal for this. Since the Vulkan backend
// actually generates a SPIR-V binary, keep it as raw data to avoid textual reformatting.
if (ends_with(name, "gpu_source_kernels") && !target.has_feature(Target::Vulkan)) {
stream << "static const char *" << name << "_string = R\"BUFCHARSOURCE(";
stream.write((char *)b.host, num_elems);
stream << ")BUFCHARSOURCE\";\n";
Expand Down
22 changes: 16 additions & 6 deletions src/CodeGen_Vulkan_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2514,12 +2514,20 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_workgroup_size(SpvId kernel_func
namespace {

// Locate all the unique GPU variables used as SIMT intrinsics
// This pass is used to identify if LocalInvocationID and/or WorkgroupID
// need to be declared as variables for the entrypoint to the Kernel. Since
// these can only be declared once and their type is vec3, we don't
// care about the specific dims that are mapped to loop variables.
class FindIntrinsicsUsed : public IRVisitor {
using IRVisitor::visit;
void visit(const For *op) override {
if (CodeGen_GPU_Dev::is_gpu_var(op->name)) {

// map the block or thread id name to the SIMT intrinsic definition
auto intrinsic = simt_intrinsic(op->name);
intrinsics_used.insert(intrinsic.first);

// mark the name of the intrinsic being used (without the dimension)
intrinsics_used.insert(intrinsic.first); // name only!
}
op->body.accept(this);
}
Expand Down Expand Up @@ -2555,20 +2563,22 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_entry_point(const Stmt &s, SpvId
s.accept(&find_intrinsics);

SpvFactory::Variables entry_point_variables;
for (const std::string &intrinsic_name : find_intrinsics.intrinsics_used) {
for (const std::string &used_intrinsic : find_intrinsics.intrinsics_used) {

// The builtins are pointers to vec3
// The builtins are pointers to vec3 and can only be declared once per kernel entrypoint
SpvStorageClass storage_class = SpvStorageClassInput;
SpvId intrinsic_type_id = builder.declare_type(Type(Type::UInt, 32, 3));
SpvId intrinsic_ptr_type_id = builder.declare_pointer_type(intrinsic_type_id, storage_class);
const std::string intrinsic_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + intrinsic_name;
const std::string intrinsic_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + used_intrinsic;
SpvId intrinsic_var_id = builder.declare_global_variable(intrinsic_var_name, intrinsic_ptr_type_id, storage_class);
SpvId intrinsic_loaded_id = builder.reserve_id();
builder.append(SpvFactory::load(intrinsic_type_id, intrinsic_loaded_id, intrinsic_var_id));
symbol_table.push(intrinsic_var_name, {intrinsic_loaded_id, storage_class});

// Annotate that this is the specific builtin
SpvBuiltIn built_in_kind = map_simt_builtin(intrinsic_name);
// Map the used intrinsic name to the specific builtin
SpvBuiltIn built_in_kind = map_simt_builtin(used_intrinsic);

// Add an annotation that indicates this variable is bound to the requested intrinsic
SpvBuilder::Literals annotation_literals = {(uint32_t)built_in_kind};
builder.add_annotation(intrinsic_var_id, SpvDecorationBuiltIn, annotation_literals);

Expand Down
210 changes: 122 additions & 88 deletions src/runtime/internal/block_allocator.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/runtime/internal/memory_arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ void *MemoryArena::create_entry(void *user_context, Block *block, uint32_t index
void *entry_ptr = lookup_entry(user_context, block, index);
block->free_index = block->indices[index];
block->status[index] = AllocationStatus::InUse;
#if DEBUG_RUNTIME_INTERNAL
#ifdef DEBUG_RUNTIME_INTERNAL
memset(entry_ptr, 0, config.entry_size);
#endif
return entry_ptr;
Expand Down
6 changes: 5 additions & 1 deletion src/runtime/internal/memory_resources.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ ALWAYS_INLINE bool is_power_of_two_alignment(size_t x) {
// -- Alignment must be power of two!
ALWAYS_INLINE size_t aligned_offset(size_t offset, size_t alignment) {
halide_abort_if_false(nullptr, is_power_of_two_alignment(alignment));
return (offset + (alignment - 1)) & ~(alignment - 1);
return (alignment == 0) ? (offset) : (offset + (alignment - 1)) & ~(alignment - 1);
}

// Returns a suitable alignment such that requested alignment is a suitable
Expand Down Expand Up @@ -202,18 +202,22 @@ struct HalideSystemAllocatorFns {

typedef int (*AllocateBlockFn)(void *, MemoryBlock *);
typedef int (*DeallocateBlockFn)(void *, MemoryBlock *);
typedef int (*ConformBlockRequestFn)(void *, MemoryRequest *);

struct MemoryBlockAllocatorFns {
AllocateBlockFn allocate = nullptr;
DeallocateBlockFn deallocate = nullptr;
ConformBlockRequestFn conform = nullptr;
};

typedef int (*AllocateRegionFn)(void *, MemoryRegion *);
typedef int (*DeallocateRegionFn)(void *, MemoryRegion *);
typedef int (*ConformBlockRegionFn)(void *, MemoryRequest *);

struct MemoryRegionAllocatorFns {
AllocateRegionFn allocate = nullptr;
DeallocateRegionFn deallocate = nullptr;
ConformBlockRegionFn conform = nullptr;
};

// --
Expand Down
Loading

0 comments on commit c4dbb06

Please sign in to comment.