Skip to content

Commit

Permalink
fixup! fixup! fixup! Fix barrier issues in computecloth
Browse files Browse the repository at this point in the history
  • Loading branch information
jherico committed Feb 5, 2024
1 parent e1466f9 commit 6584315
Showing 1 changed file with 67 additions and 71 deletions.
138 changes: 67 additions & 71 deletions examples/computecloth/computecloth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class VulkanExample : public VulkanExampleBase
struct Cloth {
glm::uvec2 gridsize{ 60, 60 };
glm::vec2 size{ 5.0f, 5.0f };
uint32_t numParticles{ gridsize.x * gridsize.y };
} cloth;

// We put the resource "types" into structs to make this sample easier to understand
Expand Down Expand Up @@ -370,10 +371,8 @@ class VulkanExample : public VulkanExampleBase
}

// Setup and fill the shader storage buffers containing the particles
// These buffers are used as shader storage buffers in the compute shader (to update them) and as vertex input in the vertex shader (to display them)
void prepareStorageBuffers()
{
std::vector<Particle> particleBuffer(cloth.gridsize.x * cloth.gridsize.y);
std::vector<Particle> populateParticleBuffer() {
std::vector<Particle> particleBuffer{ cloth.numParticles };

float dx = cloth.size.x / (cloth.gridsize.x - 1);
float dy = cloth.size.y / (cloth.gridsize.y - 1);
Expand All @@ -389,21 +388,14 @@ class VulkanExample : public VulkanExampleBase
particleBuffer[i + j * cloth.gridsize.y].uv = glm::vec4(1.0f - du * i, dv * j, 0.0f, 0.0f);
}
}
return particleBuffer;
}

VkDeviceSize storageBufferSize = particleBuffer.size() * sizeof(Particle);

// Staging
// SSBO won't be changed on the host after upload so copy to device local memory

vks::Buffer stagingBuffer;

vulkanDevice->createBuffer(
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&stagingBuffer,
storageBufferSize,
particleBuffer.data());

// Allocate the shader storage buffers containing the particles (but do not populate them yet)
// The "input" buffer is used on the compute queue only, while the "output" buffer is
// used as shader storage buffers in the compute shader (to update them) and as vertex input in the vertex shader (to display them)
void prepareStorageBuffers() {
VkDeviceSize storageBufferSize = cloth.gridsize.x * cloth.gridsize.y * sizeof(Particle);
// storageBuffers.input is used as an SSBO and only on the compute queue as an intermediate location for iterative computation
vulkanDevice->createBuffer(
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
Expand All @@ -418,37 +410,64 @@ class VulkanExample : public VulkanExampleBase
&storageBuffers.output,
storageBufferSize);

// Copy from staging buffer
VkCommandBuffer copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
VkBufferCopy copyRegion = {};
copyRegion.size = storageBufferSize;
vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, storageBuffers.input.buffer, 1, &copyRegion);
vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, storageBuffers.output.buffer, 1, &copyRegion);

// Add an initial release barrier to the graphics queue,
// so that when the compute command buffer executes for the first time
// it doesn't complain about a lack of a corresponding "release" to its "acquire"
if (dedicatedComputeQueue) {
std::array<VkBufferMemoryBarrier, 2> bufferBarriers;
bufferBarriers[0] = vks::initializers::bufferMemoryBarrier();
bufferBarriers[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
bufferBarriers[0].dstAccessMask = 0;
bufferBarriers[0].srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
bufferBarriers[0].dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
bufferBarriers[0].size = VK_WHOLE_SIZE;
bufferBarriers[0].buffer = storageBuffers.input.buffer;
bufferBarriers[1] = bufferBarriers[0];
bufferBarriers[1].buffer = storageBuffers.output.buffer;
VkCommandBuffer releaseCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);

vkCmdPipelineBarrier(copyCmd,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VkBufferMemoryBarrier bufferBarrier;
bufferBarrier = vks::initializers::bufferMemoryBarrier();
bufferBarrier.srcAccessMask = 0;
bufferBarrier.dstAccessMask = 0;
bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
bufferBarrier.size = VK_WHOLE_SIZE;
bufferBarrier.buffer = storageBuffers.output.buffer;
// This implicitly takes ownership of the output buffer by the graphics queue and sends a release barrier that
// will pair up with the first compute frame acquire barrier
vkCmdPipelineBarrier(
releaseCmd,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
VK_FLAGS_NONE,
0, nullptr,
2, bufferBarriers.data(),
1, &bufferBarrier,
0, nullptr);
vulkanDevice->flushCommandBuffer(releaseCmd, queue, true);
}
vulkanDevice->flushCommandBuffer(copyCmd, queue, true);

// Two indices per face plus a restart primitive at the end
uint32_t indexRowCount = (cloth.gridsize.x * 2) + 1;
uint32_t indexBufferSize = indexRowCount * (cloth.gridsize.y - 1) * sizeof(uint32_t);

vulkanDevice->createBuffer(
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
&graphics.indices,
indexBufferSize);
}

void populateBuffers() {
std::vector<Particle> particleBuffer = populateParticleBuffer();

// Staging
// SSBO won't be changed on the host after upload so copy to device local memory
assert(storageBuffers.input.size == storageBuffers.output.size);
VkDeviceSize storageBufferSize = storageBuffers.input.size;

vks::Buffer stagingBuffer;

vulkanDevice->createBuffer(
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&stagingBuffer,
storageBufferSize,
particleBuffer.data());

// Copy from staging buffer to the input buffer (on the compute queue, so that it takes ownership of the input buffer)
VkCommandBuffer copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool, true);
VkBufferCopy copyRegion = {};
copyRegion.size = storageBufferSize;
vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, storageBuffers.input.buffer, 1, &copyRegion);
vulkanDevice->flushCommandBuffer(copyCmd, compute.queue, compute.commandPool, true);

stagingBuffer.destroy();

Expand All @@ -462,26 +481,22 @@ class VulkanExample : public VulkanExampleBase
// Primitive restart (signaled by special value 0xFFFFFFFF)
indices.push_back(0xFFFFFFFF);
}
uint32_t indexBufferSize = static_cast<uint32_t>(indices.size()) * sizeof(uint32_t);

assert(graphics.indices.size == static_cast<uint32_t>(indices.size()) * sizeof(uint32_t));
indexCount = static_cast<uint32_t>(indices.size());

vulkanDevice->createBuffer(
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&stagingBuffer,
indexBufferSize,
graphics.indices.size,
indices.data());

vulkanDevice->createBuffer(
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
&graphics.indices,
indexBufferSize);

// Copy from staging buffer
copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
copyRegion = {};
copyRegion.size = indexBufferSize;
copyRegion.size = graphics.indices.size;
vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, graphics.indices.buffer, 1, &copyRegion);
vulkanDevice->flushCommandBuffer(copyCmd, queue, true);

Expand Down Expand Up @@ -664,27 +679,6 @@ class VulkanExample : public VulkanExampleBase
VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphores.ready));
VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphores.complete));

// Acquire the input buffer from the graphics queue explicitly here. This is the only time we need to do this
// as the input buffer doesn't move between queues after initialization. Note, this wouldn't be necessary
// if we created the input buffer on the compute queue in the first place, but the order of initialization
// means both storage buffers are created on the graphics queue.
// There are alternative paths, but all would be complicated by the fact that we need the input buffer
// populated with the initial data, and the output buffer to be initially owned by the graphics queue with a
// pending release barrier to the compute queue, to line up with the first compute dispatch.
VkCommandBuffer acquireCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool, true);

VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
bufferBarrier.srcAccessMask = 0;
bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
bufferBarrier.size = VK_WHOLE_SIZE;
bufferBarrier.buffer = storageBuffers.input.buffer;
// Explicitly acquire the storageBuffer.input buffer (the output buffer will ping-pong back and forth so it's acquired as part of the main compute command buffer)
vkCmdPipelineBarrier(acquireCmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_FLAGS_NONE, 0, nullptr, 1, &bufferBarrier, 0, nullptr);

vulkanDevice->flushCommandBuffer(acquireCmd, compute.queue, compute.commandPool, true);

// Build a single command buffer containing the compute dispatch commands
buildComputeCommandBuffer();
}
Expand Down Expand Up @@ -781,6 +775,8 @@ class VulkanExample : public VulkanExampleBase
prepareStorageBuffers();
prepareGraphics();
prepareCompute();
// Now that the compute queue exists we can populate all the buffers on their corresponding queues
populateBuffers();
prepared = true;
}

Expand Down

0 comments on commit 6584315

Please sign in to comment.