From d78853f7aa6f61a28d2e335b530b32d376748c4c Mon Sep 17 00:00:00 2001 From: Ryp Date: Wed, 16 Aug 2023 12:33:36 +0200 Subject: [PATCH] vulkan: hook HZB to multiple mip views for downsampling Remove previous downsample step --- src/renderer/CMakeLists.txt | 1 + .../shader/copy_to_depth_from_hzb.frag.hlsl | 32 ++++++ .../shader/copy_to_depth_from_hzb.share.hlsl | 18 ++++ src/renderer/shader/hzb_reduce.comp.hlsl | 87 +++++++++-------- src/renderer/shader/hzb_reduce.share.hlsl | 5 +- src/renderer/vulkan/ShaderModules.cpp | 2 + src/renderer/vulkan/ShaderModules.h | 1 + src/renderer/vulkan/renderpass/HZBPass.cpp | 16 +-- src/renderer/vulkan/renderpass/HZBPass.h | 2 +- .../vulkan/renderpass/TestGraphics.cpp | 97 ++++--------------- .../vulkan/renderpass/TiledRasterPass.cpp | 45 +++++---- .../vulkan/renderpass/TiledRasterPass.h | 5 +- 12 files changed, 160 insertions(+), 151 deletions(-) create mode 100644 src/renderer/shader/copy_to_depth_from_hzb.frag.hlsl create mode 100644 src/renderer/shader/copy_to_depth_from_hzb.share.hlsl diff --git a/src/renderer/CMakeLists.txt b/src/renderer/CMakeLists.txt index 99a344c6..ab529630 100644 --- a/src/renderer/CMakeLists.txt +++ b/src/renderer/CMakeLists.txt @@ -145,6 +145,7 @@ endif() set(REAPER_SHADER_DIR ${CMAKE_CURRENT_SOURCE_DIR}/shader) set(REAPER_SHADER_SRCS ${REAPER_SHADER_DIR}/copy_to_depth.frag.hlsl + ${REAPER_SHADER_DIR}/copy_to_depth_from_hzb.frag.hlsl ${REAPER_SHADER_DIR}/debug_geometry/build_cmds.comp.hlsl ${REAPER_SHADER_DIR}/debug_geometry/draw.frag.hlsl ${REAPER_SHADER_DIR}/debug_geometry/draw.vert.hlsl diff --git a/src/renderer/shader/copy_to_depth_from_hzb.frag.hlsl b/src/renderer/shader/copy_to_depth_from_hzb.frag.hlsl new file mode 100644 index 00000000..50807df4 --- /dev/null +++ b/src/renderer/shader/copy_to_depth_from_hzb.frag.hlsl @@ -0,0 +1,32 @@ +#include "lib/base.hlsl" + +#include "copy_to_depth_from_hzb.share.hlsl" + +VK_PUSH_CONSTANT_HELPER(CopyDepthFromHZBPushConstants) consts; + +VK_BINDING(0, 0) Texture2D HZBMip; + +struct PS_INPUT +{ + float4 PositionCS : SV_Position; + float2 PositionUV : TEXCOORD0; +}; + +struct PS_OUTPUT +{ + float depth : SV_Depth; +}; + +void main(in PS_INPUT input, out PS_OUTPUT output) +{ + const float2 depth_min_max_cs = HZBMip.Load(int3(input.PositionCS.xy, 0)); + + if (consts.copy_min) + { + output.depth = depth_min_max_cs.x; + } + else + { + output.depth = depth_min_max_cs.y; + } +} diff --git a/src/renderer/shader/copy_to_depth_from_hzb.share.hlsl b/src/renderer/shader/copy_to_depth_from_hzb.share.hlsl new file mode 100644 index 00000000..e0b2ace4 --- /dev/null +++ b/src/renderer/shader/copy_to_depth_from_hzb.share.hlsl @@ -0,0 +1,18 @@ +//////////////////////////////////////////////////////////////////////////////// +/// Reaper +/// +/// Copyright (c) 2015-2022 Thibault Schueller +/// This file is distributed under the MIT License +//////////////////////////////////////////////////////////////////////////////// + +#ifndef COPY_DEPTH_FROM_HZB_SHARE_INCLUDED +#define COPY_DEPTH_FROM_HZB_SHARE_INCLUDED + +#include "shared_types.hlsl" + +struct CopyDepthFromHZBPushConstants +{ + hlsl_uint copy_min; +}; + +#endif diff --git a/src/renderer/shader/hzb_reduce.comp.hlsl b/src/renderer/shader/hzb_reduce.comp.hlsl index 1e67d81b..135b0bbd 100644 --- a/src/renderer/shader/hzb_reduce.comp.hlsl +++ b/src/renderer/shader/hzb_reduce.comp.hlsl @@ -3,8 +3,6 @@ #include "lib/morton.hlsl" #include "hzb_reduce.share.hlsl" -#define USE_QUAD_INTRINSICS 0 - VK_PUSH_CONSTANT_HELPER(HZBReducePushConstants) consts; VK_BINDING(0, Slot_LinearClampSampler) SamplerState LinearClampSampler; @@ -13,7 +11,7 @@ VK_BINDING(0, Slot_HZB_mips) RWTexture2D HZB_mips[HZBMaxMipCount]; static const uint ThreadCount = HZBReduceThreadCountX * HZBReduceThreadCountY; -groupshared float2 lds_depth_min_max[ThreadCount]; +groupshared float2 lds_depth_min_max[ThreadCount / MinWaveLaneCount]; [numthreads(ThreadCount, 1, 1)] void main(uint3 gtid : SV_GroupThreadID, @@ -24,65 +22,74 @@ void main(uint3 gtid : SV_GroupThreadID, float depth_min = 1.0; float depth_max = 0.0; - const uint2 local_position_ts = gid.xy * uint2(HZBReduceThreadCountX, HZBReduceThreadCountY) * 2; - const uint2 position_ts = local_position_ts + decode_morton_2d(gi) * 2 + 1; - const float2 position_uv = (float2)position_ts * consts.extent_ts_inv; - - if (any(position_ts >= consts.extent_ts)) - { - return; - } + const uint2 offset_ts = gid.xy * uint2(HZBReduceThreadCountX, HZBReduceThreadCountY); + const uint2 position_ts = offset_ts + decode_morton_2d(gi); + const float2 gather_uv = (float2)(position_ts * 2 + 1) * consts.depth_extent_ts_inv; - const float4 quad_depth = SceneDepth.GatherRed(LinearClampSampler, position_uv); + const float4 quad_depth = SceneDepth.GatherRed(LinearClampSampler, gather_uv); depth_min = min(min(quad_depth.x, quad_depth.y), min(quad_depth.z, quad_depth.w)); depth_max = max(max(quad_depth.x, quad_depth.y), max(quad_depth.z, quad_depth.w)); - const uint2 position_hzb_mip0_ts = position_ts >> 1; + HZB_mips[0][position_ts] = float2(depth_min, depth_max); - if (true) - { - HZB_mips[0][position_hzb_mip0_ts] = float2(depth_min, depth_max); - } + uint lane_index = WaveGetLaneIndex(); -#if USE_QUAD_INTRINSICS - depth_min = min(depth_min, QuadReadAcrossX(depth_min)); - depth_max = max(depth_max, QuadReadAcrossX(depth_max)); + depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 1)); + depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 1)); - depth_min = min(depth_min, QuadReadAcrossY(depth_min)); - depth_max = max(depth_max, QuadReadAcrossY(depth_max)); -#else - depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 1)); - depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 1)); - - depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 2)); - depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 2)); -#endif + depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 2)); + depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 2)); if (gi % 4 == 0) { - HZB_mips[1][position_hzb_mip0_ts >> 1] = float2(depth_min, depth_max); + HZB_mips[1][position_ts >> 1] = float2(depth_min, depth_max); } - depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 4)); - depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 4)); + depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 4)); + depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 4)); - depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 8)); - depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 8)); + depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 8)); + depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 8)); if (gi % 16 == 0) { - HZB_mips[2][position_hzb_mip0_ts >> 2] = float2(depth_min, depth_max); + HZB_mips[2][position_ts >> 2] = float2(depth_min, depth_max); } - depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 16)); - depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 16)); +#if 0 + depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 16)); + depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 16)); - depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 32)); - depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 32)); + depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 32)); + depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 32)); +#else + if (gi % 16 == 0) + { + lds_depth_min_max[gi / 16] = float2(depth_min, depth_max); + } + + // Reduce using LDS + uint active_threads = ThreadCount / 16; + + for (uint threads = active_threads / 2; threads > 0; threads /= 2) + { + GroupMemoryBarrierWithGroupSync(); + + if (gi < threads) + { + const float2 depth_min_max_neighbor = lds_depth_min_max[gi + threads]; + + depth_min = min(depth_min, depth_min_max_neighbor.x); + depth_max = max(depth_max, depth_min_max_neighbor.y); + + lds_depth_min_max[gi] = float2(depth_min, depth_max); + } + } +#endif if (gi % 64 == 0) { - HZB_mips[3][position_hzb_mip0_ts >> 3] = float2(depth_min, depth_max); + HZB_mips[3][position_ts >> 3] = float2(depth_min, depth_max); } } diff --git a/src/renderer/shader/hzb_reduce.share.hlsl b/src/renderer/shader/hzb_reduce.share.hlsl index 4ffffb04..aea96b08 100644 --- a/src/renderer/shader/hzb_reduce.share.hlsl +++ b/src/renderer/shader/hzb_reduce.share.hlsl @@ -14,6 +14,8 @@ #define Slot_SceneDepth 1 #define Slot_HZB_mips 2 +static const hlsl_uint MinWaveLaneCount = 8; // FIXME + static const hlsl_uint HZBMaxMipCount = 10; static const hlsl_uint HZBReduceThreadCountX = 8; @@ -21,8 +23,7 @@ static const hlsl_uint HZBReduceThreadCountY = 8; struct HZBReducePushConstants { - hlsl_uint2 extent_ts; - hlsl_float2 extent_ts_inv; + hlsl_float2 depth_extent_ts_inv; }; #endif diff --git a/src/renderer/vulkan/ShaderModules.cpp b/src/renderer/vulkan/ShaderModules.cpp index 8f44c6a8..4e740892 100644 --- a/src/renderer/vulkan/ShaderModules.cpp +++ b/src/renderer/vulkan/ShaderModules.cpp @@ -44,6 +44,7 @@ ShaderModules create_shader_modules(ReaperRoot& /*root*/, VulkanBackend& backend ShaderModules modules = {}; modules.copy_to_depth_fs = create_shader_module(backend.device, "copy_to_depth.frag.spv"); + modules.copy_to_depth_from_hzb_fs = create_shader_module(backend.device, "copy_to_depth_from_hzb.frag.spv"); modules.cull_meshlet_cs = create_shader_module(backend.device, "meshlet/cull_meshlet.comp.spv"); modules.cull_triangle_batch_cs = create_shader_module(backend.device, "meshlet/cull_triangle_batch.comp.spv"); modules.debug_geometry_build_cmds_cs = create_shader_module(backend.device, "debug_geometry/build_cmds.comp.spv"); @@ -82,6 +83,7 @@ ShaderModules create_shader_modules(ReaperRoot& /*root*/, VulkanBackend& backend void destroy_shader_modules(VulkanBackend& backend, ShaderModules& shader_modules) { vkDestroyShaderModule(backend.device, shader_modules.copy_to_depth_fs, nullptr); + vkDestroyShaderModule(backend.device, shader_modules.copy_to_depth_from_hzb_fs, nullptr); vkDestroyShaderModule(backend.device, shader_modules.cull_meshlet_cs, nullptr); vkDestroyShaderModule(backend.device, shader_modules.cull_triangle_batch_cs, nullptr); vkDestroyShaderModule(backend.device, shader_modules.debug_geometry_build_cmds_cs, nullptr); diff --git a/src/renderer/vulkan/ShaderModules.h b/src/renderer/vulkan/ShaderModules.h index d6144793..529a9115 100644 --- a/src/renderer/vulkan/ShaderModules.h +++ b/src/renderer/vulkan/ShaderModules.h @@ -17,6 +17,7 @@ struct VulkanBackend; struct ShaderModules { VkShaderModule copy_to_depth_fs; + VkShaderModule copy_to_depth_from_hzb_fs; VkShaderModule cull_meshlet_cs; VkShaderModule cull_triangle_batch_cs; VkShaderModule debug_geometry_build_cmds_cs; diff --git a/src/renderer/vulkan/renderpass/HZBPass.cpp b/src/renderer/vulkan/renderpass/HZBPass.cpp index 57393ac0..58cc8392 100644 --- a/src/renderer/vulkan/renderpass/HZBPass.cpp +++ b/src/renderer/vulkan/renderpass/HZBPass.cpp @@ -81,12 +81,14 @@ void update_hzb_pass_descriptor_set(DescriptorWriteHelper& write_helper, const H write_helper.append(resources.descriptor_set, Slot_SceneDepth, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, scene_depth.default_view_handle, scene_depth.image_layout); - const u32 hzb_mip_count = 4; // FIXME + const u32 hzb_mip_count = hzb_texture.properties.mip_count; std::span hzb_mips = write_helper.new_image_infos(hzb_mip_count); + Assert(hzb_texture.additional_views.size() == hzb_mip_count); + for (u32 index = 0; index < hzb_mips.size(); index += 1) { - hzb_mips[index] = create_descriptor_image_info(hzb_texture.default_view_handle, hzb_texture.image_layout); + hzb_mips[index] = create_descriptor_image_info(hzb_texture.additional_views[index], hzb_texture.image_layout); } write_helper.writes.push_back(create_image_descriptor_write(resources.descriptor_set, Slot_HZB_mips, @@ -94,13 +96,12 @@ void update_hzb_pass_descriptor_set(DescriptorWriteHelper& write_helper, const H } void record_hzb_command_buffer(CommandBuffer& cmdBuffer, const HZBPassResources& pass_resources, - VkExtent2D depth_extent) + VkExtent2D depth_extent, VkExtent2D hzb_extent) { vkCmdBindPipeline(cmdBuffer.handle, VK_PIPELINE_BIND_POINT_COMPUTE, pass_resources.hzb_pipe.handle); HZBReducePushConstants push_constants; - push_constants.extent_ts = glm::uvec2(depth_extent.width, depth_extent.height); - push_constants.extent_ts_inv = + push_constants.depth_extent_ts_inv = glm::fvec2(1.f / static_cast(depth_extent.width), 1.f / static_cast(depth_extent.height)); vkCmdPushConstants(cmdBuffer.handle, pass_resources.hzb_pipe.layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, @@ -109,10 +110,9 @@ void record_hzb_command_buffer(CommandBuffer& cmdBuffer, const HZBPassResources& vkCmdBindDescriptorSets(cmdBuffer.handle, VK_PIPELINE_BIND_POINT_COMPUTE, pass_resources.hzb_pipe.layout, 0, 1, &pass_resources.descriptor_set, 0, nullptr); - // Assert(HZBRes % (HZBReduceThreadCountX * HZBReduceThreadCountY) == 0); vkCmdDispatch(cmdBuffer.handle, - div_round_up(depth_extent.width, HZBReduceThreadCountX * 2), - div_round_up(depth_extent.height, HZBReduceThreadCountY * 2), + div_round_up(hzb_extent.width, HZBReduceThreadCountX), + div_round_up(hzb_extent.height, HZBReduceThreadCountY), 1); } } // namespace Reaper diff --git a/src/renderer/vulkan/renderpass/HZBPass.h b/src/renderer/vulkan/renderpass/HZBPass.h index 7d566633..2a0dbb71 100644 --- a/src/renderer/vulkan/renderpass/HZBPass.h +++ b/src/renderer/vulkan/renderpass/HZBPass.h @@ -53,6 +53,6 @@ void update_hzb_pass_descriptor_set(DescriptorWriteHelper& write_helper, const H struct CommandBuffer; void record_hzb_command_buffer(CommandBuffer& cmdBuffer, const HZBPassResources& pass_resources, - VkExtent2D depth_extent); + VkExtent2D depth_extent, VkExtent2D hzb_extent); } // namespace Reaper diff --git a/src/renderer/vulkan/renderpass/TestGraphics.cpp b/src/renderer/vulkan/renderpass/TestGraphics.cpp index 3cffe01d..3e1acda3 100644 --- a/src/renderer/vulkan/renderpass/TestGraphics.cpp +++ b/src/renderer/vulkan/renderpass/TestGraphics.cpp @@ -381,13 +381,11 @@ void backend_execute_frame(ReaperRoot& root, VulkanBackend& backend, CommandBuff VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL}); GPUTextureProperties hzb_properties = - default_texture_properties(scene_depth_properties.width / 2, scene_depth_properties.height / 2, + default_texture_properties(tiled_lighting_frame.tile_count_x * 8, tiled_lighting_frame.tile_count_y * 8, PixelFormat::R16G16_UNORM, GPUTextureUsage::Storage | GPUTextureUsage::Sampled); hzb_properties.mip_count = 4; // FIXME - GPUTextureView hzb_mip_view = default_texture_view(hzb_properties); - - std::vector hzb_mip_views(hzb_properties.mip_count, hzb_mip_view); + std::vector hzb_mip_views(hzb_properties.mip_count, default_texture_view(hzb_properties)); for (u32 i = 0; i < hzb_mip_views.size(); i++) { @@ -400,41 +398,10 @@ void backend_execute_frame(ReaperRoot& root, VulkanBackend& backend, CommandBuff GPUTextureAccess{VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL}, hzb_mip_views); - // Depth Downsample - struct TileDepthFrameGraphData - { - RenderPassHandle pass_handle; - ResourceUsageHandle scene_depth; - ResourceUsageHandle depth_min_storage; - ResourceUsageHandle depth_max_storage; - } tile_depth; - - tile_depth.pass_handle = builder.create_render_pass("Depth Downsample"); - - tile_depth.scene_depth = - builder.read_texture(tile_depth.pass_handle, visibility.depth, - GPUTextureAccess{VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT, - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL}); - - const GPUTextureProperties tile_depth_storage_properties = - default_texture_properties(tiled_lighting_frame.tile_count_x, tiled_lighting_frame.tile_count_y, - PixelFormat::R16_UNORM, GPUTextureUsage::Storage | GPUTextureUsage::Sampled); - - GPUTextureAccess tile_depth_create_access = {VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL}; - - tile_depth.depth_min_storage = builder.create_texture(tile_depth.pass_handle, "Tile Depth Min Storage", - tile_depth_storage_properties, tile_depth_create_access); - - tile_depth.depth_max_storage = builder.create_texture(tile_depth.pass_handle, "Tile Depth Max Storage", - tile_depth_storage_properties, tile_depth_create_access); - // Depth copy struct TileDepthCopyFrameGraphData { RenderPassHandle pass_handle; - ResourceUsageHandle depth_min_src; - ResourceUsageHandle depth_max_src; ResourceUsageHandle depth_min; ResourceUsageHandle depth_max; ResourceUsageHandle hzb_texture; @@ -444,33 +411,27 @@ void backend_execute_frame(ReaperRoot& root, VulkanBackend& backend, CommandBuff tile_depth_copy.pass_handle = builder.create_render_pass("Tile Depth Copy"); - const GPUTextureAccess tile_depth_copy_src_access = { - VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL}; - - tile_depth_copy.depth_min_src = - builder.read_texture(tile_depth_copy.pass_handle, tile_depth.depth_min_storage, tile_depth_copy_src_access); - tile_depth_copy.depth_max_src = - builder.read_texture(tile_depth_copy.pass_handle, tile_depth.depth_max_storage, tile_depth_copy_src_access); - - GPUTextureProperties tile_depth_copy_properties = tile_depth_storage_properties; - tile_depth_copy_properties.usage_flags = GPUTextureUsage::DepthStencilAttachment | GPUTextureUsage::Sampled; - tile_depth_copy_properties.format = MainPassDepthFormat; + const GPUTextureProperties tile_depth_properties = default_texture_properties( + tiled_lighting_frame.tile_count_x, tiled_lighting_frame.tile_count_y, MainPassDepthFormat, + GPUTextureUsage::DepthStencilAttachment | GPUTextureUsage::Sampled); const GPUTextureAccess tile_depth_copy_dst_access = { VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL}; tile_depth_copy.depth_min = builder.create_texture(tile_depth_copy.pass_handle, "Tile Depth Min", - tile_depth_copy_properties, tile_depth_copy_dst_access); + tile_depth_properties, tile_depth_copy_dst_access); tile_depth_copy.depth_max = builder.create_texture(tile_depth_copy.pass_handle, "Tile Depth Max", - tile_depth_copy_properties, tile_depth_copy_dst_access); + tile_depth_properties, tile_depth_copy_dst_access); - // FIXME { GPUTextureView hzb_view = default_texture_view(hzb_properties); hzb_view.subresource.mip_count = 1; hzb_view.subresource.mip_offset = 3; + Assert(tile_depth_properties.width == hzb_properties.width >> hzb_view.subresource.mip_offset); + Assert(tile_depth_properties.height == hzb_properties.height >> hzb_view.subresource.mip_offset); + tile_depth_copy.hzb_texture = builder.read_texture( tile_depth_copy.pass_handle, hzb_reduce.hzb_texture, {VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL}, @@ -479,8 +440,7 @@ void backend_execute_frame(ReaperRoot& root, VulkanBackend& backend, CommandBuff tile_depth_copy.light_list_clear = builder.create_buffer( tile_depth_copy.pass_handle, "Light lists", - DefaultGPUBufferProperties(ElementsPerTile * tile_depth_storage_properties.width - * tile_depth_storage_properties.height, + DefaultGPUBufferProperties(ElementsPerTile * tile_depth_properties.width * tile_depth_properties.height, sizeof(u32), GPUBufferUsage::StorageBuffer | GPUBufferUsage::TransferDst), GPUBufferAccess{VK_PIPELINE_STAGE_2_CLEAR_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT}); @@ -614,9 +574,8 @@ void backend_execute_frame(ReaperRoot& root, VulkanBackend& backend, CommandBuff GPUTextureAccess{VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL}); - const GPUBufferProperties tile_debug_properties = - DefaultGPUBufferProperties(tile_depth_storage_properties.width * tile_depth_storage_properties.height, - sizeof(TileDebug), GPUBufferUsage::StorageBuffer); + const GPUBufferProperties tile_debug_properties = DefaultGPUBufferProperties( + tile_depth_properties.width * tile_depth_properties.height, sizeof(TileDebug), GPUBufferUsage::StorageBuffer); tiled_lighting.tile_debug_texture = builder.create_buffer(tiled_lighting.pass_handle, "Tile debug", tile_debug_properties, @@ -928,16 +887,9 @@ void backend_execute_frame(ReaperRoot& root, VulkanBackend& backend, CommandBuff resources.samplers_resources, resources.material_resources, resources.mesh_cache, resources.lighting_resources, forward_shadow_map_views); - update_lighting_depth_downsample_descriptor_set( - descriptor_write_helper, resources.tiled_raster_resources, resources.samplers_resources, - get_frame_graph_texture(resources.framegraph_resources, framegraph, tile_depth.scene_depth), - get_frame_graph_texture(resources.framegraph_resources, framegraph, tile_depth.depth_min_storage), - get_frame_graph_texture(resources.framegraph_resources, framegraph, tile_depth.depth_max_storage)); - update_depth_copy_pass_descriptor_set( descriptor_write_helper, resources.tiled_raster_resources, - get_frame_graph_texture(resources.framegraph_resources, framegraph, tile_depth_copy.depth_min_src), - get_frame_graph_texture(resources.framegraph_resources, framegraph, tile_depth_copy.depth_max_src)); + get_frame_graph_texture(resources.framegraph_resources, framegraph, tile_depth_copy.hzb_texture)); update_classify_descriptor_set( descriptor_write_helper, resources.tiled_raster_resources, @@ -1189,7 +1141,8 @@ void backend_execute_frame(ReaperRoot& root, VulkanBackend& backend, CommandBuff record_hzb_command_buffer( cmdBuffer, resources.hzb_pass_resources, - VkExtent2D{.width = scene_depth_properties.width, .height = scene_depth_properties.height}); + VkExtent2D{.width = scene_depth_properties.width, .height = scene_depth_properties.height}, + VkExtent2D{.width = hzb_properties.width, .height = hzb_properties.height}); record_framegraph_barriers(cmdBuffer, schedule, framegraph, resources.framegraph_resources, hzb_reduce.pass_handle, false); @@ -1206,17 +1159,6 @@ void backend_execute_frame(ReaperRoot& root, VulkanBackend& backend, CommandBuff visibility_gbuffer.pass_handle, false); } - { - REAPER_GPU_SCOPE(cmdBuffer, "Tile Depth"); - record_framegraph_barriers(cmdBuffer, schedule, framegraph, resources.framegraph_resources, - tile_depth.pass_handle, true); - - record_tile_depth_pass_command_buffer(cmdBuffer, resources.tiled_raster_resources, backbufferExtent); - - record_framegraph_barriers(cmdBuffer, schedule, framegraph, resources.framegraph_resources, - tile_depth.pass_handle, false); - } - { REAPER_GPU_SCOPE(cmdBuffer, "Tile Depth Copy"); record_framegraph_barriers(cmdBuffer, schedule, framegraph, resources.framegraph_resources, @@ -1278,9 +1220,8 @@ void backend_execute_frame(ReaperRoot& root, VulkanBackend& backend, CommandBuff record_framegraph_barriers(cmdBuffer, schedule, framegraph, resources.framegraph_resources, tiled_lighting.pass_handle, true); - record_tiled_lighting_command_buffer( - cmdBuffer, resources.tiled_lighting_resources, backbufferExtent, - VkExtent2D{tile_depth_copy_properties.width, tile_depth_copy_properties.height}); + record_tiled_lighting_command_buffer(cmdBuffer, resources.tiled_lighting_resources, backbufferExtent, + VkExtent2D{tile_depth_properties.width, tile_depth_properties.height}); record_framegraph_barriers(cmdBuffer, schedule, framegraph, resources.framegraph_resources, tiled_lighting.pass_handle, false); @@ -1293,7 +1234,7 @@ void backend_execute_frame(ReaperRoot& root, VulkanBackend& backend, CommandBuff record_tiled_lighting_debug_command_buffer( cmdBuffer, resources.tiled_lighting_resources, backbufferExtent, - VkExtent2D{tile_depth_copy_properties.width, tile_depth_copy_properties.height}); + VkExtent2D{tile_depth_properties.width, tile_depth_properties.height}); record_framegraph_barriers(cmdBuffer, schedule, framegraph, resources.framegraph_resources, tiled_lighting_debug.pass_handle, false); diff --git a/src/renderer/vulkan/renderpass/TiledRasterPass.cpp b/src/renderer/vulkan/renderpass/TiledRasterPass.cpp index d76b0c85..ff84e584 100644 --- a/src/renderer/vulkan/renderpass/TiledRasterPass.cpp +++ b/src/renderer/vulkan/renderpass/TiledRasterPass.cpp @@ -29,6 +29,7 @@ #include "mesh/ModelLoader.h" #include "profiling/Scope.h" +#include "copy_to_depth_from_hzb.share.hlsl" #include "tiled_lighting/classify_volume.share.hlsl" #include "tiled_lighting/tile_depth_downsample.share.hlsl" @@ -84,7 +85,8 @@ TiledRasterResources create_tiled_raster_pass_resources(ReaperRoot& root, Vulkan std::vector shader_stages = { default_pipeline_shader_stage_create_info(VK_SHADER_STAGE_VERTEX_BIT, shader_modules.fullscreen_triangle_vs), - default_pipeline_shader_stage_create_info(VK_SHADER_STAGE_FRAGMENT_BIT, shader_modules.copy_to_depth_fs), + default_pipeline_shader_stage_create_info(VK_SHADER_STAGE_FRAGMENT_BIT, + shader_modules.copy_to_depth_from_hzb_fs), }; std::vector descriptorSetLayoutBinding = { @@ -94,8 +96,11 @@ TiledRasterResources create_tiled_raster_pass_resources(ReaperRoot& root, Vulkan VkDescriptorSetLayout descriptor_set_layout = create_descriptor_set_layout(backend.device, descriptorSetLayoutBinding); - VkPipelineLayout pipeline_layout = - create_pipeline_layout(backend.device, std::span(&descriptor_set_layout, 1)); + const VkPushConstantRange constant_range = {VK_SHADER_STAGE_FRAGMENT_BIT, 0, + sizeof(CopyDepthFromHZBPushConstants)}; + + VkPipelineLayout pipeline_layout = create_pipeline_layout( + backend.device, std::span(&descriptor_set_layout, 1), std::span(&constant_range, 1)); GraphicsPipelineProperties pipeline_properties = default_graphics_pipeline_properties(); pipeline_properties.depth_stencil.depthTestEnable = VK_TRUE; @@ -185,19 +190,18 @@ TiledRasterResources create_tiled_raster_pass_resources(ReaperRoot& root, Vulkan } std::vector dset_layouts = { - resources.tile_depth_descriptor_set_layout, resources.depth_copy_descriptor_set_layout, - resources.depth_copy_descriptor_set_layout, resources.classify_descriptor_set_layout, - resources.light_raster_descriptor_set_layout, resources.light_raster_descriptor_set_layout}; + resources.tile_depth_descriptor_set_layout, resources.depth_copy_descriptor_set_layout, + resources.classify_descriptor_set_layout, resources.light_raster_descriptor_set_layout, + resources.light_raster_descriptor_set_layout}; std::vector dsets(dset_layouts.size()); allocate_descriptor_sets(backend.device, backend.global_descriptor_pool, dset_layouts, dsets); resources.tile_depth_descriptor_set = dsets[0]; - resources.depth_copy_descriptor_sets[0] = dsets[1]; - resources.depth_copy_descriptor_sets[1] = dsets[2]; - resources.classify_descriptor_set = dsets[3]; - resources.light_raster_descriptor_sets[0] = dsets[4]; - resources.light_raster_descriptor_sets[1] = dsets[5]; + resources.depth_copy_descriptor_set = dsets[1]; + resources.classify_descriptor_set = dsets[2]; + resources.light_raster_descriptor_sets[0] = dsets[3]; + resources.light_raster_descriptor_sets[1] = dsets[4]; { const GPUBufferProperties properties = DefaultGPUBufferProperties( @@ -280,13 +284,10 @@ void update_lighting_depth_downsample_descriptor_set(DescriptorWriteHelper& void update_depth_copy_pass_descriptor_set(DescriptorWriteHelper& write_helper, const TiledRasterResources& resources, - const FrameGraphTexture& depth_min_src, - const FrameGraphTexture& depth_max_src) + const FrameGraphTexture& hzb_texture) { - write_helper.append(resources.depth_copy_descriptor_sets[0], 0, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - depth_min_src.default_view_handle, depth_min_src.image_layout); - write_helper.append(resources.depth_copy_descriptor_sets[1], 0, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - depth_max_src.default_view_handle, depth_max_src.image_layout); + write_helper.append(resources.depth_copy_descriptor_set, 0, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + hzb_texture.additional_views[0], hzb_texture.image_layout); } void update_classify_descriptor_set(DescriptorWriteHelper& write_helper, const TiledRasterResources& resources, @@ -384,6 +385,9 @@ void record_depth_copy(CommandBuffer& cmdBuffer, const TiledRasterResources& res vkCmdSetViewport(cmdBuffer.handle, 0, 1, &viewport); vkCmdSetScissor(cmdBuffer.handle, 0, 1, &pass_rect); + vkCmdBindDescriptorSets(cmdBuffer.handle, VK_PIPELINE_BIND_POINT_GRAPHICS, resources.depth_copy_pipeline_layout, 0, + 1, &resources.depth_copy_descriptor_set, 0, nullptr); + for (u32 depth_index = 0; depth_index < 2; depth_index++) { const FrameGraphTexture depth_dst = depth_dsts[depth_index]; @@ -395,8 +399,11 @@ void record_depth_copy(CommandBuffer& cmdBuffer, const TiledRasterResources& res vkCmdBeginRendering(cmdBuffer.handle, &rendering_info); - vkCmdBindDescriptorSets(cmdBuffer.handle, VK_PIPELINE_BIND_POINT_GRAPHICS, resources.depth_copy_pipeline_layout, - 0, 1, &resources.depth_copy_descriptor_sets[depth_index], 0, nullptr); + CopyDepthFromHZBPushConstants consts; + consts.copy_min = (depth_index == 0) ? 1 : 0; + + vkCmdPushConstants(cmdBuffer.handle, resources.depth_copy_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, + sizeof(consts), &consts); vkCmdDraw(cmdBuffer.handle, 3, 1, 0, 0); diff --git a/src/renderer/vulkan/renderpass/TiledRasterPass.h b/src/renderer/vulkan/renderpass/TiledRasterPass.h index e65b734d..d5557539 100644 --- a/src/renderer/vulkan/renderpass/TiledRasterPass.h +++ b/src/renderer/vulkan/renderpass/TiledRasterPass.h @@ -34,7 +34,7 @@ struct TiledRasterResources VkPipelineLayout depth_copy_pipeline_layout; VkPipeline depth_copy_pipeline; - std::array depth_copy_descriptor_sets; + VkDescriptorSet depth_copy_descriptor_set; VkDescriptorSetLayout light_raster_descriptor_set_layout; VkPipelineLayout light_raster_pipeline_layout; @@ -80,8 +80,7 @@ void update_lighting_depth_downsample_descriptor_set(DescriptorWriteHelper& void update_depth_copy_pass_descriptor_set(DescriptorWriteHelper& write_helper, const TiledRasterResources& resources, - const FrameGraphTexture& depth_min_src, - const FrameGraphTexture& depth_max_src); + const FrameGraphTexture& hzb_texture); void update_classify_descriptor_set(DescriptorWriteHelper& write_helper, const TiledRasterResources& resources, const FrameGraphBuffer& classification_counters,