Skip to content

Commit

Permalink
vulkan: hook HZB to multiple mip views for downsampling
Browse files Browse the repository at this point in the history
Remove previous downsample step
  • Loading branch information
Ryp committed Aug 25, 2023
1 parent ca36da4 commit d0ef23b
Show file tree
Hide file tree
Showing 12 changed files with 161 additions and 151 deletions.
1 change: 1 addition & 0 deletions src/renderer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ endif()
set(REAPER_SHADER_DIR ${CMAKE_CURRENT_SOURCE_DIR}/shader)
set(REAPER_SHADER_SRCS
${REAPER_SHADER_DIR}/copy_to_depth.frag.hlsl
${REAPER_SHADER_DIR}/copy_to_depth_from_hzb.frag.hlsl
${REAPER_SHADER_DIR}/debug_geometry/build_cmds.comp.hlsl
${REAPER_SHADER_DIR}/debug_geometry/draw.frag.hlsl
${REAPER_SHADER_DIR}/debug_geometry/draw.vert.hlsl
Expand Down
32 changes: 32 additions & 0 deletions src/renderer/shader/copy_to_depth_from_hzb.frag.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#include "lib/base.hlsl"

#include "copy_to_depth_from_hzb.share.hlsl"

// Push constants for this shader; main() reads consts.copy_min to pick which HZB channel is output.
VK_PUSH_CONSTANT_HELPER(CopyDepthFromHZBPushConstants) consts;

// Two-channel source texture; main() loads one texel from it per fragment and writes one channel as depth.
VK_BINDING(0, 0) Texture2D<float2> HZBMip;

// Inputs of the fragment shader entry point.
struct PS_INPUT
{
// SV_Position value; main() truncates its xy components to integer texel coordinates.
float4 PositionCS : SV_Position;
// TEXCOORD0 input; not read by main() in this shader.
float2 PositionUV : TEXCOORD0;
};

// Output of the fragment shader entry point: only a depth value is produced, no color target.
struct PS_OUTPUT
{
// Value written through the SV_Depth semantic.
float depth : SV_Depth;
};

// Fragment entry point: fetches the HZB texel located under the current fragment
// and writes one of its two channels out as the fragment depth.
void main(in PS_INPUT input, out PS_OUTPUT output)
{
    // The fragment position is truncated to integer texel coordinates;
    // the third component selects mip level 0 of the bound view.
    const int3 texel_location = int3(input.PositionCS.xy, 0);
    const float2 hzb_min_max = HZBMip.Load(texel_location);

    // A non-zero copy_min outputs the x channel, otherwise the y channel is used.
    output.depth = (consts.copy_min != 0) ? hzb_min_max.x : hzb_min_max.y;
}
18 changes: 18 additions & 0 deletions src/renderer/shader/copy_to_depth_from_hzb.share.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
////////////////////////////////////////////////////////////////////////////////
/// Reaper
///
/// Copyright (c) 2015-2022 Thibault Schueller
/// This file is distributed under the MIT License
////////////////////////////////////////////////////////////////////////////////

#ifndef COPY_DEPTH_FROM_HZB_SHARE_INCLUDED
#define COPY_DEPTH_FROM_HZB_SHARE_INCLUDED

#include "shared_types.hlsl"

// Push constant block consumed by copy_to_depth_from_hzb.frag.hlsl.
struct CopyDepthFromHZBPushConstants
{
// Used as a boolean by the fragment shader: non-zero outputs the x channel of the
// HZB texel, zero outputs the y channel.
hlsl_uint copy_min;
};

#endif
87 changes: 47 additions & 40 deletions src/renderer/shader/hzb_reduce.comp.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
#include "lib/morton.hlsl"
#include "hzb_reduce.share.hlsl"

#define USE_QUAD_INTRINSICS 0

VK_PUSH_CONSTANT_HELPER(HZBReducePushConstants) consts;

VK_BINDING(0, Slot_LinearClampSampler) SamplerState LinearClampSampler;
Expand All @@ -13,7 +11,7 @@ VK_BINDING(0, Slot_HZB_mips) RWTexture2D<float2> HZB_mips[HZBMaxMipCount];

static const uint ThreadCount = HZBReduceThreadCountX * HZBReduceThreadCountY;

groupshared float2 lds_depth_min_max[ThreadCount];
groupshared float2 lds_depth_min_max[ThreadCount / MinWaveLaneCount];

[numthreads(ThreadCount, 1, 1)]
void main(uint3 gtid : SV_GroupThreadID,
Expand All @@ -24,65 +22,74 @@ void main(uint3 gtid : SV_GroupThreadID,
float depth_min = 1.0;
float depth_max = 0.0;

const uint2 local_position_ts = gid.xy * uint2(HZBReduceThreadCountX, HZBReduceThreadCountY) * 2;
const uint2 position_ts = local_position_ts + decode_morton_2d(gi) * 2 + 1;
const float2 position_uv = (float2)position_ts * consts.extent_ts_inv;

if (any(position_ts >= consts.extent_ts))
{
return;
}
const uint2 offset_ts = gid.xy * uint2(HZBReduceThreadCountX, HZBReduceThreadCountY);
const uint2 position_ts = offset_ts + decode_morton_2d(gi);
const float2 gather_uv = (float2)(position_ts * 2 + 1) * consts.depth_extent_ts_inv;

const float4 quad_depth = SceneDepth.GatherRed(LinearClampSampler, position_uv);
const float4 quad_depth = SceneDepth.GatherRed(LinearClampSampler, gather_uv);

depth_min = min(min(quad_depth.x, quad_depth.y), min(quad_depth.z, quad_depth.w));
depth_max = max(max(quad_depth.x, quad_depth.y), max(quad_depth.z, quad_depth.w));

const uint2 position_hzb_mip0_ts = position_ts >> 1;
HZB_mips[0][position_ts] = float2(depth_min, depth_max);

if (true)
{
HZB_mips[0][position_hzb_mip0_ts] = float2(depth_min, depth_max);
}
uint lane_index = WaveGetLaneIndex();

#if USE_QUAD_INTRINSICS
depth_min = min(depth_min, QuadReadAcrossX(depth_min));
depth_max = max(depth_max, QuadReadAcrossX(depth_max));
depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 1));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 1));

depth_min = min(depth_min, QuadReadAcrossY(depth_min));
depth_max = max(depth_max, QuadReadAcrossY(depth_max));
#else
depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 1));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 1));

depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 2));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 2));
#endif
depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 2));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 2));

if (gi % 4 == 0)
{
HZB_mips[1][position_hzb_mip0_ts >> 1] = float2(depth_min, depth_max);
HZB_mips[1][position_ts >> 1] = float2(depth_min, depth_max);
}

depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 4));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 4));
depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 4));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 4));

depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 8));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 8));
depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 8));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 8));

if (gi % 16 == 0)
{
HZB_mips[2][position_hzb_mip0_ts >> 2] = float2(depth_min, depth_max);
HZB_mips[2][position_ts >> 2] = float2(depth_min, depth_max);
}

depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 16));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 16));
#if 0
depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 16));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 16));

depth_min = min(depth_min, WaveReadLaneAt(depth_min, gi ^ 32));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, gi ^ 32));
depth_min = min(depth_min, WaveReadLaneAt(depth_min, lane_index ^ 32));
depth_max = max(depth_max, WaveReadLaneAt(depth_max, lane_index ^ 32));
#else
if (gi % 16 == 0)
{
lds_depth_min_max[gi / 16] = float2(depth_min, depth_max);
}

// Reduce using LDS
uint active_threads = ThreadCount / 16;

for (uint threads = active_threads / 2; threads > 0; threads /= 2)
{
GroupMemoryBarrierWithGroupSync();

if (gi < threads)
{
const float2 depth_min_max_neighbor = lds_depth_min_max[gi + threads];

depth_min = min(depth_min, depth_min_max_neighbor.x);
depth_max = max(depth_max, depth_min_max_neighbor.y);

lds_depth_min_max[gi] = float2(depth_min, depth_max);
}
}
#endif

if (gi % 64 == 0)
{
HZB_mips[3][position_hzb_mip0_ts >> 3] = float2(depth_min, depth_max);
HZB_mips[3][position_ts >> 3] = float2(depth_min, depth_max);
}
}
5 changes: 3 additions & 2 deletions src/renderer/shader/hzb_reduce.share.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,16 @@
#define Slot_SceneDepth 1
#define Slot_HZB_mips 2

static const hlsl_uint MinWaveLaneCount = 8; // FIXME

static const hlsl_uint HZBMaxMipCount = 10;

static const hlsl_uint HZBReduceThreadCountX = 8;
static const hlsl_uint HZBReduceThreadCountY = 8;

struct HZBReducePushConstants
{
hlsl_uint2 extent_ts;
hlsl_float2 extent_ts_inv;
hlsl_float2 depth_extent_ts_inv;
};

#endif
2 changes: 2 additions & 0 deletions src/renderer/vulkan/ShaderModules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ ShaderModules create_shader_modules(ReaperRoot& /*root*/, VulkanBackend& backend
ShaderModules modules = {};

modules.copy_to_depth_fs = create_shader_module(backend.device, "copy_to_depth.frag.spv");
modules.copy_to_depth_from_hzb_fs = create_shader_module(backend.device, "copy_to_depth_from_hzb.frag.spv");
modules.cull_meshlet_cs = create_shader_module(backend.device, "meshlet/cull_meshlet.comp.spv");
modules.cull_triangle_batch_cs = create_shader_module(backend.device, "meshlet/cull_triangle_batch.comp.spv");
modules.debug_geometry_build_cmds_cs = create_shader_module(backend.device, "debug_geometry/build_cmds.comp.spv");
Expand Down Expand Up @@ -82,6 +83,7 @@ ShaderModules create_shader_modules(ReaperRoot& /*root*/, VulkanBackend& backend
void destroy_shader_modules(VulkanBackend& backend, ShaderModules& shader_modules)
{
vkDestroyShaderModule(backend.device, shader_modules.copy_to_depth_fs, nullptr);
vkDestroyShaderModule(backend.device, shader_modules.copy_to_depth_from_hzb_fs, nullptr);
vkDestroyShaderModule(backend.device, shader_modules.cull_meshlet_cs, nullptr);
vkDestroyShaderModule(backend.device, shader_modules.cull_triangle_batch_cs, nullptr);
vkDestroyShaderModule(backend.device, shader_modules.debug_geometry_build_cmds_cs, nullptr);
Expand Down
1 change: 1 addition & 0 deletions src/renderer/vulkan/ShaderModules.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ struct VulkanBackend;
struct ShaderModules
{
VkShaderModule copy_to_depth_fs;
VkShaderModule copy_to_depth_from_hzb_fs;
VkShaderModule cull_meshlet_cs;
VkShaderModule cull_triangle_batch_cs;
VkShaderModule debug_geometry_build_cmds_cs;
Expand Down
16 changes: 8 additions & 8 deletions src/renderer/vulkan/renderpass/HZBPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,26 +81,27 @@ void update_hzb_pass_descriptor_set(DescriptorWriteHelper& write_helper, const H
write_helper.append(resources.descriptor_set, Slot_SceneDepth, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
scene_depth.default_view_handle, scene_depth.image_layout);

const u32 hzb_mip_count = 4; // FIXME
const u32 hzb_mip_count = hzb_texture.properties.mip_count;
std::span<VkDescriptorImageInfo> hzb_mips = write_helper.new_image_infos(hzb_mip_count);

Assert(hzb_texture.additional_views.size() == hzb_mip_count);

for (u32 index = 0; index < hzb_mips.size(); index += 1)
{
hzb_mips[index] = create_descriptor_image_info(hzb_texture.default_view_handle, hzb_texture.image_layout);
hzb_mips[index] = create_descriptor_image_info(hzb_texture.additional_views[index], hzb_texture.image_layout);
}

write_helper.writes.push_back(create_image_descriptor_write(resources.descriptor_set, Slot_HZB_mips,
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, hzb_mips));
}

void record_hzb_command_buffer(CommandBuffer& cmdBuffer, const HZBPassResources& pass_resources,
VkExtent2D depth_extent)
VkExtent2D depth_extent, VkExtent2D hzb_extent)
{
vkCmdBindPipeline(cmdBuffer.handle, VK_PIPELINE_BIND_POINT_COMPUTE, pass_resources.hzb_pipe.handle);

HZBReducePushConstants push_constants;
push_constants.extent_ts = glm::uvec2(depth_extent.width, depth_extent.height);
push_constants.extent_ts_inv =
push_constants.depth_extent_ts_inv =
glm::fvec2(1.f / static_cast<float>(depth_extent.width), 1.f / static_cast<float>(depth_extent.height));

vkCmdPushConstants(cmdBuffer.handle, pass_resources.hzb_pipe.layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
Expand All @@ -109,10 +110,9 @@ void record_hzb_command_buffer(CommandBuffer& cmdBuffer, const HZBPassResources&
vkCmdBindDescriptorSets(cmdBuffer.handle, VK_PIPELINE_BIND_POINT_COMPUTE, pass_resources.hzb_pipe.layout, 0, 1,
&pass_resources.descriptor_set, 0, nullptr);

// Assert(HZBRes % (HZBReduceThreadCountX * HZBReduceThreadCountY) == 0);
vkCmdDispatch(cmdBuffer.handle,
div_round_up(depth_extent.width, HZBReduceThreadCountX * 2),
div_round_up(depth_extent.height, HZBReduceThreadCountY * 2),
div_round_up(hzb_extent.width, HZBReduceThreadCountX),
div_round_up(hzb_extent.height, HZBReduceThreadCountY),
1);
}
} // namespace Reaper
2 changes: 1 addition & 1 deletion src/renderer/vulkan/renderpass/HZBPass.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,6 @@ void update_hzb_pass_descriptor_set(DescriptorWriteHelper& write_helper, const H
struct CommandBuffer;

void record_hzb_command_buffer(CommandBuffer& cmdBuffer, const HZBPassResources& pass_resources,
VkExtent2D depth_extent);
VkExtent2D depth_extent, VkExtent2D hzb_extent);

} // namespace Reaper
Loading

0 comments on commit d0ef23b

Please sign in to comment.