Skip to content

Commit

Permalink
Merge pull request #76832 from RandomShaper/cluster_render_prevail
Browse files Browse the repository at this point in the history
Save cluster render shader from being optimized out entirely
  • Loading branch information
akien-mga committed May 9, 2023
2 parents d550fdd + 6465432 commit 668cf3c
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 7 deletions.
3 changes: 3 additions & 0 deletions drivers/vulkan/rendering_device_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9380,6 +9380,9 @@ bool RenderingDeviceVulkan::has_feature(const Features p_feature) const {
VulkanContext::VRSCapabilities vrs_capabilities = context->get_vrs_capabilities();
return vrs_capabilities.attachment_vrs_supported && context->get_physical_device_features().shaderStorageImageExtendedFormats;
} break;
case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: {
return true;
} break;
default: {
return false;
}
Expand Down
20 changes: 17 additions & 3 deletions servers/rendering/renderer_rd/cluster_builder_rd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,29 @@ ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() {
}

{
RD::FramebufferFormatID fb_format;
RD::PipelineColorBlendState blend_state;
String defines;
if (RD::get_singleton()->has_feature(RD::SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS)) {
fb_format = RD::get_singleton()->framebuffer_format_create_empty();
blend_state = RD::PipelineColorBlendState::create_disabled();
} else {
Vector<RD::AttachmentFormat> afs;
afs.push_back(RD::AttachmentFormat());
afs.write[0].usage_flags = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
fb_format = RD::get_singleton()->framebuffer_format_create(afs);
defines = "\n#define USE_ATTACHMENT\n";
}

Vector<String> versions;
versions.push_back("");
cluster_render.cluster_render_shader.initialize(versions);
cluster_render.cluster_render_shader.initialize(versions, defines);
cluster_render.shader_version = cluster_render.cluster_render_shader.version_create();
cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0);
cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0);
cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0);
RD::PipelineMultisampleState ms;
ms.sample_count = RD::TEXTURE_SAMPLES_4;
cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0);
cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), blend_state, 0);
}
{
Vector<String> versions;
Expand Down
18 changes: 14 additions & 4 deletions servers/rendering/renderer_rd/shaders/cluster_render.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ layout(set = 0, binding = 3, std430) buffer restrict ClusterRender {
}
cluster_render;

// Color output that exists only when USE_ATTACHMENT is defined; main() writes a value derived from its atomicOr() results to it.
#ifdef USE_ATTACHMENT
layout(location = 0) out vec4 frag_color;
#endif

void main() {
//convert from screen to cluster
uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift;
Expand All @@ -113,6 +117,8 @@ void main() {
uint usage_write_offset = cluster_offset + (element_index >> 5);
uint usage_write_bit = 1 << (element_index & 0x1F);

uint aux = 0;

#ifdef USE_SUBGROUPS

uint cluster_thread_group_index;
Expand All @@ -138,7 +144,7 @@ void main() {
cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask);

if (cluster_thread_group_index == 0) {
atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
}
}
#else
Expand All @@ -147,7 +153,7 @@ void main() {
if (!gl_HelperInvocation)
#endif
{
atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
}
#endif
//find the current element in the depth usage list and mark the current depth as used
Expand All @@ -162,7 +168,7 @@ void main() {
if (!gl_HelperInvocation) {
z_write_bit = subgroupOr(z_write_bit); //merge all Zs
if (cluster_thread_group_index == 0) {
atomicOr(cluster_render.data[z_write_offset], z_write_bit);
aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);
}
}
#else
Expand All @@ -171,7 +177,11 @@ void main() {
if (!gl_HelperInvocation)
#endif
{
atomicOr(cluster_render.data[z_write_offset], z_write_bit);
aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);
}
#endif

#ifdef USE_ATTACHMENT
frag_color = vec4(float(aux));
#endif
}
2 changes: 2 additions & 0 deletions servers/rendering/rendering_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,8 @@ class RenderingDevice : public Object {
SUPPORTS_MULTIVIEW,
SUPPORTS_FSR_HALF_FLOAT,
SUPPORTS_ATTACHMENT_VRS,
// If not supported, a fragment shader with only side effects (i.e., writes to buffers, but doesn't output to attachments) may be optimized down to a no-op by the GPU driver.
SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS,
};
virtual bool has_feature(const Features p_feature) const = 0;

Expand Down

0 comments on commit 668cf3c

Please sign in to comment.