Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement basic depth texturing for OpenGL #14042

Merged
merged 5 commits into from
Jul 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Common/GPU/OpenGL/GLQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
bool blendEnabled = false;
bool cullEnabled = false;
bool ditherEnabled = false;
bool depthClampEnabled = false;
#ifndef USING_GLES2
int logicOp = -1;
bool logicEnabled = false;
Expand Down Expand Up @@ -1283,6 +1284,17 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
glDisable(GL_DITHER);
ditherEnabled = false;
}
#ifndef USING_GLES2
if (c.raster.depthClampEnable) {
if (!depthClampEnabled) {
glEnable(GL_DEPTH_CLAMP);
depthClampEnabled = true;
}
} else if (!c.raster.depthClampEnable && depthClampEnabled) {
glDisable(GL_DEPTH_CLAMP);
depthClampEnabled = false;
}
#endif
CHECK_GL_ERROR_IF_DEBUG();
break;
default:
Expand Down Expand Up @@ -1322,6 +1334,8 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
if (cullEnabled)
glDisable(GL_CULL_FACE);
#ifndef USING_GLES2
if (depthClampEnabled)
glDisable(GL_DEPTH_CLAMP);
if (!gl_extensions.IsGLES && logicEnabled) {
glDisable(GL_COLOR_LOGIC_OP);
}
Expand Down
1 change: 1 addition & 0 deletions Common/GPU/OpenGL/GLQueueRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ struct GLRRenderData {
GLenum frontFace;
GLenum cullFace;
GLboolean ditherEnable;
GLboolean depthClampEnable;
} raster;
};
};
Expand Down
3 changes: 2 additions & 1 deletion Common/GPU/OpenGL/GLRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -813,13 +813,14 @@ class GLRenderManager {
curRenderStep_->commands.push_back(data);
}

void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable) {
void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable, GLboolean depthClamp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::RASTER };
data.raster.cullEnable = cullEnable;
data.raster.frontFace = frontFace;
data.raster.cullFace = cullFace;
data.raster.ditherEnable = ditherEnable;
data.raster.depthClampEnable = depthClamp;
curRenderStep_->commands.push_back(data);
}

Expand Down
8 changes: 7 additions & 1 deletion Common/GPU/OpenGL/thin3d_gl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ class OpenGLDepthStencilState : public DepthStencilState {
class OpenGLRasterState : public RasterState {
public:
void Apply(GLRenderManager *render) {
render->SetRaster(cullEnable, frontFace, cullMode, false);
render->SetRaster(cullEnable, frontFace, cullMode, GL_FALSE, GL_FALSE);
}

GLboolean cullEnable;
Expand Down Expand Up @@ -533,6 +533,12 @@ OpenGLContext::OpenGLContext() {
}
caps_.framebufferBlitSupported = gl_extensions.NV_framebuffer_blit || gl_extensions.ARB_framebuffer_object;
caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported;
caps_.depthClampSupported = gl_extensions.ARB_depth_clamp;

// Interesting potential hack for emulating GL_DEPTH_CLAMP (use a separate varying, force depth in fragment shader):
// This will induce a performance penalty on many architectures though so a blanket enable of this
// is probably not a good idea.
// https://stackoverflow.com/questions/5960757/how-to-emulate-gl-depth-clamp-nv
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably should do this, but I think we'd need a setting - it might make things a lot slower in some games, and people might be willing to tolerate weird clipping issues near the edges in exchange for speed...

In some games depending on how they draw, it might not be that big a difference I suppose.

-[Unknown]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it will depend a lot on the GPU architecture too; writing gl_FragDepth can incur anything from almost no performance penalty to a very large one, I believe. So a blanket enable is probably not realistic - adding that to the comment.


switch (gl_extensions.gpuVendor) {
case GPU_VENDOR_AMD: caps_.vendor = GPUVendor::VENDOR_AMD; break;
Expand Down
7 changes: 7 additions & 0 deletions GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "Common/Log.h"
#include "Core/Reporting.h"
#include "GPU/GPUState.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/DepalettizeShaderCommon.h"

#define WRITE p+=sprintf
Expand Down Expand Up @@ -71,6 +72,12 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
WRITE(p, "out vec4 fragColor0;\n");
WRITE(p, "uniform sampler2D tex;\n");
WRITE(p, "uniform sampler2D pal;\n");

if (pixelFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors();
WRITE(p, "const float z_scale = %f;\n", factors.scale);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, I'd recommend factors.scale * 1.0f / 65535.0f. Then below it'd just be color = (color - z_offset) * z_scale; right?

These values wouldn't vary, so we could avoid the formula when offset is 0 and scale is 65535, which will be true if clamp is enabled.

-[Unknown]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I don't see how dividing by 65535 helps here. Then we'd just have to scale it up again to turn it into an integer index later...

Indeed, the values are constant, but I'm thinking that we might be able to also support this when we use the depth rescaling/offset stuff to simulate clamping, in some cases, so want to keep the flexibility. The distortion you mentioned earlier kind of prevents that though I guess.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I missed the other code below that wants it as an int. This doesn't already work with the 1/4 scale thing?

Also in that case, how does WRITE(p, " float color = tex.Sample(texSamp, v_texcoord0).x;\n"); work with color.x later?

-[Unknown]

Copy link
Owner Author

@hrydgard hrydgard Feb 1, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure honestly if it works correctly with the 1/4 scale. But hopefully it does.

You can take .x of a float in HLSL; it's effectively a vec1. It does look confusing, though...

WRITE(p, "const float z_offset = %f;\n", factors.offset);
}
}

if (language == HLSL_D3D11) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/DepthBufferGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int
// We must bind the program after starting the render pass, and set the color mask after clearing.
render_->SetScissor({ 0, 0, vfb->renderWidth, vfb->renderHeight });
render_->SetDepth(false, false, GL_ALWAYS);
render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE);
render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE, GL_FALSE);
render_->BindProgram(depthDownloadProgram_);

if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/FramebufferManagerGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ void FramebufferManagerGLES::DrawActiveTexture(float x, float y, float w, float

// We always want a plain state here, well, except for when it's used by the stencil stuff...
render_->SetDepth(false, false, GL_ALWAYS);
render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE);
render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE, GL_FALSE);
if (!(flags & DRAWTEX_KEEP_STENCIL_ALPHA)) {
render_->SetNoBlendAndMask(0xF);
render_->SetStencilDisabled();
Expand Down
8 changes: 8 additions & 0 deletions GPU/GLES/GPU_GLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,14 @@ void GPU_GLES::CheckGPUFeatures() {
if (gl_extensions.ARB_texture_float || gl_extensions.OES_texture_float)
features |= GPU_SUPPORTS_TEXTURE_FLOAT;

if (draw_->GetDeviceCaps().depthClampSupported) {
features |= GPU_SUPPORTS_DEPTH_CLAMP | GPU_SUPPORTS_ACCURATE_DEPTH;
// Our implementation of depth texturing needs simple Z range, so can't
// use the extension hacks (yet).
if (gl_extensions.GLES3)
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
}

// If we already have a 16-bit depth buffer, we don't need to round.
bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8;
if (prefer24) {
Expand Down
18 changes: 17 additions & 1 deletion GPU/GLES/StateMappingGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,23 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
GLenum cullMode = cullingMode[gstate.getCullMode() ^ !useBufferedRendering];

cullEnable = !gstate.isModeClear() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled();
renderManager->SetRaster(cullEnable, GL_CCW, cullMode, dither);

bool depthClampEnable = false;
if (gstate.isModeClear() || gstate.isModeThrough()) {
// TODO: Might happen in clear mode if not through...
depthClampEnable = false;
} else {
if (gstate.getDepthRangeMin() == 0 || gstate.getDepthRangeMax() == 65535) {
// TODO: Still has a bug where we clamp to depth range if one is not the full range.
// But the alternate is not clamping in either direction...
depthClampEnable = gstate.isDepthClampEnabled() && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP);
} else {
// We just want to clip in this case, the clamp would be clipped anyway.
depthClampEnable = false;
}
}

renderManager->SetRaster(cullEnable, GL_CCW, cullMode, dither, depthClampEnable);
}

if (gstate_c.IsDirty(DIRTY_DEPTHSTENCIL_STATE)) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/StencilBufferGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, StencilUplo
render_->SetDepth(false, false, GL_ALWAYS);
render_->Clear(0, 0, 0, GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, 0x8, 0, 0, 0, 0);
render_->SetStencilFunc(GL_TRUE, GL_ALWAYS, 0xFF, 0xFF);
render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE);
render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE, GL_FALSE);
render_->BindProgram(stencilUploadProgram_);
render_->SetNoBlendAndMask(0x8);

Expand Down
15 changes: 9 additions & 6 deletions GPU/GLES/TextureCacheGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,13 +341,15 @@ class TextureShaderApplier {
};

void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) {
DepalShader *depal = nullptr;
DepalShader *depalShader = nullptr;
uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
bool need_depalettize = IsClutFormat(texFormat);

bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300) || gstate_c.Supports(GPU_SUPPORTS_GLSL_330));
bool depth = channel == NOTIFY_FB_DEPTH;
bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300) || gstate_c.Supports(GPU_SUPPORTS_GLSL_330)) && !depth;
if (!gstate_c.Supports(GPU_SUPPORTS_32BIT_INT_FSHADER)) {
useShaderDepal = false;
depth = false; // Can't support this
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, is it better to use the color part of the framebuf or just ignore the attachment I wonder?

-[Unknown]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The color part usually isn't very meaningful when you're trying to read the depth, so ignoring it is probably better... Unfortunately, there's currently no good way to bind an empty texture here, as far as I can tell. Hm.

Copy link
Collaborator

@unknownbrackets unknownbrackets Jul 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the better thing would be a Supports flag and handle this when choosing candidates... (maybe we already even do that?)

-[Unknown]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think we already do...

}

if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
Expand Down Expand Up @@ -375,10 +377,10 @@ void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer,
return;
}

depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
gstate_c.SetUseShaderDepal(false);
depal = depalShaderCache_->GetDepalettizeShader(clutMode, framebuffer->drawnFormat);
}
if (depal) {
if (depalShader) {
shaderManager_->DirtyLastShader();

const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
Expand All @@ -388,11 +390,12 @@ void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer,

render_->SetScissor(GLRect2D{ 0, 0, (int)framebuffer->renderWidth, (int)framebuffer->renderHeight });
render_->SetViewport(GLRViewport{ 0.0f, 0.0f, (float)framebuffer->renderWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f });
TextureShaderApplier shaderApply(depal, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight);
TextureShaderApplier shaderApply(depalShader, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight);
shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset);
shaderApply.Use(render_, drawEngine_, shadeInputLayout_);

framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY | BINDFBCOLOR_FORCE_SELF);
draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);

render_->BindTexture(TEX_SLOT_CLUT, clutTexture);
render_->SetTextureSampler(TEX_SLOT_CLUT, GL_REPEAT, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);

Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/GPU_Vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ void GPU_Vulkan::CheckGPUFeatures() {
}

// Might enable this later - in the first round we are mostly looking at depth/stencil/discard.
// if (g_Config.bDisableVendorBugChecks)
// if (!g_Config.bEnableVendorBugChecks)
// features |= GPU_SUPPORTS_ACCURATE_DEPTH;

// Mandatory features on Vulkan, which may be checked in "centralized" code
Expand Down