hrydgard · hrydgard · Jul 9, 2021 · Nov 10, 2020 · Aug 9, 2020 · Jan 31, 2021
diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp
@@ -793,6 +793,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
 	bool blendEnabled = false;
 	bool cullEnabled = false;
 	bool ditherEnabled = false;
+	bool depthClampEnabled = false;  // Tracks whether this pass currently has GL_DEPTH_CLAMP enabled.
 #ifndef USING_GLES2
 	int logicOp = -1;
 	bool logicEnabled = false;
@@ -1283,6 +1284,17 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
 				glDisable(GL_DITHER);
 				ditherEnabled = false;
 			}
+#ifndef USING_GLES2
+			if (c.raster.depthClampEnable) {
+				if (!depthClampEnabled) {  // Skip the GL call when clamp is already on for this pass.
+					glEnable(GL_DEPTH_CLAMP);
+					depthClampEnabled = true;
+				}
+			} else if (depthClampEnabled) {  // The request is "off" in this branch; only disable if we turned it on.
+				glDisable(GL_DEPTH_CLAMP);
+				depthClampEnabled = false;
+			}
+#endif
 			CHECK_GL_ERROR_IF_DEBUG();
 			break;
 		default:
@@ -1322,6 +1334,8 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
 	if (cullEnabled)
 		glDisable(GL_CULL_FACE);
 #ifndef USING_GLES2
+	if (depthClampEnabled)  // Leave clamp off at the end of the pass, matching the 'false' the tracker starts from.
+		glDisable(GL_DEPTH_CLAMP);
 	if (!gl_extensions.IsGLES && logicEnabled) {
 		glDisable(GL_COLOR_LOGIC_OP);
 	}

diff --git a/Common/GPU/OpenGL/GLQueueRunner.h b/Common/GPU/OpenGL/GLQueueRunner.h
@@ -196,6 +196,7 @@ struct GLRRenderData {
 			GLenum frontFace;
 			GLenum cullFace;
 			GLboolean ditherEnable;
+			GLboolean depthClampEnable;  // Requests GL_DEPTH_CLAMP for subsequent draws; set via SetRaster().
 		} raster;
 	};
 };

diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h
@@ -813,13 +813,14 @@ class GLRenderManager {
 		curRenderStep_->commands.push_back(data);
 	}
 
-	void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable) {
+	void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable, GLboolean depthClamp) {
 		_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
 		GLRRenderData data{ GLRRenderCommand::RASTER };
 		data.raster.cullEnable = cullEnable;
 		data.raster.frontFace = frontFace;
 		data.raster.cullFace = cullFace;
 		data.raster.ditherEnable = ditherEnable;
+		data.raster.depthClampEnable = depthClamp;  // Queued with the rest of the raster state in the RASTER command.
 		curRenderStep_->commands.push_back(data);
 	}
 

diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp
@@ -188,7 +188,7 @@ class OpenGLDepthStencilState : public DepthStencilState {
 class OpenGLRasterState : public RasterState {
 public:
 	void Apply(GLRenderManager *render) {
-		render->SetRaster(cullEnable, frontFace, cullMode, false);
+		render->SetRaster(cullEnable, frontFace, cullMode, GL_FALSE, GL_FALSE);  // Dither and depth clamp are always off here.
 	}
 
 	GLboolean cullEnable;
@@ -533,6 +533,12 @@ OpenGLContext::OpenGLContext() {
 	}
 	caps_.framebufferBlitSupported = gl_extensions.NV_framebuffer_blit || gl_extensions.ARB_framebuffer_object;
 	caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported;
+	caps_.depthClampSupported = gl_extensions.ARB_depth_clamp;  // Reported only when ARB_depth_clamp is present.
+
+	// Possible fallback for emulating GL_DEPTH_CLAMP where it is unavailable: pass depth through a separate
+	// varying and force the depth value in the fragment shader. That induces a performance penalty on
+	// many architectures though, so a blanket enable of it is probably not a good idea. See:
+	// https://stackoverflow.com/questions/5960757/how-to-emulate-gl-depth-clamp-nv
 
 	switch (gl_extensions.gpuVendor) {
 	case GPU_VENDOR_AMD: caps_.vendor = GPUVendor::VENDOR_AMD; break;

diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp
@@ -25,6 +25,7 @@
 #include "Common/Log.h"
 #include "Core/Reporting.h"
 #include "GPU/GPUState.h"
+#include "GPU/Common/GPUStateUtils.h"
 #include "GPU/Common/DepalettizeShaderCommon.h"
 
 #define WRITE p+=sprintf
@@ -71,6 +72,12 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
 		WRITE(p, "out vec4 fragColor0;\n");
 		WRITE(p, "uniform sampler2D tex;\n");
 		WRITE(p, "uniform sampler2D pal;\n");
+
+		if (pixelFormat == GE_FORMAT_DEPTH16) {  // Depth source: emit the depth scale/offset as shader constants.
+			DepthScaleFactors factors = GetDepthScaleFactors();
+			WRITE(p, "const float z_scale = %f;\n", factors.scale);  // NOTE(review): baked in at generation time - confirm cached shaders are rebuilt if the factors can change.
+			WRITE(p, "const float z_offset = %f;\n", factors.offset);
+		}
 	}
 
 	if (language == HLSL_D3D11) {

diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp
@@ -133,7 +133,7 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int
 		// We must bind the program after starting the render pass, and set the color mask after clearing.
 		render_->SetScissor({ 0, 0, vfb->renderWidth, vfb->renderHeight });
 		render_->SetDepth(false, false, GL_ALWAYS);
-		render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE);
+		render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE, GL_FALSE);  // Last two args: dither off, depth clamp off.
 		render_->BindProgram(depthDownloadProgram_);
 
 		if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {

diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp
@@ -203,7 +203,7 @@ void FramebufferManagerGLES::DrawActiveTexture(float x, float y, float w, float
 
 	// We always want a plain state here, well, except for when it's used by the stencil stuff...
 	render_->SetDepth(false, false, GL_ALWAYS);
-	render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE);
+	render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE, GL_FALSE);  // Last two args: dither off, depth clamp off.
 	if (!(flags & DRAWTEX_KEEP_STENCIL_ALPHA)) {
 		render_->SetNoBlendAndMask(0xF);
 		render_->SetStencilDisabled();

diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp
@@ -221,6 +221,14 @@ void GPU_GLES::CheckGPUFeatures() {
 	if (gl_extensions.ARB_texture_float || gl_extensions.OES_texture_float)
 		features |= GPU_SUPPORTS_TEXTURE_FLOAT;
 
+	if (draw_->GetDeviceCaps().depthClampSupported) {  // Depth clamp support also turns on accurate depth.
+		features |= GPU_SUPPORTS_DEPTH_CLAMP | GPU_SUPPORTS_ACCURATE_DEPTH;
+		// Our implementation of depth texturing needs a simple Z range, so we can't
+		// use the extension hacks (yet).
+		if (gl_extensions.GLES3)
+			features |= GPU_SUPPORTS_DEPTH_TEXTURE;
+	}
+
 	// If we already have a 16-bit depth buffer, we don't need to round.
 	bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8;
 	if (prefer24) {

diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp
@@ -230,7 +230,23 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
 		GLenum cullMode = cullingMode[gstate.getCullMode() ^ !useBufferedRendering];
 
 		cullEnable = !gstate.isModeClear() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled();
-		renderManager->SetRaster(cullEnable, GL_CCW, cullMode, dither);
+
+		// Decide whether to request hardware depth clamping for this draw.
+		// Clear mode and through mode never clamp.
+		// TODO: Might happen in clear mode if not through...
+		const bool clampCandidate = !gstate.isModeClear() && !gstate.isModeThrough();
+
+		// Only clamp when at least one end of the depth range sits at its extreme (0 or 65535).
+		// Otherwise we just want to clip; the clamp would be clipped anyway.
+		// TODO: Still has a bug where we clamp to depth range if one is not the full range.
+		// But the alternate is not clamping in either direction...
+		const bool rangeAtExtreme = clampCandidate &&
+			(gstate.getDepthRangeMin() == 0 || gstate.getDepthRangeMax() == 65535);
+
+		const bool depthClampEnable = rangeAtExtreme &&
+			gstate.isDepthClampEnabled() && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP);
+
+		renderManager->SetRaster(cullEnable, GL_CCW, cullMode, dither, depthClampEnable);
 	}
 
 	if (gstate_c.IsDirty(DIRTY_DEPTHSTENCIL_STATE)) {

diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp
@@ -191,7 +191,7 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, StencilUplo
 	render_->SetDepth(false, false, GL_ALWAYS);
 	render_->Clear(0, 0, 0, GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, 0x8, 0, 0, 0, 0);
 	render_->SetStencilFunc(GL_TRUE, GL_ALWAYS, 0xFF, 0xFF);
-	render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE);
+	render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE, GL_FALSE);  // Last two args: dither off, depth clamp off.
 	render_->BindProgram(stencilUploadProgram_);
 	render_->SetNoBlendAndMask(0x8);
 

diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp
@@ -341,13 +341,15 @@ class TextureShaderApplier {
 };
 
 void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) {
-	DepalShader *depal = nullptr;
+	DepalShader *depalShader = nullptr;
 	uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
 	bool need_depalettize = IsClutFormat(texFormat);
 
-	bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300) || gstate_c.Supports(GPU_SUPPORTS_GLSL_330));
+	bool depth = channel == NOTIFY_FB_DEPTH;  // True when the framebuffer's depth channel is the texture source.
+	bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300) || gstate_c.Supports(GPU_SUPPORTS_GLSL_330)) && !depth;
 	if (!gstate_c.Supports(GPU_SUPPORTS_32BIT_INT_FSHADER)) {
 		useShaderDepal = false;
+		depth = false;  // Can't support depth here; the code below then uses the color format and color binding.
 	}
 
 	if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
@@ -375,10 +377,10 @@ void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer,
 			return;
 		}
 
+		depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);  // Depth sources request the DEPTH16 variant.
 		gstate_c.SetUseShaderDepal(false);
-		depal = depalShaderCache_->GetDepalettizeShader(clutMode, framebuffer->drawnFormat);
 	}
-	if (depal) {
+	if (depalShader) {
 		shaderManager_->DirtyLastShader();
 
 		const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
@@ -388,11 +390,12 @@ void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer,
 
 		render_->SetScissor(GLRect2D{ 0, 0, (int)framebuffer->renderWidth, (int)framebuffer->renderHeight });
 		render_->SetViewport(GLRViewport{ 0.0f, 0.0f, (float)framebuffer->renderWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f });
-		TextureShaderApplier shaderApply(depal, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight);
+		TextureShaderApplier shaderApply(depalShader, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight);
 		shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset);
 		shaderApply.Use(render_, drawEngine_, shadeInputLayout_);
 
-		framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY | BINDFBCOLOR_FORCE_SELF);
+		draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);  // Bind the depth or color aspect according to the 'depth' flag.
+
 		render_->BindTexture(TEX_SLOT_CLUT, clutTexture);
 		render_->SetTextureSampler(TEX_SLOT_CLUT, GL_REPEAT, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
 

diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp
@@ -220,7 +220,7 @@ void GPU_Vulkan::CheckGPUFeatures() {
 	}
 
 	// Might enable this later - in the first round we are mostly looking at depth/stencil/discard.
-	// if (g_Config.bDisableVendorBugChecks)
+	// if (!g_Config.bEnableVendorBugChecks)
 	// 	features |= GPU_SUPPORTS_ACCURATE_DEPTH;
 
 	// Mandatory features on Vulkan, which may be checked in "centralized" code