From f5e6754ac013a97698ef4c3ded5780176f43d58c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 20 Jul 2022 00:05:07 +0200 Subject: [PATCH 1/4] Special case depal lookups for Test Drive's strange usage. This implements the hack I suggested in #13355, where instead of first reducing the color to RGB565 as the real game does, we just take each channel at full precision and do the lookup according to the mask, linearly filtering the palette. This makes the game look a lot nicer and is also a small optimization, but the hack is very specific so kinda ugly in a way. --- Core/Compatibility.cpp | 1 + Core/Compatibility.h | 1 + GPU/Common/FragmentShaderGenerator.cpp | 25 +++++++++++++++++++++++++ GPU/Vulkan/DrawEngineVulkan.cpp | 4 ++-- GPU/Vulkan/DrawEngineVulkan.h | 2 +- assets/compat.ini | 7 +++++++ 6 files changed, 37 insertions(+), 3 deletions(-) diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index f17c1fd6a1f4..7a5e12d34915 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -98,6 +98,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "AllowLargeFBTextureOffsets", &flags_.AllowLargeFBTextureOffsets); CheckSetting(iniFile, gameID, "AtracLoopHack", &flags_.AtracLoopHack); CheckSetting(iniFile, gameID, "DeswizzleDepth", &flags_.DeswizzleDepth); + CheckSetting(iniFile, gameID, "SmoothedDepal", &flags_.SmoothedDepal); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index 29345aee42ca..5d52e3aec928 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -88,6 +88,7 @@ struct CompatFlags { bool AllowLargeFBTextureOffsets; bool AtracLoopHack; bool DeswizzleDepth; + bool SmoothedDepal; }; class IniFile; diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index f956217b3884..e7fd8b5fedb6 
100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -23,8 +23,10 @@ #include "Common/GPU/OpenGL/GLFeatures.h" #include "Common/GPU/ShaderWriter.h" #include "Common/GPU/thin3d.h" +#include "Core/Compatibility.h" #include "Core/Reporting.h" #include "Core/Config.h" +#include "Core/System.h" #include "GPU/Common/GPUStateUtils.h" #include "GPU/Common/ShaderId.h" #include "GPU/Common/ShaderUniforms.h" @@ -88,6 +90,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug; bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too. + bool smoothedDepal = PSP_CoreParameter().compat.flags().SmoothedDepal; bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE); bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps; @@ -590,6 +593,28 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } } } + } else if (shaderDepal && smoothedDepal) { + // Specific mode for Test Drive. Fixes the banding. + if (doTextureProjection) { + // We don't use textureProj because we need better control and it's probably not much of a savings anyway. + // However it is good for precision on older hardware like PowerVR. + WRITE(p, " vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord); + } else { + WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord); + } + WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord); + WRITE(p, " uint depalShift = (u_depal_mask_shift_off_fmt >> 8) & 0xFFU;\n"); + WRITE(p, " uint depalFmt = (u_depal_mask_shift_off_fmt >> 24) & 0x3U;\n"); + WRITE(p, " float index0 = t.r;\n"); + WRITE(p, " float mul = 32.0 / 256.0;\n"); + WRITE(p, " if (depalFmt == 0) {\n"); // yes, different versions of Test Drive use different formats. Could do compile time by adding more compat flags but meh. 
+ WRITE(p, " if (depalShift == 5) { index0 = t.g; mul = 64.0 / 256.0; }\n"); + WRITE(p, " else if (depalShift == 11) { index0 = t.b; }\n"); + WRITE(p, " } else {\n"); + WRITE(p, " if (depalShift == 5) { index0 = t.g; }\n"); + WRITE(p, " else if (depalShift == 10) { index0 = t.b; }\n"); + WRITE(p, " }\n"); + WRITE(p, " t = %s(pal, vec2(index0 * mul, 0.0));\n", compat.texture); } else { if (doTextureProjection) { // We don't use textureProj because we need better control and it's probably not much of a savings anyway. diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 256c1adbaca1..27d1fda33f95 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -185,8 +185,8 @@ void DrawEngineVulkan::InitDeviceObjects() { samp.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; samp.flags = 0; - samp.magFilter = VK_FILTER_NEAREST; - samp.minFilter = VK_FILTER_NEAREST; + samp.magFilter = VK_FILTER_LINEAR; + samp.minFilter = VK_FILTER_LINEAR; res = vkCreateSampler(device, &samp, nullptr, &samplerSecondary_); _dbg_assert_(VK_SUCCESS == res); res = vkCreateSampler(device, &samp, nullptr, &nullSampler_); diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 48b8369fd7ef..e4a75f2a35ea 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -218,7 +218,7 @@ class DrawEngineVulkan : public DrawEngineCommon { // Secondary texture for shader blending VkImageView boundSecondary_ = VK_NULL_HANDLE; VkImageView boundDepal_ = VK_NULL_HANDLE; - VkSampler samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch. + VkSampler samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch (except in SmoothedDepal mode for Test Drive). 
PrehashMap vai_; VulkanPushBuffer *vertexCache_; diff --git a/assets/compat.ini b/assets/compat.ini index 974a6216defd..55ddf688fc57 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -1276,3 +1276,10 @@ UCKS45048 = true UCJS18030 = true UCJS18047 = true NPJG00015 = true + +[SmoothedDepal] +# Test Drive Unlimited smoothed CLUT lookups. See comments in #13355 +ULET00386 = true +ULES00637 = true +ULKS46126 = true +ULUS10249 = true From 6558bde0f6fdc54d6d7787af472725d020090c40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 21 Aug 2022 23:46:01 +0200 Subject: [PATCH 2/4] Remove SmoothedDepal compat setting, instead detect the ramp directly. --- Core/Compatibility.cpp | 1 - Core/Compatibility.h | 1 - GPU/Common/FragmentShaderGenerator.cpp | 2 +- GPU/Common/ShaderId.cpp | 2 ++ GPU/Common/ShaderId.h | 1 + GPU/Common/TextureCacheCommon.cpp | 14 ++++----- GPU/Common/TextureShaderCommon.cpp | 41 +++++++++++++++++++++----- GPU/Common/TextureShaderCommon.h | 3 +- GPU/GLES/TextureCacheGLES.cpp | 2 +- GPU/GPUState.h | 4 ++- GPU/Vulkan/TextureCacheVulkan.cpp | 2 +- assets/compat.ini | 7 ----- 12 files changed, 51 insertions(+), 29 deletions(-) diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index 7a5e12d34915..f17c1fd6a1f4 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -98,7 +98,6 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "AllowLargeFBTextureOffsets", &flags_.AllowLargeFBTextureOffsets); CheckSetting(iniFile, gameID, "AtracLoopHack", &flags_.AtracLoopHack); CheckSetting(iniFile, gameID, "DeswizzleDepth", &flags_.DeswizzleDepth); - CheckSetting(iniFile, gameID, "SmoothedDepal", &flags_.SmoothedDepal); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index 5d52e3aec928..29345aee42ca 100644 --- a/Core/Compatibility.h +++ 
b/Core/Compatibility.h @@ -88,7 +88,6 @@ struct CompatFlags { bool AllowLargeFBTextureOffsets; bool AtracLoopHack; bool DeswizzleDepth; - bool SmoothedDepal; }; class IniFile; diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index e7fd8b5fedb6..a38e70fd5c69 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -90,7 +90,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug; bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too. - bool smoothedDepal = PSP_CoreParameter().compat.flags().SmoothedDepal; + bool smoothedDepal = id.Bit(FS_BIT_SHADER_SMOOTHED_DEPAL); bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE); bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps; diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 510e90e9297e..849a1b8b193d 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -261,6 +261,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT; bool useShaderDepal = gstate_c.useShaderDepal; + bool useSmoothedDepal = gstate_c.useSmoothedShaderDepal; bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead); // Note how we here recompute some of the work already done in state mapping. 
@@ -290,6 +291,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { } id.SetBit(FS_BIT_BGRA_TEXTURE, gstate_c.bgraTexture); id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal); + id.SetBit(FS_BIT_SHADER_SMOOTHED_DEPAL, useSmoothedDepal); id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D); } diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 8dcee32c1ea4..a105af67462c 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -94,6 +94,7 @@ enum FShaderBit : uint8_t { FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49, FS_BIT_COLOR_WRITEMASK = 50, FS_BIT_3D_TEXTURE = 51, + FS_BIT_SHADER_SMOOTHED_DEPAL = 52, }; static inline FShaderBit operator +(FShaderBit bit, int i) { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index de6c840722a6..6d14e0cd4ed5 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1886,8 +1886,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); // Very icky conflation here of native and thin3d rendering. This will need careful work per backend in BindAsClutTexture. - Draw::Texture *clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); - BindAsClutTexture(clutTexture); + ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); + BindAsClutTexture(clutTexture.texture); framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); // Vulkan needs to do some extra work here to pick out the native handle from Draw. @@ -1901,7 +1901,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer // Since we started/ended render passes, might need these. 
gstate_c.Dirty(DIRTY_DEPAL); - gstate_c.SetUseShaderDepal(true); + gstate_c.SetUseShaderDepal(true, gstate.getClutIndexStartPos() == 0 && gstate.getClutIndexMask() <= clutTexture.rampLength); gstate_c.depalFramebufferFormat = framebuffer->drawnFormat; const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16); const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor; @@ -1914,12 +1914,12 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer } textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); } if (textureShader) { const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); - Draw::Texture *clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); + ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight); draw_->BindTexture(0, nullptr); draw_->BindTexture(1, nullptr); @@ -1930,7 +1930,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer draw_->SetViewports(1, &vp); draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? 
Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0); - draw_->BindTexture(1, clutTexture); + draw_->BindTexture(1, clutTexture.texture); Draw::SamplerState *nearest = textureShaderCache_->GetSampler(); draw_->BindSamplerStates(0, 1, &nearest); draw_->BindSamplerStates(1, 1, &nearest); @@ -1958,7 +1958,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); BoundFramebufferTexture(); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); } diff --git a/GPU/Common/TextureShaderCommon.cpp b/GPU/Common/TextureShaderCommon.cpp index defbeb1575bc..1f684af5da7c 100644 --- a/GPU/Common/TextureShaderCommon.cpp +++ b/GPU/Common/TextureShaderCommon.cpp @@ -51,22 +51,22 @@ void TextureShaderCache::DeviceLost() { Clear(); } -Draw::Texture *TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) { +ClutTexture TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) { // Simplistic, but works well enough. u32 clutId = clutHash ^ (uint32_t)clutFormat; auto oldtex = texCache_.find(clutId); if (oldtex != texCache_.end()) { oldtex->second->lastFrame = gpuStats.numFlips; - return oldtex->second->texture; + return *oldtex->second; } - int texturePixels = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512; + int maxClutEntries = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 
256 : 512; ClutTexture *tex = new ClutTexture(); Draw::TextureDesc desc{}; - desc.width = texturePixels; + desc.width = maxClutEntries; desc.height = 1; desc.depth = 1; desc.mipLevels = 1; @@ -81,24 +81,49 @@ Draw::Texture *TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, co desc.initData.push_back((const uint8_t *)rawClut); break; case GEPaletteFormat::GE_CMODE_16BIT_BGR5650: - ConvertRGB565ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels); + ConvertRGB565ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries); desc.initData.push_back(convTemp); break; case GEPaletteFormat::GE_CMODE_16BIT_ABGR5551: - ConvertRGBA5551ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels); + ConvertRGBA5551ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries); desc.initData.push_back(convTemp); break; case GEPaletteFormat::GE_CMODE_16BIT_ABGR4444: - ConvertRGBA4444ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels); + ConvertRGBA4444ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries); desc.initData.push_back(convTemp); break; } + int lastR = 0; + int lastG = 0; + int lastB = 0; + int lastA = 0; + + int rampLength = 0; + // Quick check for how many continuously growing entries we have at the start. + // Bilinearly filtering CLUTs only really makes sense for this kind of ramp. 
+ for (int i = 0; i < maxClutEntries; i++) { + rampLength = i + 1; + int r = desc.initData[0][i * 4]; + int g = desc.initData[0][i * 4 + 1]; + int b = desc.initData[0][i * 4 + 2]; + int a = desc.initData[0][i * 4 + 3]; + if (r < lastR || g < lastG || b < lastB || a < lastA) { + break; + } else { + lastR = r; + lastG = g; + lastB = b; + lastA = a; + } + } + tex->texture = draw_->CreateTexture(desc); tex->lastFrame = gpuStats.numFlips; + tex->rampLength = rampLength; texCache_[clutId] = tex; - return tex->texture; + return *tex; } void TextureShaderCache::Clear() { diff --git a/GPU/Common/TextureShaderCommon.h b/GPU/Common/TextureShaderCommon.h index e2967ea89c3d..f5ff0af8dcd8 100644 --- a/GPU/Common/TextureShaderCommon.h +++ b/GPU/Common/TextureShaderCommon.h @@ -39,6 +39,7 @@ class ClutTexture { public: Draw::Texture *texture; int lastFrame; + int rampLength; }; // For CLUT depal shaders, and other pre-bind texture shaders. @@ -49,7 +50,7 @@ class TextureShaderCache { ~TextureShaderCache(); TextureShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat); - Draw::Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); + ClutTexture GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); Draw::SamplerState *GetSampler(); diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index 6fb19338d9d3..3af948856057 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -225,7 +225,7 @@ void TextureCacheGLES::BindTexture(TexCacheEntry *entry) { int maxLevel = (entry->status & TexCacheEntry::STATUS_NO_MIPS) ? 
0 : entry->maxLevel; SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry); ApplySamplingParams(samplerKey); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); } void TextureCacheGLES::Unbind() { diff --git a/GPU/GPUState.h b/GPU/GPUState.h index ae1c2c651045..f75a09fdade6 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -529,9 +529,10 @@ struct GPUStateCache { bool IsDirty(u64 what) const { return (dirty & what) != 0ULL; } - void SetUseShaderDepal(bool depal) { + void SetUseShaderDepal(bool depal, bool smoothed) { if (depal != useShaderDepal) { useShaderDepal = depal; + useSmoothedShaderDepal = smoothed; Dirty(DIRTY_FRAGMENTSHADER_STATE); } } @@ -635,6 +636,7 @@ struct GPUStateCache { int spline_num_points_u; bool useShaderDepal; + bool useSmoothedShaderDepal; GEBufferFormat depalFramebufferFormat; u32 getRelativeAddress(u32 data) const; diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 6c15264b1f71..522d88ce5a05 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -402,7 +402,7 @@ void TextureCacheVulkan::BindTexture(TexCacheEntry *entry) { curSampler_ = samplerCache_.GetOrCreateSampler(samplerKey); imageView_ = entry->vkTex->GetImageView(); drawEngine_->SetDepalTexture(VK_NULL_HANDLE); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); } void TextureCacheVulkan::ApplySamplingParams(const SamplerCacheKey &key) { diff --git a/assets/compat.ini b/assets/compat.ini index 55ddf688fc57..974a6216defd 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -1276,10 +1276,3 @@ UCKS45048 = true UCJS18030 = true UCJS18047 = true NPJG00015 = true - -[SmoothedDepal] -# Test Drive Unlimited smoothed CLUT lookups. 
See comments in #13355 -ULET00386 = true -ULES00637 = true -ULKS46126 = true -ULUS10249 = true From 2a6015c1e3b7ae3ac0ee87584d4edd32cfaceb13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 22 Aug 2022 10:32:02 +0200 Subject: [PATCH 3/4] Better checks for smoothed depal --- GPU/Common/FragmentShaderGenerator.cpp | 3 +++ GPU/Common/TextureCacheCommon.cpp | 28 +++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index a38e70fd5c69..f278363e5f1f 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -602,6 +602,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } else { WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord); } + // Restrictions on this are checked before setting the smoothed flag. + // Only RGB565 and RGBA5551 are supported, and only the specific shifts hitting the + // channels directly. WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord); WRITE(p, " uint depalShift = (u_depal_mask_shift_off_fmt >> 8) & 0xFFU;\n"); WRITE(p, " uint depalFmt = (u_depal_mask_shift_off_fmt >> 24) & 0x3U;\n"); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 6d14e0cd4ed5..93d120128679 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1859,6 +1859,31 @@ bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) { } } +// If the palette is detected as a smooth ramp, we can interpolate for higher color precision. +// But we only do it if the mask/shift exactly matches a color channel, else something different might be going +// on and we definitely don't want to interpolate. +// Great enhancement for Test Drive. 
+static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebufferFormat, int rampLength) { + if (gstate.getClutIndexStartPos() == 0 && + gstate.getClutIndexMask() <= rampLength) { + switch (framebufferFormat) { + case GE_FORMAT_565: + if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 11) { + return gstate.getClutIndexMask() == 0x1F; + } else if (gstate.getClutIndexShift() == 5) { + return gstate.getClutIndexMask() == 0x3F; + } + break; + case GE_FORMAT_5551: + if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 5 || gstate.getClutIndexShift() == 10) { + return gstate.getClutIndexMask() == 0x1F; + } + break; + } + } + return false; +} + void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) { TextureShader *textureShader = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; @@ -1901,7 +1926,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer // Since we started/ended render passes, might need these. gstate_c.Dirty(DIRTY_DEPAL); - gstate_c.SetUseShaderDepal(true, gstate.getClutIndexStartPos() == 0 && gstate.getClutIndexMask() <= clutTexture.rampLength); + + gstate_c.SetUseShaderDepal(true, CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength)); gstate_c.depalFramebufferFormat = framebuffer->drawnFormat; const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16); const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor; From e3943f6d0ddf7ce9d5512d571cfa7ca0d3c8db8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 22 Aug 2022 11:07:25 +0200 Subject: [PATCH 4/4] Implement smoothed depal for the "old" depal path as well. 
--- GPU/Common/DepalettizeShaderCommon.cpp | 69 ++++++++++++++++++++------ GPU/Common/DepalettizeShaderCommon.h | 1 + GPU/Common/TextureCacheCommon.cpp | 18 ++++--- GPU/Common/TextureShaderCommon.cpp | 36 ++++++++++---- GPU/Common/TextureShaderCommon.h | 5 +- 5 files changed, 98 insertions(+), 31 deletions(-) diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 2b9ddca41348..6e34d0240242 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -45,7 +45,7 @@ static const VaryingDef varyings[1] = { }; // Uses integer instructions available since OpenGL 3.0, ES 3.0 (and 2.0 with extensions), and of course Vulkan and D3D11. -void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) { +void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) { const int shift = config.shift; const int mask = config.mask; @@ -140,7 +140,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con } // FP only, to suit GL(ES) 2.0 and DX9 -void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) { +void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) { char lookupMethod[128] = "index.r"; const int shift = config.shift; @@ -288,23 +288,64 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n"); } +void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) { + const char *sourceChannel = "error"; + float indexMultiplier = 32.0f; + + if (config.bufferFormat == GE_FORMAT_5551) { + _dbg_assert_(config.mask == 0x1F); + switch (config.shift) { + case 0: sourceChannel = "r"; break; + case 5: sourceChannel = "g"; break; + case 10: sourceChannel = "b"; break; + default: _dbg_assert_(false); + } + } else if 
(config.bufferFormat == GE_FORMAT_565) { + _dbg_assert_(config.mask == 0x1F || config.mask == 0x3F); + switch (config.shift) { + case 0: sourceChannel = "r"; break; + case 5: sourceChannel = "g"; indexMultiplier = 64.0f; break; + case 11: sourceChannel = "b"; break; + default: _dbg_assert_(false); + } + } else { + _dbg_assert_(false); + } + + writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier); + + float texturePixels = 256.f; + if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) { + texturePixels = 512.f; + } + + writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / texturePixels); + writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n"); +} + void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang) { ShaderWriter writer(buffer, lang, ShaderStage::Fragment); writer.DeclareSamplers(samplers); writer.HighPrecisionFloat(); writer.BeginFSMain(Slice::empty(), varyings, FSFLAG_NONE); - switch (lang.shaderLanguage) { - case HLSL_D3D9: - case GLSL_1xx: - GenerateDepalShaderFloat(writer, config, lang); - break; - case GLSL_VULKAN: - case GLSL_3xx: - case HLSL_D3D11: - GenerateDepalShader300(writer, config, lang); - break; - default: - _assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage); + if (config.smoothedDepal) { + // Handles a limited set of cases, but doesn't need any integer math so we don't + // need two variants. 
+ GenerateDepalSmoothed(writer, config); + } else { + switch (lang.shaderLanguage) { + case HLSL_D3D9: + case GLSL_1xx: + GenerateDepalShaderFloat(writer, config); + break; + case GLSL_VULKAN: + case GLSL_3xx: + case HLSL_D3D11: + GenerateDepalShader300(writer, config); + break; + default: + _assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage); + } } writer.EndFSMain("outColor", FSFLAG_NONE); } diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h index 91186f5d2038..322784c0f0d3 100644 --- a/GPU/Common/DepalettizeShaderCommon.h +++ b/GPU/Common/DepalettizeShaderCommon.h @@ -31,6 +31,7 @@ struct DepalConfig { GEPaletteFormat clutFormat; GETextureFormat textureFormat; GEBufferFormat bufferFormat; + bool smoothedDepal; }; void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 93d120128679..0f3e82ff49d3 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1906,12 +1906,17 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer break; } + const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); + ClutTexture clutTexture{}; + bool smoothedDepal = false; + if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) { + clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); + smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength); + if (useShaderDepal) { - const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); // Very icky conflation here of native and thin3d rendering. This will need careful work per backend in BindAsClutTexture. 
- ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); BindAsClutTexture(clutTexture.texture); framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); @@ -1927,7 +1932,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer // Since we started/ended render passes, might need these. gstate_c.Dirty(DIRTY_DEPAL); - gstate_c.SetUseShaderDepal(true, CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength)); + gstate_c.SetUseShaderDepal(true, smoothedDepal); gstate_c.depalFramebufferFormat = framebuffer->drawnFormat; const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16); const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor; @@ -1939,7 +1944,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer return; } - textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); + textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat, smoothedDepal); gstate_c.SetUseShaderDepal(false, false); } @@ -1957,9 +1962,10 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? 
Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0); draw_->BindTexture(1, clutTexture.texture); - Draw::SamplerState *nearest = textureShaderCache_->GetSampler(); + Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false); + Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(smoothedDepal); draw_->BindSamplerStates(0, 1, &nearest); - draw_->BindSamplerStates(1, 1, &nearest); + draw_->BindSamplerStates(1, 1, &clutSampler); textureShaderCache_->ApplyShader(textureShader, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight, diff --git a/GPU/Common/TextureShaderCommon.cpp b/GPU/Common/TextureShaderCommon.cpp index 1f684af5da7c..c30ccb0204a7 100644 --- a/GPU/Common/TextureShaderCommon.cpp +++ b/GPU/Common/TextureShaderCommon.cpp @@ -147,6 +147,10 @@ void TextureShaderCache::Clear() { nearestSampler_->Release(); nearestSampler_ = nullptr; } + if (linearSampler_) { + linearSampler_->Release(); + linearSampler_ = nullptr; + } } void TextureShaderCache::Decimate() { @@ -161,15 +165,28 @@ void TextureShaderCache::Decimate() { } } -Draw::SamplerState *TextureShaderCache::GetSampler() { - if (!nearestSampler_) { - Draw::SamplerStateDesc desc{}; - desc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE; - desc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE; - desc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE; - nearestSampler_ = draw_->CreateSamplerState(desc); +Draw::SamplerState *TextureShaderCache::GetSampler(bool linearFilter) { + if (linearFilter) { + if (!linearSampler_) { + Draw::SamplerStateDesc desc{}; + desc.magFilter = Draw::TextureFilter::LINEAR; + desc.minFilter = Draw::TextureFilter::LINEAR; + desc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE; + linearSampler_ = draw_->CreateSamplerState(desc); + } + return linearSampler_; + } else { + if (!nearestSampler_) { + 
Draw::SamplerStateDesc desc{}; + desc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE; + nearestSampler_ = draw_->CreateSamplerState(desc); + } + return nearestSampler_; } - return nearestSampler_; } TextureShader *TextureShaderCache::CreateShader(const char *fs) { @@ -220,7 +237,7 @@ TextureShader *TextureShaderCache::CreateShader(const char *fs) { return depal; } -TextureShader *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat) { +TextureShader *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat, bool smoothedDepal) { using namespace Draw; // Generate an ID for depal shaders. @@ -240,6 +257,7 @@ TextureShader *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETex config.mask = gstate.getClutIndexMask(); config.bufferFormat = bufferFormat; config.textureFormat = textureFormat; + config.smoothedDepal = smoothedDepal; char *buffer = new char[4096]; GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc()); diff --git a/GPU/Common/TextureShaderCommon.h b/GPU/Common/TextureShaderCommon.h index f5ff0af8dcd8..583aa3d516f3 100644 --- a/GPU/Common/TextureShaderCommon.h +++ b/GPU/Common/TextureShaderCommon.h @@ -49,10 +49,10 @@ class TextureShaderCache { TextureShaderCache(Draw::DrawContext *draw); ~TextureShaderCache(); - TextureShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat); + TextureShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat, bool smoothedDepal); ClutTexture GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); - Draw::SamplerState *GetSampler(); + Draw::SamplerState *GetSampler(bool linearFilter); void ApplyShader(TextureShader *shader, float bufferW, float bufferH, int 
renderW, int renderH, const KnownVertexBounds &bounds, u32 uoff, u32 voff); @@ -70,6 +70,7 @@ class TextureShaderCache { Draw::DrawContext *draw_; Draw::ShaderModule *vertexShader_ = nullptr; Draw::SamplerState *nearestSampler_ = nullptr; + Draw::SamplerState *linearSampler_ = nullptr; std::map depalCache_; std::map texCache_;