diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 2b9ddca41348..6e34d0240242 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -45,7 +45,7 @@ static const VaryingDef varyings[1] = { }; // Uses integer instructions available since OpenGL 3.0, ES 3.0 (and 2.0 with extensions), and of course Vulkan and D3D11. -void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) { +void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) { const int shift = config.shift; const int mask = config.mask; @@ -140,7 +140,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con } // FP only, to suit GL(ES) 2.0 and DX9 -void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) { +void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) { char lookupMethod[128] = "index.r"; const int shift = config.shift; @@ -288,23 +288,64 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n"); } +void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) { + const char *sourceChannel = "error"; + float indexMultiplier = 32.0f; + + if (config.bufferFormat == GE_FORMAT_5551) { + _dbg_assert_(config.mask == 0x1F); + switch (config.shift) { + case 0: sourceChannel = "r"; break; + case 5: sourceChannel = "g"; break; + case 10: sourceChannel = "b"; break; + default: _dbg_assert_(false); + } + } else if (config.bufferFormat == GE_FORMAT_565) { + _dbg_assert_(config.mask == 0x1F || config.mask == 0x3F); + switch (config.shift) { + case 0: sourceChannel = "r"; break; + case 5: sourceChannel = "g"; indexMultiplier = 64.0f; break; + case 11: sourceChannel = "b"; break; + default: _dbg_assert_(false); + } + } else { + 
_dbg_assert_(false); + } + + writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier); + + float texturePixels = 256.f; + if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) { + texturePixels = 512.f; + } + + writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / texturePixels); + writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n"); +} + void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang) { ShaderWriter writer(buffer, lang, ShaderStage::Fragment); writer.DeclareSamplers(samplers); writer.HighPrecisionFloat(); writer.BeginFSMain(Slice::empty(), varyings, FSFLAG_NONE); - switch (lang.shaderLanguage) { - case HLSL_D3D9: - case GLSL_1xx: - GenerateDepalShaderFloat(writer, config, lang); - break; - case GLSL_VULKAN: - case GLSL_3xx: - case HLSL_D3D11: - GenerateDepalShader300(writer, config, lang); - break; - default: - _assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage); + if (config.smoothedDepal) { + // Handles a limited set of cases, but doesn't need any integer math so we don't + // need two variants. 
+ GenerateDepalSmoothed(writer, config); + } else { + switch (lang.shaderLanguage) { + case HLSL_D3D9: + case GLSL_1xx: + GenerateDepalShaderFloat(writer, config); + break; + case GLSL_VULKAN: + case GLSL_3xx: + case HLSL_D3D11: + GenerateDepalShader300(writer, config); + break; + default: + _assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage); + } } writer.EndFSMain("outColor", FSFLAG_NONE); } diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h index 91186f5d2038..322784c0f0d3 100644 --- a/GPU/Common/DepalettizeShaderCommon.h +++ b/GPU/Common/DepalettizeShaderCommon.h @@ -31,6 +31,7 @@ struct DepalConfig { GEPaletteFormat clutFormat; GETextureFormat textureFormat; GEBufferFormat bufferFormat; + bool smoothedDepal; }; void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang); diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index f956217b3884..f278363e5f1f 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -23,8 +23,10 @@ #include "Common/GPU/OpenGL/GLFeatures.h" #include "Common/GPU/ShaderWriter.h" #include "Common/GPU/thin3d.h" +#include "Core/Compatibility.h" #include "Core/Reporting.h" #include "Core/Config.h" +#include "Core/System.h" #include "GPU/Common/GPUStateUtils.h" #include "GPU/Common/ShaderId.h" #include "GPU/Common/ShaderUniforms.h" @@ -88,6 +90,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug; bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too. 
+ bool smoothedDepal = id.Bit(FS_BIT_SHADER_SMOOTHED_DEPAL); bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE); bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps; @@ -590,6 +593,31 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } } } + } else if (shaderDepal && smoothedDepal) { + // Specific mode for Test Drive. Fixes the banding. + if (doTextureProjection) { + // We don't use textureProj because we need better control and it's probably not much of a savings anyway. + // However it is good for precision on older hardware like PowerVR. + WRITE(p, " vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord); + } else { + WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord); + } + // Restrictions on this are checked before setting the smoothed flag. + // Only RGB565 and RGBA5551 are supported, and only the specific shifts hitting the + // channels directly. + WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord); + WRITE(p, " uint depalShift = (u_depal_mask_shift_off_fmt >> 8) & 0xFFU;\n"); + WRITE(p, " uint depalFmt = (u_depal_mask_shift_off_fmt >> 24) & 0x3U;\n"); + WRITE(p, " float index0 = t.r;\n"); + WRITE(p, " float mul = 32.0 / 256.0;\n"); + WRITE(p, " if (depalFmt == 0) {\n"); // yes, different versions of Test Drive use different formats. Could do compile time by adding more compat flags but meh. + WRITE(p, " if (depalShift == 5) { index0 = t.g; mul = 64.0 / 256.0; }\n"); + WRITE(p, " else if (depalShift == 11) { index0 = t.b; }\n"); + WRITE(p, " } else {\n"); + WRITE(p, " if (depalShift == 5) { index0 = t.g; }\n"); + WRITE(p, " else if (depalShift == 10) { index0 = t.b; }\n"); + WRITE(p, " }\n"); + WRITE(p, " t = %s(pal, vec2(index0 * mul, 0.0));\n", compat.texture); } else { if (doTextureProjection) { // We don't use textureProj because we need better control and it's probably not much of a savings anyway. 
diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 510e90e9297e..849a1b8b193d 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -261,6 +261,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT; bool useShaderDepal = gstate_c.useShaderDepal; + bool useSmoothedDepal = gstate_c.useSmoothedShaderDepal; bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead); // Note how we here recompute some of the work already done in state mapping. @@ -290,6 +291,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { } id.SetBit(FS_BIT_BGRA_TEXTURE, gstate_c.bgraTexture); id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal); + id.SetBit(FS_BIT_SHADER_SMOOTHED_DEPAL, useSmoothedDepal); id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D); } diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 8dcee32c1ea4..a105af67462c 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -94,6 +94,7 @@ enum FShaderBit : uint8_t { FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49, FS_BIT_COLOR_WRITEMASK = 50, FS_BIT_3D_TEXTURE = 51, + FS_BIT_SHADER_SMOOTHED_DEPAL = 52, }; static inline FShaderBit operator +(FShaderBit bit, int i) { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index de6c840722a6..0f3e82ff49d3 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1859,6 +1859,31 @@ bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) { } } +// If the palette is detected as a smooth ramp, we can interpolate for higher color precision. +// But we only do it if the mask/shift exactly matches a color channel, else something different might be going +// on and we definitely don't want to interpolate. +// Great enhancement for Test Drive. 
+static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebufferFormat, int rampLength) { + if (gstate.getClutIndexStartPos() == 0 && + gstate.getClutIndexMask() <= rampLength) { + switch (framebufferFormat) { + case GE_FORMAT_565: + if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 11) { + return gstate.getClutIndexMask() == 0x1F; + } else if (gstate.getClutIndexShift() == 5) { + return gstate.getClutIndexMask() == 0x3F; + } + break; + case GE_FORMAT_5551: + if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 5 || gstate.getClutIndexShift() == 10) { + return gstate.getClutIndexMask() == 0x1F; + } + break; + } + } + return false; +} + void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) { TextureShader *textureShader = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; @@ -1881,13 +1906,18 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer break; } + const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); + ClutTexture clutTexture{}; + bool smoothedDepal = false; + if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) { + clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); + smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength); + if (useShaderDepal) { - const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); // Very icky conflation here of native and thin3d rendering. This will need careful work per backend in BindAsClutTexture. 
- Draw::Texture *clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); - BindAsClutTexture(clutTexture); + BindAsClutTexture(clutTexture.texture); framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); // Vulkan needs to do some extra work here to pick out the native handle from Draw. @@ -1901,7 +1931,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer // Since we started/ended render passes, might need these. gstate_c.Dirty(DIRTY_DEPAL); - gstate_c.SetUseShaderDepal(true); + + gstate_c.SetUseShaderDepal(true, smoothedDepal); gstate_c.depalFramebufferFormat = framebuffer->drawnFormat; const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16); const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor; @@ -1913,13 +1944,13 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer return; } - textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); - gstate_c.SetUseShaderDepal(false); + textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? 
GE_FORMAT_DEPTH16 : framebuffer->drawnFormat, smoothedDepal); + gstate_c.SetUseShaderDepal(false, false); } if (textureShader) { const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); - Draw::Texture *clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); + ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_); Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight); draw_->BindTexture(0, nullptr); draw_->BindTexture(1, nullptr); @@ -1930,10 +1961,11 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer draw_->SetViewports(1, &vp); draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0); - draw_->BindTexture(1, clutTexture); - Draw::SamplerState *nearest = textureShaderCache_->GetSampler(); + draw_->BindTexture(1, clutTexture.texture); + Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false); + Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(smoothedDepal); draw_->BindSamplerStates(0, 1, &nearest); - draw_->BindSamplerStates(1, 1, &nearest); + draw_->BindSamplerStates(1, 1, &clutSampler); textureShaderCache_->ApplyShader(textureShader, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight, @@ -1958,7 +1990,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); BoundFramebufferTexture(); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); } diff --git a/GPU/Common/TextureShaderCommon.cpp b/GPU/Common/TextureShaderCommon.cpp index defbeb1575bc..c30ccb0204a7 100644 --- 
a/GPU/Common/TextureShaderCommon.cpp +++ b/GPU/Common/TextureShaderCommon.cpp @@ -51,22 +51,22 @@ void TextureShaderCache::DeviceLost() { Clear(); } -Draw::Texture *TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) { +ClutTexture TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) { // Simplistic, but works well enough. u32 clutId = clutHash ^ (uint32_t)clutFormat; auto oldtex = texCache_.find(clutId); if (oldtex != texCache_.end()) { oldtex->second->lastFrame = gpuStats.numFlips; - return oldtex->second->texture; + return *oldtex->second; } - int texturePixels = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512; + int maxClutEntries = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512; ClutTexture *tex = new ClutTexture(); Draw::TextureDesc desc{}; - desc.width = texturePixels; + desc.width = maxClutEntries; desc.height = 1; desc.depth = 1; desc.mipLevels = 1; @@ -81,24 +81,49 @@ Draw::Texture *TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, co desc.initData.push_back((const uint8_t *)rawClut); break; case GEPaletteFormat::GE_CMODE_16BIT_BGR5650: - ConvertRGB565ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels); + ConvertRGB565ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries); desc.initData.push_back(convTemp); break; case GEPaletteFormat::GE_CMODE_16BIT_ABGR5551: - ConvertRGBA5551ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels); + ConvertRGBA5551ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries); desc.initData.push_back(convTemp); break; case GEPaletteFormat::GE_CMODE_16BIT_ABGR4444: - ConvertRGBA4444ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels); + ConvertRGBA4444ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries); desc.initData.push_back(convTemp); break; } + int lastR = 0; + int lastG = 0; + int lastB = 0; + int lastA = 0; + + int rampLength = 0; + // Quick check 
for how many continuously growing entries we have at the start. + // Bilinearly filtering CLUTs only really makes sense for this kind of ramp. + for (int i = 0; i < maxClutEntries; i++) { + rampLength = i + 1; + int r = desc.initData[0][i * 4]; + int g = desc.initData[0][i * 4 + 1]; + int b = desc.initData[0][i * 4 + 2]; + int a = desc.initData[0][i * 4 + 3]; + if (r < lastR || g < lastG || b < lastB || a < lastA) { + break; + } else { + lastR = r; + lastG = g; + lastB = b; + lastA = a; + } + } + tex->texture = draw_->CreateTexture(desc); tex->lastFrame = gpuStats.numFlips; + tex->rampLength = rampLength; texCache_[clutId] = tex; - return tex->texture; + return *tex; } void TextureShaderCache::Clear() { @@ -122,6 +147,10 @@ void TextureShaderCache::Clear() { nearestSampler_->Release(); nearestSampler_ = nullptr; } + if (linearSampler_) { + linearSampler_->Release(); + linearSampler_ = nullptr; + } } void TextureShaderCache::Decimate() { @@ -136,15 +165,28 @@ void TextureShaderCache::Decimate() { } } -Draw::SamplerState *TextureShaderCache::GetSampler() { - if (!nearestSampler_) { - Draw::SamplerStateDesc desc{}; - desc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE; - desc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE; - desc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE; - nearestSampler_ = draw_->CreateSamplerState(desc); +Draw::SamplerState *TextureShaderCache::GetSampler(bool linearFilter) { + if (linearFilter) { + if (!linearSampler_) { + Draw::SamplerStateDesc desc{}; + desc.magFilter = Draw::TextureFilter::LINEAR; + desc.minFilter = Draw::TextureFilter::LINEAR; + desc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE; + linearSampler_ = draw_->CreateSamplerState(desc); + } + return linearSampler_; + } else { + if (!nearestSampler_) { + Draw::SamplerStateDesc desc{}; + desc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapV = 
Draw::TextureAddressMode::CLAMP_TO_EDGE; + desc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE; + nearestSampler_ = draw_->CreateSamplerState(desc); + } + return nearestSampler_; } - return nearestSampler_; } TextureShader *TextureShaderCache::CreateShader(const char *fs) { @@ -195,7 +237,7 @@ TextureShader *TextureShaderCache::CreateShader(const char *fs) { return depal; } -TextureShader *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat) { +TextureShader *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat, bool smoothedDepal) { using namespace Draw; // Generate an ID for depal shaders. @@ -215,6 +257,7 @@ TextureShader *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETex config.mask = gstate.getClutIndexMask(); config.bufferFormat = bufferFormat; config.textureFormat = textureFormat; + config.smoothedDepal = smoothedDepal; char *buffer = new char[4096]; GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc()); diff --git a/GPU/Common/TextureShaderCommon.h b/GPU/Common/TextureShaderCommon.h index e2967ea89c3d..583aa3d516f3 100644 --- a/GPU/Common/TextureShaderCommon.h +++ b/GPU/Common/TextureShaderCommon.h @@ -39,6 +39,7 @@ class ClutTexture { public: Draw::Texture *texture; int lastFrame; + int rampLength; }; // For CLUT depal shaders, and other pre-bind texture shaders. 
@@ -48,10 +49,10 @@ class TextureShaderCache { TextureShaderCache(Draw::DrawContext *draw); ~TextureShaderCache(); - TextureShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat); - Draw::Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); + TextureShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat, bool smoothedDepal); + ClutTexture GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); - Draw::SamplerState *GetSampler(); + Draw::SamplerState *GetSampler(bool linearFilter); void ApplyShader(TextureShader *shader, float bufferW, float bufferH, int renderW, int renderH, const KnownVertexBounds &bounds, u32 uoff, u32 voff); @@ -69,6 +70,7 @@ class TextureShaderCache { Draw::DrawContext *draw_; Draw::ShaderModule *vertexShader_ = nullptr; Draw::SamplerState *nearestSampler_ = nullptr; + Draw::SamplerState *linearSampler_ = nullptr; std::map depalCache_; std::map texCache_; diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index 6fb19338d9d3..3af948856057 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -225,7 +225,7 @@ void TextureCacheGLES::BindTexture(TexCacheEntry *entry) { int maxLevel = (entry->status & TexCacheEntry::STATUS_NO_MIPS) ? 
0 : entry->maxLevel; SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry); ApplySamplingParams(samplerKey); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); } void TextureCacheGLES::Unbind() { diff --git a/GPU/GPUState.h b/GPU/GPUState.h index ae1c2c651045..f75a09fdade6 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -529,9 +529,10 @@ struct GPUStateCache { bool IsDirty(u64 what) const { return (dirty & what) != 0ULL; } - void SetUseShaderDepal(bool depal) { + void SetUseShaderDepal(bool depal, bool smoothed) { if (depal != useShaderDepal) { useShaderDepal = depal; + useSmoothedShaderDepal = smoothed; Dirty(DIRTY_FRAGMENTSHADER_STATE); } } @@ -635,6 +636,7 @@ struct GPUStateCache { int spline_num_points_u; bool useShaderDepal; + bool useSmoothedShaderDepal; GEBufferFormat depalFramebufferFormat; u32 getRelativeAddress(u32 data) const; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 256c1adbaca1..27d1fda33f95 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -185,8 +185,8 @@ void DrawEngineVulkan::InitDeviceObjects() { samp.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; samp.flags = 0; - samp.magFilter = VK_FILTER_NEAREST; - samp.minFilter = VK_FILTER_NEAREST; + samp.magFilter = VK_FILTER_LINEAR; + samp.minFilter = VK_FILTER_LINEAR; res = vkCreateSampler(device, &samp, nullptr, &samplerSecondary_); _dbg_assert_(VK_SUCCESS == res); res = vkCreateSampler(device, &samp, nullptr, &nullSampler_); diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 48b8369fd7ef..e4a75f2a35ea 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -218,7 +218,7 @@ class DrawEngineVulkan : public DrawEngineCommon { // Secondary texture for shader blending VkImageView boundSecondary_ = VK_NULL_HANDLE; VkImageView boundDepal_ = VK_NULL_HANDLE; - VkSampler 
samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch. + VkSampler samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch (except in SmoothedDepal mode for Test Drive). PrehashMap vai_; VulkanPushBuffer *vertexCache_; diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 6c15264b1f71..522d88ce5a05 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -402,7 +402,7 @@ void TextureCacheVulkan::BindTexture(TexCacheEntry *entry) { curSampler_ = samplerCache_.GetOrCreateSampler(samplerKey); imageView_ = entry->vkTex->GetImageView(); drawEngine_->SetDepalTexture(VK_NULL_HANDLE); - gstate_c.SetUseShaderDepal(false); + gstate_c.SetUseShaderDepal(false, false); } void TextureCacheVulkan::ApplySamplingParams(const SamplerCacheKey &key) {