Skip to content

Commit

Permalink
Merge pull request #15710 from hrydgard/test-drive-smooth-depal
Browse files Browse the repository at this point in the history
Implement smooth depal lookups for Test Drive's strange usage.
  • Loading branch information
hrydgard authored Aug 22, 2022
2 parents 0e780be + e3943f6 commit 15f51c3
Show file tree
Hide file tree
Showing 13 changed files with 203 additions and 51 deletions.
69 changes: 55 additions & 14 deletions GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ static const VaryingDef varyings[1] = {
};

// Uses integer instructions available since OpenGL 3.0, ES 3.0 (and 2.0 with extensions), and of course Vulkan and D3D11.
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
const int shift = config.shift;
const int mask = config.mask;

Expand Down Expand Up @@ -140,7 +140,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
}

// FP only, to suit GL(ES) 2.0 and DX9
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
char lookupMethod[128] = "index.r";

const int shift = config.shift;
Expand Down Expand Up @@ -288,23 +288,64 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}

void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
const char *sourceChannel = "error";
float indexMultiplier = 32.0f;

if (config.bufferFormat == GE_FORMAT_5551) {
_dbg_assert_(config.mask == 0x1F);
switch (config.shift) {
case 0: sourceChannel = "r"; break;
case 5: sourceChannel = "g"; break;
case 10: sourceChannel = "b"; break;
default: _dbg_assert_(false);
}
} else if (config.bufferFormat == GE_FORMAT_565) {
_dbg_assert_(config.mask == 0x1F || config.mask == 0x3F);
switch (config.shift) {
case 0: sourceChannel = "r"; break;
case 5: sourceChannel = "g"; indexMultiplier = 64.0f; break;
case 11: sourceChannel = "b"; break;
default: _dbg_assert_(false);
}
} else {
_dbg_assert_(false);
}

writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier);

float texturePixels = 256.f;
if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512.f;
}

writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / texturePixels);
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}

void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
ShaderWriter writer(buffer, lang, ShaderStage::Fragment);
writer.DeclareSamplers(samplers);
writer.HighPrecisionFloat();
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
switch (lang.shaderLanguage) {
case HLSL_D3D9:
case GLSL_1xx:
GenerateDepalShaderFloat(writer, config, lang);
break;
case GLSL_VULKAN:
case GLSL_3xx:
case HLSL_D3D11:
GenerateDepalShader300(writer, config, lang);
break;
default:
_assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage);
if (config.smoothedDepal) {
// Handles a limited set of cases, but doesn't need any integer math so we don't
// need two variants.
GenerateDepalSmoothed(writer, config);
} else {
switch (lang.shaderLanguage) {
case HLSL_D3D9:
case GLSL_1xx:
GenerateDepalShaderFloat(writer, config);
break;
case GLSL_VULKAN:
case GLSL_3xx:
case HLSL_D3D11:
GenerateDepalShader300(writer, config);
break;
default:
_assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage);
}
}
writer.EndFSMain("outColor", FSFLAG_NONE);
}
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/DepalettizeShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ struct DepalConfig {
GEPaletteFormat clutFormat;
GETextureFormat textureFormat;
GEBufferFormat bufferFormat;
bool smoothedDepal;
};

void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang);
Expand Down
28 changes: 28 additions & 0 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/GPU/ShaderWriter.h"
#include "Common/GPU/thin3d.h"
#include "Core/Compatibility.h"
#include "Core/Reporting.h"
#include "Core/Config.h"
#include "Core/System.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderUniforms.h"
Expand Down Expand Up @@ -88,6 +90,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu

bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug;
bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too.
bool smoothedDepal = id.Bit(FS_BIT_SHADER_SMOOTHED_DEPAL);
bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE);
bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps;

Expand Down Expand Up @@ -590,6 +593,31 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}
}
}
} else if (shaderDepal && smoothedDepal) {
// Specific mode for Test Drive. Fixes the banding.
if (doTextureProjection) {
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
// However it is good for precision on older hardware like PowerVR.
WRITE(p, " vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord);
} else {
WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord);
}
// Restrictions on this are checked before setting the smoothed flag.
// Only RGB565 and RGBA5551 are supported, and only the specific shifts hitting the
// channels directly.
WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord);
WRITE(p, " uint depalShift = (u_depal_mask_shift_off_fmt >> 8) & 0xFFU;\n");
WRITE(p, " uint depalFmt = (u_depal_mask_shift_off_fmt >> 24) & 0x3U;\n");
WRITE(p, " float index0 = t.r;\n");
WRITE(p, " float mul = 32.0 / 256.0;\n");
WRITE(p, " if (depalFmt == 0) {\n"); // yes, different versions of Test Drive use different formats. Could do compile time by adding more compat flags but meh.
WRITE(p, " if (depalShift == 5) { index0 = t.g; mul = 64.0 / 256.0; }\n");
WRITE(p, " else if (depalShift == 11) { index0 = t.b; }\n");
WRITE(p, " } else {\n");
WRITE(p, " if (depalShift == 5) { index0 = t.g; }\n");
WRITE(p, " else if (depalShift == 10) { index0 = t.b; }\n");
WRITE(p, " }\n");
WRITE(p, " t = %s(pal, vec2(index0 * mul, 0.0));\n", compat.texture);
} else {
if (doTextureProjection) {
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
Expand Down
2 changes: 2 additions & 0 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
bool useShaderDepal = gstate_c.useShaderDepal;
bool useSmoothedDepal = gstate_c.useSmoothedShaderDepal;
bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead);

// Note how we here recompute some of the work already done in state mapping.
Expand Down Expand Up @@ -290,6 +291,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
}
id.SetBit(FS_BIT_BGRA_TEXTURE, gstate_c.bgraTexture);
id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal);
id.SetBit(FS_BIT_SHADER_SMOOTHED_DEPAL, useSmoothedDepal);
id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D);
}

Expand Down
1 change: 1 addition & 0 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ enum FShaderBit : uint8_t {
FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49,
FS_BIT_COLOR_WRITEMASK = 50,
FS_BIT_3D_TEXTURE = 51,
FS_BIT_SHADER_SMOOTHED_DEPAL = 52,
};

static inline FShaderBit operator +(FShaderBit bit, int i) {
Expand Down
54 changes: 43 additions & 11 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1859,6 +1859,31 @@ bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) {
}
}

// If the palette is detected as a smooth ramp, we can interpolate for higher color precision.
// But we only do it if the mask/shift exactly matches a color channel, else something different might be going
// on and we definitely don't want to interpolate.
// Great enhancement for Test Drive.
static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebufferFormat, int rampLength) {
if (gstate.getClutIndexStartPos() == 0 &&
gstate.getClutIndexMask() <= rampLength) {
switch (framebufferFormat) {
case GE_FORMAT_565:
if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 11) {
return gstate.getClutIndexMask() == 0x1F;
} else if (gstate.getClutIndexShift() == 5) {
return gstate.getClutIndexMask() == 0x3F;
}
break;
case GE_FORMAT_5551:
if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 5 || gstate.getClutIndexShift() == 10) {
return gstate.getClutIndexMask() == 0x1F;
}
break;
}
}
return false;
}

void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) {
TextureShader *textureShader = nullptr;
uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
Expand All @@ -1881,13 +1906,18 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
break;
}

const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
ClutTexture clutTexture{};
bool smoothedDepal = false;

if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength);

if (useShaderDepal) {
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();

// Very icky conflation here of native and thin3d rendering. This will need careful work per backend in BindAsClutTexture.
Draw::Texture *clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
BindAsClutTexture(clutTexture);
BindAsClutTexture(clutTexture.texture);

framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
// Vulkan needs to do some extra work here to pick out the native handle from Draw.
Expand All @@ -1901,7 +1931,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer

// Since we started/ended render passes, might need these.
gstate_c.Dirty(DIRTY_DEPAL);
gstate_c.SetUseShaderDepal(true);

gstate_c.SetUseShaderDepal(true, smoothedDepal);
gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
Expand All @@ -1913,13 +1944,13 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
return;
}

textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
gstate_c.SetUseShaderDepal(false);
textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat, smoothedDepal);
gstate_c.SetUseShaderDepal(false, false);
}

if (textureShader) {
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
Draw::Texture *clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight);
draw_->BindTexture(0, nullptr);
draw_->BindTexture(1, nullptr);
Expand All @@ -1930,10 +1961,11 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
draw_->SetViewports(1, &vp);

draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
draw_->BindTexture(1, clutTexture);
Draw::SamplerState *nearest = textureShaderCache_->GetSampler();
draw_->BindTexture(1, clutTexture.texture);
Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false);
Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(smoothedDepal);
draw_->BindSamplerStates(0, 1, &nearest);
draw_->BindSamplerStates(1, 1, &nearest);
draw_->BindSamplerStates(1, 1, &clutSampler);

textureShaderCache_->ApplyShader(textureShader,
framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight,
Expand All @@ -1958,7 +1990,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
BoundFramebufferTexture();

gstate_c.SetUseShaderDepal(false);
gstate_c.SetUseShaderDepal(false, false);
gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
}

Expand Down
Loading

0 comments on commit 15f51c3

Please sign in to comment.