Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable logic op emulation in-shader #15960

Merged
merged 4 commits into from
Sep 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Core/Compatibility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,10 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
// Loads a single compat flag from the ini file into *flag.
//   iniFile - ini file to query.
//   gameID  - key looked up for the current game.
//   option  - name of the compat option (used as the first argument to IniFile::Get).
//   flag    - in/out; its current value is passed to Get as the fourth argument
//             (presumably the default when no entry exists - confirm against IniFile::Get).
// Options found in ignored_ are left untouched.
void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {
	// Ignored options keep whatever value the caller already had.
	if (ignored_.find(option) != ignored_.end()) {
		return;
	}

	iniFile.Get(option, gameID.c_str(), flag, *flag);

	// Shortcut for debugging, sometimes useful to globally enable compat flags:
	// an "ALL" entry under the option turns the flag on regardless of the game.
	bool enabledForAll = false;
	iniFile.Get(option, "ALL", &enabledForAll, false);
	if (enabledForAll) {
		*flag = true;
	}
}
55 changes: 40 additions & 15 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
blueToAlpha = true;
}

GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3);
StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);

bool isModeClear = id.Bit(FS_BIT_CLEARMODE);

const char *shading = "";
Expand All @@ -121,7 +116,16 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu

bool useDiscardStencilBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL);

bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask;
GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3);
StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a shame this isn't 3 bits. I think we could use the uniform more or merge STENCIL_VALUE_INVERT + STENCIL_VALUE_ONE for the purposes of the shader id. If it had one less value it could fit in 3 bits... well anyway, not related to this change really.

-[Unknown]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but we still aren't running out of frag id bits, so I'm inclined to just do that when we need to :) As you say unrelated anyway.


// Distinct from the logic op simulation support.
GELogicOp replaceLogicOpType = isModeClear ? GE_LOGIC_COPY : (GELogicOp)id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4);
bool replaceLogicOp = replaceLogicOpType != GE_LOGIC_COPY && compat.bitwiseOps;

bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp;
bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);

bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
Expand Down Expand Up @@ -425,7 +429,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}

// Provide implementations of packUnorm4x8 and unpackUnorm4x8 if not available.
if (colorWriteMask && !hasPackUnorm4x8) {
if ((colorWriteMask || replaceLogicOp) && !hasPackUnorm4x8) {
WRITE(p, "uint packUnorm4x8(%svec4 v) {\n", compat.shaderLanguage == GLSL_VULKAN ? "highp " : "");
WRITE(p, " highp vec4 f = clamp(v, 0.0, 1.0);\n");
WRITE(p, " uvec4 u = uvec4(255.0 * f);\n");
Expand Down Expand Up @@ -1078,16 +1082,37 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
return false;
}

// Final color computed - apply color write mask.
// TODO: Maybe optimize to only do math on the affected channels?
// Or .. meh. That would require more shader bits. Though we could
// of course optimize for the common mask 0xF00000, though again, blue-to-alpha
// does a better job with that.
if (colorWriteMask) {
// Final color computed - apply logic ops and bitwise color write mask, through shader blending, if specified.
if (colorWriteMask || replaceLogicOp) {
hrydgard marked this conversation as resolved.
Show resolved Hide resolved
WRITE(p, " highp uint v32 = packUnorm4x8(%s);\n", compat.fragColor0);
WRITE(p, " highp uint d32 = packUnorm4x8(destColor);\n");
// Note that the mask has been flipped to the PC way - 1 means write.
WRITE(p, " v32 = (v32 & u_colorWriteMask) | (d32 & ~u_colorWriteMask);\n");

// v32 is both the "s" to the logical operation, and the value that we'll merge to the destination with masking later.
// d32 is the "d" to the logical operation.
// NOTE: Alpha of v32 needs to be preserved. Same equations as in the software renderer.
// Emit the shader statement implementing the selected logic op on the packed colors.
// Every op only touches the low 24 bits (RGB); the top 8 bits (alpha) of v32 are kept.
// All literals in the emitted code carry the 'u' suffix: v32/d32 are declared as uint,
// and GLSL ES does not implicitly convert int literals to uint.
switch (replaceLogicOpType) {
	case GE_LOGIC_CLEAR: p.C(" v32 &= 0xFF000000u;\n"); break;
	case GE_LOGIC_AND: p.C(" v32 = v32 & (d32 | 0xFF000000u);\n"); break;
	case GE_LOGIC_AND_REVERSE: p.C(" v32 = v32 & (~d32 | 0xFF000000u);\n"); break;
	case GE_LOGIC_COPY: break; // source to dest, do nothing. Will be set to this, if not used.
	case GE_LOGIC_AND_INVERTED: p.C(" v32 = (~v32 & (d32 & 0x00FFFFFFu)) | (v32 & 0xFF000000u);\n"); break;
	case GE_LOGIC_NOOP: p.C(" v32 = (d32 & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
	case GE_LOGIC_XOR: p.C(" v32 = v32 ^ (d32 & 0x00FFFFFFu);\n"); break;
	case GE_LOGIC_OR: p.C(" v32 = v32 | (d32 & 0x00FFFFFFu);\n"); break;
	case GE_LOGIC_NOR: p.C(" v32 = (~(v32 | d32) & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
	case GE_LOGIC_EQUIV: p.C(" v32 = (~(v32 ^ d32) & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
	case GE_LOGIC_INVERTED: p.C(" v32 = (~d32 & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
	case GE_LOGIC_OR_REVERSE: p.C(" v32 = v32 | (~d32 & 0x00FFFFFFu);\n"); break;
	case GE_LOGIC_COPY_INVERTED: p.C(" v32 = (~v32 & 0x00FFFFFFu) | (v32 &0xFF000000u);\n"); break;
	case GE_LOGIC_OR_INVERTED: p.C(" v32 = ((~v32 | d32) & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
	case GE_LOGIC_NAND: p.C(" v32 = (~(v32 & d32) & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
	// Was "0x00FFFFFF" (signed int literal), which mismatches the uint operand.
	case GE_LOGIC_SET: p.C(" v32 |= 0x00FFFFFFu;\n"); break;
}

// Note that the mask has already been flipped to the PC way - 1 means write.
if (colorWriteMask) {
WRITE(p, " v32 = (v32 & u_colorWriteMask) | (d32 & ~u_colorWriteMask);\n");
}
WRITE(p, " %s = unpackUnorm4x8(v32);\n", compat.fragColor0);
}

Expand Down
53 changes: 37 additions & 16 deletions GPU/Common/GPUStateUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,7 @@ void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithS
// we read from the framebuffer (or a copy of it).
// We also prepare uniformMask so that if doing this in the shader gets forced-on,
// we have the right mask already.
void ConvertMaskState(GenericMaskState &maskState, bool shaderBitOpsSupported) {
static void ConvertMaskState(GenericMaskState &maskState, bool shaderBitOpsSupported) {
if (gstate_c.blueToAlpha) {
maskState.applyFramebufferRead = false;
maskState.uniformMask = 0xFF000000;
Expand Down Expand Up @@ -1042,7 +1042,7 @@ void ConvertMaskState(GenericMaskState &maskState, bool shaderBitOpsSupported) {
}

// Called even if AlphaBlendEnable == false - it also deals with stencil-related blend state.
void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBlend) {
static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBlend) {
// Blending is a bit complex to emulate. This is due to several reasons:
//
// * Doubled blend modes (src, dst, inversed) aren't supported in OpenGL.
Expand Down Expand Up @@ -1334,6 +1334,33 @@ void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBlend) {
blendState.setEquation(colorEq, alphaEq);
}

// Fills logicOpState from the current GE logic-op state, choosing between three outcomes:
//   1. emulate the op in the fragment shader (applyFramebufferRead = true),
//   2. use the hardware logic op (when logicSupported),
//   3. neither - leave everything off so the SIMULATE fallback can kick in.
// Shader emulation is only picked when a non-COPY op is enabled, forceApplyFramebuffer is set
// and shaderBitOpsSupported is true.
static void ConvertLogicOpState(GenericLogicState &logicOpState, bool logicSupported, bool shaderBitOpsSupported, bool forceApplyFramebuffer) {
	// A disabled logic op is treated the same as COPY.
	const bool enabled = gstate.isLogicOpEnabled();
	const auto op = enabled ? gstate.getLogicOp() : GE_LOGIC_COPY;

	// TODO: We can get more detailed with checks here. Some logic ops don't involve the destination at all.
	// Several can be trivially supported even without any bitwise logic.
	const bool emulateInShader = op != GE_LOGIC_COPY && forceApplyFramebuffer && shaderBitOpsSupported;
	if (emulateInShader) {
		// We have to emulate logic ops in the shader.
		// Don't use any hardware logic op, supported or not.
		logicOpState.logicOpEnabled = false;
		logicOpState.applyFramebufferRead = true;
		logicOpState.logicOp = op;
		return;
	}

	// Neither remaining path reads the framebuffer on behalf of the logic op.
	logicOpState.applyFramebufferRead = false;

	if (logicSupported) {
		// We can use hardware logic ops, if needed.
		logicOpState.logicOpEnabled = enabled;
		logicOpState.logicOp = op;
	} else {
		// In this case, the SIMULATE fallback should kick in.
		// Need to make sure this is checking for the same things though...
		logicOpState.logicOpEnabled = false;
		logicOpState.logicOp = GE_LOGIC_COPY;
	}
}

static void ConvertStencilFunc5551(GenericStencilFuncState &state) {
// Flaws:
// - INVERT should convert 1, 5, 0xFF to 0. Currently it won't always.
Expand Down Expand Up @@ -1466,19 +1493,6 @@ static void ConvertStencilFunc5551(GenericStencilFuncState &state) {
}
}

// Three-argument variant: only handles the non-shader (hardware logic op) case.
// It never requests a framebuffer read; shaderBitOpsSupported is not consulted.
void ConvertLogicOpState(GenericLogicState &logicOpState, bool logicSupported, bool shaderBitOpsSupported) {
	// Just do the non-shader case for now.
	logicOpState.applyFramebufferRead = false;  // true later?

	if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) {
		// No hardware logic op: report it as disabled with the pass-through COPY op.
		logicOpState.logicOpEnabled = false;
		logicOpState.logicOp = GE_LOGIC_COPY;
		return;
	}

	const bool enabled = gstate.isLogicOpEnabled();
	logicOpState.logicOpEnabled = enabled && logicSupported;
	logicOpState.logicOp = enabled ? gstate.getLogicOp() : GE_LOGIC_COPY;
}

static void ConvertStencilMask5551(GenericStencilFuncState &state) {
state.writeMask = state.writeMask >= 0x80 ? 0xff : 0x00;
}
Expand Down Expand Up @@ -1538,6 +1552,13 @@ void GenericBlendState::Log() {

// Computes maskState, logicState and blendState, in that order.
// The mask conversion runs first because its applyFramebufferRead result is fed into the
// logic-op and blend conversions. Afterwards, if the blend state ended up requiring a
// framebuffer read, mask and logic state are switched over to shader blending as well.
void ComputedPipelineState::Convert(bool shaderBitOpsSuppported) {
ConvertMaskState(maskState, shaderBitOpsSuppported);
// NOTE(review): two ConvertLogicOpState calls appear back to back (3-argument and 4-argument).
// This looks like the removed and added sides of a diff shown together - confirm that only
// the 4-argument call, which forwards maskState.applyFramebufferRead, is meant to remain.
ConvertLogicOpState(logicState, gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP), shaderBitOpsSuppported);
ConvertLogicOpState(logicState, gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP), shaderBitOpsSuppported, maskState.applyFramebufferRead);
// The blend conversion is forced to replace-blend when the mask already needs a framebuffer read.
ConvertBlendState(blendState, maskState.applyFramebufferRead);

// Note: If the blend state decided it had to use framebuffer reads,
// we need to switch mask and logic over to also use it, otherwise things will go wrong.
if (blendState.applyFramebufferRead) {
maskState.ConvertToShaderBlend();
logicState.ConvertToShaderBlend();
}
}
5 changes: 0 additions & 5 deletions GPU/Common/GPUStateUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ struct GenericBlendState {
void Log();
};

void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBlend);
void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState);

struct GenericMaskState {
Expand All @@ -204,8 +203,6 @@ struct GenericMaskState {
void Log();
};

void ConvertMaskState(GenericMaskState &maskState, bool shaderBitOpsSupported);

struct GenericStencilFuncState {
bool enabled;
GEComparison testFunc;
Expand Down Expand Up @@ -240,8 +237,6 @@ struct GenericLogicState {
void Log();
};

void ConvertLogicOpState(GenericLogicState &logicOpState, bool logicSupported, bool shaderBitOpsSupported);

struct ComputedPipelineState {
GenericBlendState blendState;
GenericMaskState maskState;
Expand Down
4 changes: 4 additions & 0 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
ReplaceBlendType replaceBlend = pipelineState.blendState.replaceBlend;
ReplaceAlphaType stencilToAlpha = pipelineState.blendState.replaceAlphaWithStencil;
SimulateLogicOpType simulateLogicOpType = pipelineState.blendState.simulateLogicOpType;
GELogicOp replaceLogicOpType = pipelineState.logicState.applyFramebufferRead ? pipelineState.logicState.logicOp : GE_LOGIC_COPY;

// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
// Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes.
Expand Down Expand Up @@ -325,6 +326,9 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
// 2 bits.
id.SetBits(FS_BIT_SIMULATE_LOGIC_OP_TYPE, 2, simulateLogicOpType);

// 4 bits. Set to GE_LOGIC_COPY if not used, which does nothing in the shader generator.
id.SetBits(FS_BIT_REPLACE_LOGIC_OP, 4, (int)replaceLogicOpType);

// If replaceBlend == REPLACE_BLEND_STANDARD (or REPLACE_BLEND_NO) nothing is done, so we kill these bits.
if (replaceBlend == REPLACE_BLEND_BLUE_TO_ALPHA) {
id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend);
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ enum FShaderBit : uint8_t {
FS_BIT_COLOR_WRITEMASK = 50,
FS_BIT_3D_TEXTURE = 51,
FS_BIT_SHADER_SMOOTHED_DEPAL = 52,
FS_BIT_REPLACE_LOGIC_OP = 53, // 4 bits. GE_LOGIC_COPY means no-op/off.
};

static inline FShaderBit operator +(FShaderBit bit, int i) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/ShaderManagerVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ VulkanFragmentShader *ShaderManagerVulkan::GetFragmentShaderFromModule(VkShaderM
// instantaneous.

#define CACHE_HEADER_MAGIC 0xff51f420
#define CACHE_VERSION 21
#define CACHE_VERSION 22
struct VulkanCacheHeader {
uint32_t magic;
uint32_t version;
Expand Down
13 changes: 10 additions & 3 deletions assets/compat.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1120,14 +1120,12 @@ ULJM05812 = true
NPJH50371 = true

# Colin McRae's DiRT 2 - issue #13012 (car lighting)
# Previously used ReinterpretFramebuffers + ShaderColorBitmask
ULUS10471 = true
ULJM05533 = true
NPJH50006 = true
ULES01301 = true

# Outrun 2006: Coast to Coast - issue #11358 (car reflections)
# Previously used ReinterpretFramebuffers + ShaderColorBitmask
ULES00262 = true
ULUS10064 = true
ULKS46087 = true
Expand All @@ -1138,7 +1136,16 @@ NPUZ00043 = true
NPEZ00198 = true

[ShaderColorBitmask]
# No users right now, but keeping it around as a more accurate option than BlueToAlpha, for debugging mainly Outrun.
# Colin McRae's DiRT 2 - issue #13012 (water)
ULUS10471 = true
ULJM05533 = true
NPJH50006 = true
ULES01301 = true

# Outrun 2006: Coast to Coast - issue #11358 (car reflections), #11928 (water)
ULES00262 = true
ULUS10064 = true
ULKS46087 = true

[DisableFirstFrameReadback]
# Wipeout Pure: Temporary workaround for lens flare flicker. See #13344
Expand Down