diff --git a/Common/Data/Convert/ColorConv.cpp b/Common/Data/Convert/ColorConv.cpp index 489a3f172f82..55686f5c00af 100644 --- a/Common/Data/Convert/ColorConv.cpp +++ b/Common/Data/Convert/ColorConv.cpp @@ -34,30 +34,6 @@ #endif #endif -inline u16 RGBA8888toRGB565(u32 px) { - return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); -} - -inline u16 RGBA8888toRGBA4444(u32 px) { - return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); -} - -inline u16 BGRA8888toRGB565(u32 px) { - return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); -} - -inline u16 BGRA8888toRGBA4444(u32 px) { - return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); -} - -inline u16 BGRA8888toRGBA5551(u32 px) { - return ((px >> 19) & 0x001F) | ((px >> 6) & 0x03E0) | ((px << 7) & 0x7C00) | ((px >> 16) & 0x8000); -} - -inline u16 RGBA8888toRGBA5551(u32 px) { - return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); -} - // convert 4444 image to 8888, parallelizable void convert4444_gl(u16* data, u32* out, int width, int l, int u) { for (int y = l; y < u; ++y) { diff --git a/Common/Data/Convert/ColorConv.h b/Common/Data/Convert/ColorConv.h index 66bd47b9cf56..077533171b17 100644 --- a/Common/Data/Convert/ColorConv.h +++ b/Common/Data/Convert/ColorConv.h @@ -35,6 +35,30 @@ inline u8 Convert6To8(u8 v) { return (v << 2) | (v >> 4); } +inline u16 RGBA8888toRGB565(u32 px) { + return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); +} + +inline u16 RGBA8888toRGBA4444(u32 px) { + return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); +} + +inline u16 BGRA8888toRGB565(u32 px) { + return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); +} + +inline u16 BGRA8888toRGBA4444(u32 px) { + return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); +} + +inline u16 BGRA8888toRGBA5551(u32 px) { + return ((px >> 19) & 0x001F) | ((px >> 6) & 0x03E0) | ((px << 7) & 0x7C00) | ((px >> 16) & 0x8000); +} + +inline u16 RGBA8888toRGBA5551(u32 px) { + return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); +} + inline u32 RGBA4444ToRGBA8888(u16 src) { const u32 r = (src & 0x000F) << 0; const u32 g = (src & 0x00F0) << 4; diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index b8de96d22bbd..7a27543ab4e6 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -79,6 +79,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "DisableFirstFrameReadback", &flags_.DisableFirstFrameReadback); CheckSetting(iniFile, gameID, "DisableRangeCulling", &flags_.DisableRangeCulling); CheckSetting(iniFile, gameID, "MpegAvcWarmUp", &flags_.MpegAvcWarmUp); + CheckSetting(iniFile, gameID, "BlueToAlpha", &flags_.BlueToAlpha); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index 48eeddc197f5..fd3e76d7435f 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -44,6 +44,7 @@ // // We already have the Action Replay-based cheat system for such use cases. +// TODO: Turn into bitfield for smaller mem footprint. Though I think it still fits in a cacheline... struct CompatFlags { bool VertexDepthRounding; bool PixelDepthRounding; @@ -77,6 +78,7 @@ struct CompatFlags { bool DisableFirstFrameReadback; bool DisableRangeCulling; bool MpegAvcWarmUp; + bool BlueToAlpha; }; class IniFile; diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 3fc0e16ffcc4..cb965bf78201 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -95,6 +95,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu ReplaceBlendType replaceBlend = static_cast(id.Bits(FS_BIT_REPLACE_BLEND, 3)); + bool blueToAlpha = false; + if (replaceBlend == ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA) { + blueToAlpha = true; + } + GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4); GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4); GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3); @@ -853,7 +858,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " v.rgb = v.rgb * 2.0;\n"); } - if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) { + // In some cases we need to replicate the first half of the blend equation here. + // In case of blue-to-alpha, it's since we overwrite alpha with blue before the actual blend equation runs. + if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA || replaceBlend == REPLACE_BLEND_BLUE_TO_ALPHA) { const char *srcFactor = "ERROR"; switch (replaceBlendFuncA) { case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break; @@ -1025,6 +1032,10 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " %s = unpackUnorm4x8(v32);\n", compat.fragColor0); } + if (blueToAlpha) { + WRITE(p, " %s = vec4(0.0, 0.0, 0.0, %s.z); // blue to alpha\n", compat.fragColor0, compat.fragColor0); + } + if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { const double scale = DepthSliceFactor() * 65535.0; diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 1b609e34ffd0..05b1dc30a6f4 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -218,7 +218,7 @@ void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPU params->z_stride = 0; } - params->fmt = gstate.FrameBufFormat(); + params->fmt = gstate_c.framebufFormat; params->isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask(); // Technically, it may write depth later, but we're trying to detect it only when it's really true. @@ -589,7 +589,7 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G // Games that are marked as doing reinterpret just ignore this - better to keep the data than to clear. // Fixes #13717. - if (!PSP_CoreParameter().compat.flags().ReinterpretFramebuffers) { + if (!PSP_CoreParameter().compat.flags().ReinterpretFramebuffers && !PSP_CoreParameter().compat.flags().BlueToAlpha) { draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "FakeReinterpret"); // Need to dirty anything that has command buffer dynamic state, in case we started a new pass above. // Should find a way to feed that information back, maybe... Or simply correct the issue in the rendermanager. @@ -670,9 +670,11 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G // Copy to a temp framebuffer. Draw::Framebuffer *temp = GetTempFBO(TempFBO::REINTERPRET, vfb->renderWidth, vfb->renderHeight); + // Ideally on Vulkan this should be using the original framebuffer as an input attachment, allowing it to read from + // itself while writing. draw_->InvalidateCachedState(); draw_->CopyFramebufferImage(vfb->fbo, 0, 0, 0, 0, temp, 0, 0, 0, 0, vfb->renderWidth, vfb->renderHeight, 1, Draw::FBChannel::FB_COLOR_BIT, "reinterpret_prep"); - draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, reinterpretStrings[(int)oldFormat][(int)newFormat]); + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, reinterpretStrings[(int)oldFormat][(int)newFormat]); draw_->BindPipeline(pipeline); draw_->BindFramebufferAsTexture(temp, 0, Draw::FBChannel::FB_COLOR_BIT, 0); draw_->BindSamplerStates(0, 1, &reinterpretSampler_); @@ -691,7 +693,7 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G shaderManager_->DirtyLastShader(); textureCache_->ForgetLastTexture(); - gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE); + gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); if (currentRenderVfb_ != vfb) { // In case ReinterpretFramebuffer was called from the texture manager. @@ -1642,8 +1644,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram // Create a new fbo if none was found for the size if (!nvfb) { - nvfb = new VirtualFramebuffer(); - memset(nvfb, 0, sizeof(VirtualFramebuffer)); + nvfb = new VirtualFramebuffer{}; nvfb->fbo = nullptr; nvfb->fb_address = vfb->fb_address; nvfb->fb_stride = vfb->fb_stride; @@ -1688,20 +1689,21 @@ void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2 return; } } + if (!Memory::IsValidAddress(gstate.getFrameBufAddress())) { return; } u8 *addr = Memory::GetPointerUnchecked(gstate.getFrameBufAddress()); - const int bpp = gstate.FrameBufFormat() == GE_FORMAT_8888 ? 4 : 2; + const int bpp = gstate_c.framebufFormat == GE_FORMAT_8888 ? 4 : 2; u32 clearBits = clearColor; if (bpp == 2) { u16 clear16 = 0; - switch (gstate.FrameBufFormat()) { - case GE_FORMAT_565: ConvertRGBA8888ToRGB565(&clear16, &clearColor, 1); break; - case GE_FORMAT_5551: ConvertRGBA8888ToRGBA5551(&clear16, &clearColor, 1); break; - case GE_FORMAT_4444: ConvertRGBA8888ToRGBA4444(&clear16, &clearColor, 1); break; + switch (gstate_c.framebufFormat) { + case GE_FORMAT_565: clear16 = RGBA8888toRGB565(clearColor); break; + case GE_FORMAT_5551: clear16 = RGBA8888toRGBA5551(clearColor); break; + case GE_FORMAT_4444: clear16 = RGBA8888toRGBA4444(clearColor); break; default: _dbg_assert_(0); break; } clearBits = clear16 | (clear16 << 16); diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 1182c73e3248..2c31d5e668fd 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -44,6 +44,7 @@ enum { FB_USAGE_CLUT = 8, FB_USAGE_DOWNLOAD = 16, FB_USAGE_DOWNLOAD_CLEAR = 32, + FB_USAGE_BLUE_TO_ALPHA = 64, }; enum { diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index d3eec7c9ecfd..8b27be2d34b5 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -38,7 +38,7 @@ bool IsStencilTestOutputDisabled() { // The mask applies on all stencil ops. if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF) { - if (gstate.FrameBufFormat() == GE_FORMAT_565) { + if (gstate_c.framebufFormat == GE_FORMAT_565) { return true; } return gstate.getStencilOpZPass() == GE_STENCILOP_KEEP && gstate.getStencilOpZFail() == GE_STENCILOP_KEEP && gstate.getStencilOpSFail() == GE_STENCILOP_KEEP; @@ -191,11 +191,15 @@ ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) { } } + if (replaceBlend == ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA) { + return REPLACE_ALPHA_NO; // irrelevant + } + return REPLACE_ALPHA_YES; } StencilValueType ReplaceAlphaWithStencilType() { - switch (gstate.FrameBufFormat()) { + switch (gstate_c.framebufFormat) { case GE_FORMAT_565: // There's never a stencil value. Maybe the right alpha is 1? return STENCIL_VALUE_ONE; @@ -236,10 +240,10 @@ StencilValueType ReplaceAlphaWithStencilType() { return STENCIL_VALUE_ZERO; case GE_STENCILOP_DECR: - return gstate.FrameBufFormat() == GE_FORMAT_4444 ? STENCIL_VALUE_DECR_4 : STENCIL_VALUE_DECR_8; + return gstate_c.framebufFormat == GE_FORMAT_4444 ? STENCIL_VALUE_DECR_4 : STENCIL_VALUE_DECR_8; case GE_STENCILOP_INCR: - return gstate.FrameBufFormat() == GE_FORMAT_4444 ? STENCIL_VALUE_INCR_4 : STENCIL_VALUE_INCR_8; + return gstate_c.framebufFormat == GE_FORMAT_4444 ? STENCIL_VALUE_INCR_4 : STENCIL_VALUE_INCR_8; case GE_STENCILOP_INVERT: return STENCIL_VALUE_INVERT; @@ -254,6 +258,10 @@ StencilValueType ReplaceAlphaWithStencilType() { } ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferFormat bufferFormat) { + if (gstate_c.blueToAlpha) { + return REPLACE_BLEND_BLUE_TO_ALPHA; + } + if (!gstate.isAlphaBlendEnabled() || gstate.isModeClear()) { return REPLACE_BLEND_NO; } @@ -976,6 +984,11 @@ bool IsColorWriteMaskComplex(bool allowFramebufferRead) { return false; } + if (gstate_c.blueToAlpha) { + // We'll generate a simple ___A mask. + return false; + } + uint32_t colorMask = (gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24); for (int i = 0; i < 4; i++) { @@ -996,6 +1009,15 @@ bool IsColorWriteMaskComplex(bool allowFramebufferRead) { // When that's not enough, we fall back on a technique similar to shader blending, // we read from the framebuffer (or a copy of it). void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) { + if (gstate_c.blueToAlpha) { + maskState.applyFramebufferRead = false; + maskState.rgba[0] = false; + maskState.rgba[1] = false; + maskState.rgba[2] = false; + maskState.rgba[3] = true; + return; + } + // Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw. uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24)); @@ -1049,13 +1071,15 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead, blendState.useBlendColor = false; blendState.replaceAlphaWithStencil = REPLACE_ALPHA_NO; - ReplaceBlendType replaceBlend = ReplaceBlendWithShader(allowFramebufferRead, gstate.FrameBufFormat()); + ReplaceBlendType replaceBlend = ReplaceBlendWithShader(allowFramebufferRead, gstate_c.framebufFormat); if (forceReplaceBlend) { replaceBlend = REPLACE_BLEND_COPY_FBO; } ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend); bool usePreSrc = false; + bool blueToAlpha = false; + switch (replaceBlend) { case REPLACE_BLEND_NO: blendState.resetFramebufferRead = true; @@ -1063,6 +1087,10 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead, ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState); return; + case REPLACE_BLEND_BLUE_TO_ALPHA: + blueToAlpha = true; + break; + case REPLACE_BLEND_COPY_FBO: blendState.applyFramebufferRead = true; blendState.resetFramebufferRead = false; @@ -1130,7 +1158,7 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead, bool approxFuncB = false; BlendFactor glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(fixB, approxFuncB) : genericBLookup[blendFuncB]; - if (gstate.FrameBufFormat() == GE_FORMAT_565) { + if (gstate_c.framebufFormat == GE_FORMAT_565) { if (blendFuncA == GE_SRCBLEND_DSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEDSTALPHA) { glBlendFuncA = BlendFactor::ZERO; } @@ -1303,6 +1331,10 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead, alphaEq = BlendEq::REVERSE_SUBTRACT; break; } + } else if (blueToAlpha) { + blendState.setFactors(BlendFactor::ZERO, BlendFactor::ZERO, BlendFactor::ONE, glBlendFuncB); + blendState.setEquation(BlendEq::ADD, colorEq); + return; } else { // Retain the existing value when stencil testing is off. blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE); @@ -1452,7 +1484,7 @@ void ConvertStencilFuncState(GenericStencilFuncState &state) { state.writeMask = (~gstate.getStencilWriteMask()) & 0xFF; state.enabled = gstate.isStencilTestEnabled(); if (!state.enabled) { - if (gstate.FrameBufFormat() == GE_FORMAT_5551) + if (gstate_c.framebufFormat == GE_FORMAT_5551) ConvertStencilMask5551(state); return; } @@ -1465,7 +1497,7 @@ void ConvertStencilFuncState(GenericStencilFuncState &state) { state.testRef = gstate.getStencilTestRef(); state.testMask = gstate.getStencilTestMask(); - switch (gstate.FrameBufFormat()) { + switch (gstate_c.framebufFormat) { case GE_FORMAT_565: state.writeMask = 0; break; diff --git a/GPU/Common/GPUStateUtils.h b/GPU/Common/GPUStateUtils.h index d61baca4f547..5f2f455e6004 100644 --- a/GPU/Common/GPUStateUtils.h +++ b/GPU/Common/GPUStateUtils.h @@ -39,6 +39,9 @@ enum ReplaceBlendType { // Full blend equation runs in shader. // We might have to make a copy of the framebuffer target to read from. REPLACE_BLEND_COPY_FBO, + + // Color blend mode and color gets copied to alpha blend mode. + REPLACE_BLEND_BLUE_TO_ALPHA, }; enum LogicOpReplaceType { @@ -54,9 +57,6 @@ bool IsAlphaTestAgainstZero(); bool NeedsTestDiscard(); bool IsStencilTestOutputDisabled(); -// If not, we have to emulate it in the shader, similar to blend replace. -bool IsColorMaskSimple(uint32_t colorMask); - StencilValueType ReplaceAlphaWithStencilType(); ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend); ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend, GEBufferFormat bufferFormat); diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 5cdbc17afc45..e879cec1b9f4 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -186,8 +186,19 @@ std::string FragmentShaderDesc(const FShaderID &id) { if (id.Bit(FS_BIT_CLAMP_T)) desc << "T"; desc << " "; } - if (id.Bits(FS_BIT_REPLACE_BLEND, 3)) { - desc << "ReplaceBlend_" << id.Bits(FS_BIT_REPLACE_BLEND, 3) << "A:" << id.Bits(FS_BIT_BLENDFUNC_A, 4) << "_B:" << id.Bits(FS_BIT_BLENDFUNC_B, 4) << "_Eq:" << id.Bits(FS_BIT_BLENDEQ, 3) << " "; + int blendBits = id.Bits(FS_BIT_REPLACE_BLEND, 3); + if (blendBits) { + switch (blendBits) { + case ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA: + desc << "BlueToAlpha_" << "A:" << id.Bits(FS_BIT_BLENDFUNC_A, 4); + break; + default: + desc << "ReplaceBlend_" << id.Bits(FS_BIT_REPLACE_BLEND, 3) + << "A:" << id.Bits(FS_BIT_BLENDFUNC_A, 4) + << "_B:" << id.Bits(FS_BIT_BLENDFUNC_B, 4) + << "_Eq:" << id.Bits(FS_BIT_BLENDEQ, 3) << " "; + break; + } } switch (id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2)) { @@ -252,7 +263,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { // Note how we here recompute some of the work already done in state mapping. // Not ideal! At least we share the code. - ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.allowFramebufferRead, gstate.FrameBufFormat()); + ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.allowFramebufferRead, gstate_c.framebufFormat); if (colorWriteMask) { replaceBlend = REPLACE_BLEND_COPY_FBO; } @@ -312,7 +323,10 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { id.SetBits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2, ReplaceLogicOpType()); // If replaceBlend == REPLACE_BLEND_STANDARD (or REPLACE_BLEND_NO) nothing is done, so we kill these bits. - if (replaceBlend > REPLACE_BLEND_STANDARD) { + if (replaceBlend == REPLACE_BLEND_BLUE_TO_ALPHA) { + id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend); + id.SetBits(FS_BIT_BLENDFUNC_A, 4, gstate.getBlendFuncA()); + } else if (replaceBlend > REPLACE_BLEND_STANDARD) { // 3 bits. id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend); // 11 bits total. diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 4a651746ed9f..b41aa69264ea 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -915,7 +915,7 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer( } // NOTE: This check is okay because the first texture formats are the same as the buffer formats. if (IsTextureFormatBufferCompatible(entry.format)) { - if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format)) { + if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format) || (framebuffer->usageFlags & FB_USAGE_BLUE_TO_ALPHA)) { return FramebufferMatchInfo{ FramebufferMatch::VALID }; } else if (IsTextureFormat16Bit(entry.format) && IsBufferFormat16Bit(framebuffer->format)) { WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with reinterpretable format: %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format)); diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 78dced1b4279..8715116b3060 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -676,7 +676,7 @@ void DrawEngineD3D11::DoFlush() { uint8_t clearStencil = clearColor >> 24; draw_->Clear(clearFlag, clearColor, clearDepth, clearStencil); - if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) { + if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate_c.framebufFormat == GE_FORMAT_565)) { int scissorX1 = gstate.getScissorX1(); int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index f627e2fa470a..216bc7a8a04b 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -627,7 +627,7 @@ void DrawEngineDX9::DoFlush() { device_->Clear(0, NULL, mask, SwapRB(clearColor), clearDepth, clearColor >> 24); - if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) { + if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate_c.framebufFormat == GE_FORMAT_565)) { int scissorX1 = gstate.getScissorX1(); int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 275ded02e672..369cc62a0285 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -436,7 +436,7 @@ void DrawEngineGLES::DoFlush() { render_->Clear(clearColor, clearDepth, clearColor >> 24, target, rgbaMask, vpAndScissor.scissorX, vpAndScissor.scissorY, vpAndScissor.scissorW, vpAndScissor.scissorH); framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); - if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && colorMask && (alphaMask || gstate.FrameBufFormat() == GE_FORMAT_565)) { + if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && colorMask && (alphaMask || gstate_c.framebufFormat == GE_FORMAT_565)) { int scissorX1 = gstate.getScissorX1(); int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 14a2f6e9c6f4..354c85bd631e 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1633,8 +1633,9 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { return; } - // This also makes skipping drawing very effective. - framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + // Update cached framebuffer format. + // We store it in the cache so it can be modified for blue-to-alpha, next. + gstate_c.framebufFormat = gstate.FrameBufFormat(); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { // Rough estimate, not sure what's correct. @@ -1650,6 +1651,24 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { return; } + // See the documentation for gstate_c.blueToAlpha. + bool blueToAlpha = false; + if (PSP_CoreParameter().compat.flags().BlueToAlpha) { + if (gstate_c.framebufFormat == GEBufferFormat::GE_FORMAT_565 && gstate.getColorMask() == 0x0FFFFF) { + blueToAlpha = true; + } + if (blueToAlpha != gstate_c.blueToAlpha) { + gstate_c.blueToAlpha = blueToAlpha; + gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE); + } + } + + // This also makes skipping drawing very effective. + VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + if (blueToAlpha) { + vfb->usageFlags |= FB_USAGE_BLUE_TO_ALPHA; + } + void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); void *inds = nullptr; u32 vertexType = gstate.vertType; @@ -1845,6 +1864,8 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) { // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. gstate_c.Dirty(DIRTY_UVSCALEOFFSET); + gstate_c.framebufFormat = gstate.FrameBufFormat(); + // This also make skipping drawing very effective. framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { @@ -1913,6 +1934,8 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) { // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. gstate_c.Dirty(DIRTY_UVSCALEOFFSET); + gstate_c.framebufFormat = gstate.FrameBufFormat(); + // This also make skipping drawing very effective. framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { @@ -2017,6 +2040,8 @@ void GPUCommon::Execute_BlockTransferStart(u32 op, u32 diff) { PROFILE_THIS_SCOPE("block"); // don't include the flush in the profile, would be misleading. + gstate_c.framebufFormat = gstate.FrameBufFormat(); + // and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa. // Can we skip this on SkipDraw? DoBlockTransfer(gstate_c.skipDrawReason); @@ -2897,7 +2922,7 @@ bool GPUCommon::PerformStencilUpload(u32 dest, int size) { bool GPUCommon::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) { u32 fb_address = type == GPU_DBG_FRAMEBUF_RENDER ? (gstate.getFrameBufRawAddress() | 0x04000000) : framebufferManager_->DisplayFramebufAddr(); int fb_stride = type == GPU_DBG_FRAMEBUF_RENDER ? gstate.FrameBufStride() : framebufferManager_->DisplayFramebufStride(); - GEBufferFormat format = type == GPU_DBG_FRAMEBUF_RENDER ? gstate.FrameBufFormat() : framebufferManager_->DisplayFramebufFormat(); + GEBufferFormat format = type == GPU_DBG_FRAMEBUF_RENDER ? gstate_c.framebufFormat : framebufferManager_->DisplayFramebufFormat(); return framebufferManager_->GetFramebuffer(fb_address, fb_stride, format, buffer, maxRes); } @@ -2974,7 +2999,7 @@ bool GPUCommon::FramebufferReallyDirty() { void GPUCommon::UpdateUVScaleOffset() { #ifdef _M_SSE __m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *) & gstate.texscaleu), 8); - _mm_storeu_si128((__m128i *) & gstate_c.uv, values); + _mm_storeu_si128((__m128i *)&gstate_c.uv, values); #elif PPSSPP_ARCH(ARM_NEON) const uint32x4_t values = vshlq_n_u32(vld1q_u32((const u32 *)&gstate.texscaleu), 8); vst1q_u32((u32 *)&gstate_c.uv, values); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 6b8f20fa8a65..a4e157448d77 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -593,6 +593,13 @@ struct GPUStateCache { KnownVertexBounds vertBounds; + GEBufferFormat framebufFormat; + // Some games use a very specific masking setup to draw into the alpha channel of a 4444 target using the blue channel of a 565 target. + // This is done because on PSP you can't write to destination alpha, other than stencil values, which can't be set from a texture. + // Examples of games that do this: Outrun, Split/Second. + // We detect this case and go into a special drawing mode. + bool blueToAlpha; + // TODO: These should be accessed from the current VFB object directly. u32 curRTWidth; u32 curRTHeight; diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index 56ef6941bd74..ef04d92dac8f 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -367,7 +367,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag } void DrawEngineVulkan::BindShaderBlendTex() { - // TODO: At this point, we know if the vertices are full alpha or not. + // TODO: At this point, we know if the vertices are full alpha or not. // Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? if (!gstate.isModeClear()) { if (fboTexNeedsBind_) { diff --git a/assets/compat.ini b/assets/compat.ini index c49b4c7490f9..fc17e3e9b1f3 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -990,6 +990,15 @@ ULES01367 = true NPEH00029 = true ULUS10455 = true +[BlueToAlpha] +ULES01402 = true +ULUS10513 = true +ULJM05812 = true +NPJH50371 = true +# Some games render first to RGB of a 4444 texture, then they switch to 565 and render masked to blue, +# just to be able to render to the alpha channel of the 4444. We can detect that and reroute rendering +# to avoid problems. + [DateLimited] # Car Jack Streets - issue #12698 NPUZ00043 = true @@ -1025,11 +1034,11 @@ ULES01441 = true ULJM05600 = true ULJM05775 = true -# Split/Second -ULES01402 = true -ULUS10513 = true -ULJM05812 = true -NPJH50371 = true +# Split/Second now uses BlueToAlpha instead. +# ULES01402 = true +# ULUS10513 = true +# ULJM05812 = true +# NPJH50371 = true [ShaderColorBitmask] # Outrun 2006: Coast to Coast - issue #11358 @@ -1043,11 +1052,11 @@ ULJM05533 = true NPJH50006 = true ULES01301 = true -# Split/Second -ULES01402 = true -ULUS10513 = true -ULJM05812 = true -NPJH50371 = true +# Split/Second now uses BlueToAlpha instead. +#ULES01402 = true +#ULUS10513 = true +#ULJM05812 = true +#NPJH50371 = true [DisableFirstFrameReadback] # Wipeout Pure: Temporary workaround for lens flare flicker. See #13344