From 6bc7a699feb2949bed8b96917647ddf524ecd1a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 24 Apr 2022 14:55:51 +0200 Subject: [PATCH 1/8] Minor cleanups in framebuffer manager --- Common/Data/Convert/ColorConv.cpp | 24 ------------------------ Common/Data/Convert/ColorConv.h | 24 ++++++++++++++++++++++++ GPU/Common/FramebufferManagerCommon.cpp | 6 +++--- GPU/Common/GPUStateUtils.h | 3 --- 4 files changed, 27 insertions(+), 30 deletions(-) diff --git a/Common/Data/Convert/ColorConv.cpp b/Common/Data/Convert/ColorConv.cpp index 489a3f172f82..55686f5c00af 100644 --- a/Common/Data/Convert/ColorConv.cpp +++ b/Common/Data/Convert/ColorConv.cpp @@ -34,30 +34,6 @@ #endif #endif -inline u16 RGBA8888toRGB565(u32 px) { - return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); -} - -inline u16 RGBA8888toRGBA4444(u32 px) { - return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); -} - -inline u16 BGRA8888toRGB565(u32 px) { - return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); -} - -inline u16 BGRA8888toRGBA4444(u32 px) { - return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); -} - -inline u16 BGRA8888toRGBA5551(u32 px) { - return ((px >> 19) & 0x001F) | ((px >> 6) & 0x03E0) | ((px << 7) & 0x7C00) | ((px >> 16) & 0x8000); -} - -inline u16 RGBA8888toRGBA5551(u32 px) { - return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); -} - // convert 4444 image to 8888, parallelizable void convert4444_gl(u16* data, u32* out, int width, int l, int u) { for (int y = l; y < u; ++y) { diff --git a/Common/Data/Convert/ColorConv.h b/Common/Data/Convert/ColorConv.h index 66bd47b9cf56..077533171b17 100644 --- a/Common/Data/Convert/ColorConv.h +++ b/Common/Data/Convert/ColorConv.h @@ -35,6 +35,30 @@ inline u8 Convert6To8(u8 v) { return (v << 2) | (v >> 4); } +inline u16 RGBA8888toRGB565(u32 px) { + return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); +} + +inline u16 RGBA8888toRGBA4444(u32 px) { + return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); +} + +inline u16 BGRA8888toRGB565(u32 px) { + return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); +} + +inline u16 BGRA8888toRGBA4444(u32 px) { + return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); +} + +inline u16 BGRA8888toRGBA5551(u32 px) { + return ((px >> 19) & 0x001F) | ((px >> 6) & 0x03E0) | ((px << 7) & 0x7C00) | ((px >> 16) & 0x8000); +} + +inline u16 RGBA8888toRGBA5551(u32 px) { + return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); +} + inline u32 RGBA4444ToRGBA8888(u16 src) { const u32 r = (src & 0x000F) << 0; const u32 g = (src & 0x00F0) << 4; diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 1b609e34ffd0..150d2fd8569b 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1699,9 +1699,9 @@ void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2 if (bpp == 2) { u16 clear16 = 0; switch (gstate.FrameBufFormat()) { - case GE_FORMAT_565: ConvertRGBA8888ToRGB565(&clear16, &clearColor, 1); break; - case GE_FORMAT_5551: ConvertRGBA8888ToRGBA5551(&clear16, &clearColor, 1); break; - case GE_FORMAT_4444: ConvertRGBA8888ToRGBA4444(&clear16, &clearColor, 1); break; + case GE_FORMAT_565: clear16 = RGBA8888toRGB565(clearColor); break; + case GE_FORMAT_5551: clear16 = RGBA8888toRGBA5551(clearColor); break; + case GE_FORMAT_4444: clear16 = RGBA8888toRGBA4444(clearColor); break; default: _dbg_assert_(0); break; } clearBits = clear16 | (clear16 << 16); diff --git a/GPU/Common/GPUStateUtils.h b/GPU/Common/GPUStateUtils.h index d61baca4f547..037cc91da455 100644 --- a/GPU/Common/GPUStateUtils.h +++ b/GPU/Common/GPUStateUtils.h @@ -54,9 +54,6 @@ bool IsAlphaTestAgainstZero(); bool NeedsTestDiscard(); bool IsStencilTestOutputDisabled(); -// If not, we have to emulate it in the shader, similar to blend replace. -bool IsColorMaskSimple(uint32_t colorMask); - StencilValueType ReplaceAlphaWithStencilType(); ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend); ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend, GEBufferFormat bufferFormat); From 5a1ab67cf8fd564cbf73682461dd24316e692766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 24 Apr 2022 14:56:48 +0200 Subject: [PATCH 2/8] Dirty more state after reinterpret --- GPU/Common/FramebufferManagerCommon.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 150d2fd8569b..f097edc0c964 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -691,7 +691,7 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G shaderManager_->DirtyLastShader(); textureCache_->ForgetLastTexture(); - gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE); + gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); if (currentRenderVfb_ != vfb) { // In case ReinterpretFramebuffer was called from the texture manager. @@ -1688,6 +1688,7 @@ void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2 return; } } + if (!Memory::IsValidAddress(gstate.getFrameBufAddress())) { return; } From 7be86264d0fa48dabe69d76f9c0b4d0dc8a1d360 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 24 Apr 2022 17:30:33 +0200 Subject: [PATCH 3/8] Move framebufFormat to gstate_c, so we can override it --- GPU/Common/FramebufferManagerCommon.cpp | 8 ++++---- GPU/Common/GPUStateUtils.cpp | 16 ++++++++-------- GPU/Common/ShaderId.cpp | 2 +- GPU/D3D11/DrawEngineD3D11.cpp | 2 +- GPU/Directx9/DrawEngineDX9.cpp | 2 +- GPU/GLES/DrawEngineGLES.cpp | 2 +- GPU/GPUCommon.cpp | 6 +++++- GPU/GPUState.h | 2 ++ GPU/Vulkan/StateMappingVulkan.cpp | 2 +- 9 files changed, 24 insertions(+), 18 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index f097edc0c964..efd16b39664d 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -218,7 +218,7 @@ void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPU params->z_stride = 0; } - params->fmt = gstate.FrameBufFormat(); + params->fmt = gstate_c.framebufFormat; params->isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask(); // Technically, it may write depth later, but we're trying to detect it only when it's really true. @@ -672,7 +672,7 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G draw_->InvalidateCachedState(); draw_->CopyFramebufferImage(vfb->fbo, 0, 0, 0, 0, temp, 0, 0, 0, 0, vfb->renderWidth, vfb->renderHeight, 1, Draw::FBChannel::FB_COLOR_BIT, "reinterpret_prep"); - draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, reinterpretStrings[(int)oldFormat][(int)newFormat]); + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, reinterpretStrings[(int)oldFormat][(int)newFormat]); draw_->BindPipeline(pipeline); draw_->BindFramebufferAsTexture(temp, 0, Draw::FBChannel::FB_COLOR_BIT, 0); draw_->BindSamplerStates(0, 1, &reinterpretSampler_); @@ -1694,12 +1694,12 @@ void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2 } u8 *addr = Memory::GetPointerUnchecked(gstate.getFrameBufAddress()); - const int bpp = gstate.FrameBufFormat() == GE_FORMAT_8888 ? 4 : 2; + const int bpp = gstate_c.framebufFormat == GE_FORMAT_8888 ? 4 : 2; u32 clearBits = clearColor; if (bpp == 2) { u16 clear16 = 0; - switch (gstate.FrameBufFormat()) { + switch (gstate_c.framebufFormat) { case GE_FORMAT_565: clear16 = RGBA8888toRGB565(clearColor); break; case GE_FORMAT_5551: clear16 = RGBA8888toRGBA5551(clearColor); break; case GE_FORMAT_4444: clear16 = RGBA8888toRGBA4444(clearColor); break; diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index d3eec7c9ecfd..5e860337455c 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -38,7 +38,7 @@ bool IsStencilTestOutputDisabled() { // The mask applies on all stencil ops. if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF) { - if (gstate.FrameBufFormat() == GE_FORMAT_565) { + if (gstate_c.framebufFormat == GE_FORMAT_565) { return true; } return gstate.getStencilOpZPass() == GE_STENCILOP_KEEP && gstate.getStencilOpZFail() == GE_STENCILOP_KEEP && gstate.getStencilOpSFail() == GE_STENCILOP_KEEP; @@ -195,7 +195,7 @@ ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) { } StencilValueType ReplaceAlphaWithStencilType() { - switch (gstate.FrameBufFormat()) { + switch (gstate_c.framebufFormat) { case GE_FORMAT_565: // There's never a stencil value. Maybe the right alpha is 1? return STENCIL_VALUE_ONE; @@ -236,10 +236,10 @@ StencilValueType ReplaceAlphaWithStencilType() { return STENCIL_VALUE_ZERO; case GE_STENCILOP_DECR: - return gstate.FrameBufFormat() == GE_FORMAT_4444 ? STENCIL_VALUE_DECR_4 : STENCIL_VALUE_DECR_8; + return gstate_c.framebufFormat == GE_FORMAT_4444 ? STENCIL_VALUE_DECR_4 : STENCIL_VALUE_DECR_8; case GE_STENCILOP_INCR: - return gstate.FrameBufFormat() == GE_FORMAT_4444 ? STENCIL_VALUE_INCR_4 : STENCIL_VALUE_INCR_8; + return gstate_c.framebufFormat == GE_FORMAT_4444 ? STENCIL_VALUE_INCR_4 : STENCIL_VALUE_INCR_8; case GE_STENCILOP_INVERT: return STENCIL_VALUE_INVERT; @@ -1049,7 +1049,7 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead, blendState.useBlendColor = false; blendState.replaceAlphaWithStencil = REPLACE_ALPHA_NO; - ReplaceBlendType replaceBlend = ReplaceBlendWithShader(allowFramebufferRead, gstate.FrameBufFormat()); + ReplaceBlendType replaceBlend = ReplaceBlendWithShader(allowFramebufferRead, gstate_c.framebufFormat); if (forceReplaceBlend) { replaceBlend = REPLACE_BLEND_COPY_FBO; } @@ -1130,7 +1130,7 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead, bool approxFuncB = false; BlendFactor glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(fixB, approxFuncB) : genericBLookup[blendFuncB]; - if (gstate.FrameBufFormat() == GE_FORMAT_565) { + if (gstate_c.framebufFormat == GE_FORMAT_565) { if (blendFuncA == GE_SRCBLEND_DSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEDSTALPHA) { glBlendFuncA = BlendFactor::ZERO; } @@ -1452,7 +1452,7 @@ void ConvertStencilFuncState(GenericStencilFuncState &state) { state.writeMask = (~gstate.getStencilWriteMask()) & 0xFF; state.enabled = gstate.isStencilTestEnabled(); if (!state.enabled) { - if (gstate.FrameBufFormat() == GE_FORMAT_5551) + if (gstate_c.framebufFormat == GE_FORMAT_5551) ConvertStencilMask5551(state); return; } @@ -1465,7 +1465,7 @@ void ConvertStencilFuncState(GenericStencilFuncState &state) { state.testRef = gstate.getStencilTestRef(); state.testMask = gstate.getStencilTestMask(); - switch (gstate.FrameBufFormat()) { + switch (gstate_c.framebufFormat) { case GE_FORMAT_565: state.writeMask = 0; break; diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 5cdbc17afc45..653f171e6669 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -252,7 +252,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { // Note how we here recompute some of the work already done in state mapping. // Not ideal! At least we share the code. - ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.allowFramebufferRead, gstate.FrameBufFormat()); + ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.allowFramebufferRead, gstate_c.framebufFormat); if (colorWriteMask) { replaceBlend = REPLACE_BLEND_COPY_FBO; } diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 78dced1b4279..8715116b3060 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -676,7 +676,7 @@ void DrawEngineD3D11::DoFlush() { uint8_t clearStencil = clearColor >> 24; draw_->Clear(clearFlag, clearColor, clearDepth, clearStencil); - if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) { + if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate_c.framebufFormat == GE_FORMAT_565)) { int scissorX1 = gstate.getScissorX1(); int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index f627e2fa470a..216bc7a8a04b 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -627,7 +627,7 @@ void DrawEngineDX9::DoFlush() { device_->Clear(0, NULL, mask, SwapRB(clearColor), clearDepth, clearColor >> 24); - if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) { + if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate_c.framebufFormat == GE_FORMAT_565)) { int scissorX1 = gstate.getScissorX1(); int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 275ded02e672..369cc62a0285 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -436,7 +436,7 @@ void DrawEngineGLES::DoFlush() { render_->Clear(clearColor, clearDepth, clearColor >> 24, target, rgbaMask, vpAndScissor.scissorX, vpAndScissor.scissorY, vpAndScissor.scissorW, vpAndScissor.scissorH); framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); - if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && colorMask && (alphaMask || gstate.FrameBufFormat() == GE_FORMAT_565)) { + if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && colorMask && (alphaMask || gstate_c.framebufFormat == GE_FORMAT_565)) { int scissorX1 = gstate.getScissorX1(); int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 14a2f6e9c6f4..307870a228ce 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1633,6 +1633,10 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { return; } + // Update cached framebuffer format. + // We store it in the cache so it can be modified for blue-to-alpha, next. + gstate_c.framebufFormat = gstate.FrameBufFormat(); + // This also makes skipping drawing very effective. framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); @@ -2897,7 +2901,7 @@ bool GPUCommon::PerformStencilUpload(u32 dest, int size) { bool GPUCommon::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) { u32 fb_address = type == GPU_DBG_FRAMEBUF_RENDER ? (gstate.getFrameBufRawAddress() | 0x04000000) : framebufferManager_->DisplayFramebufAddr(); int fb_stride = type == GPU_DBG_FRAMEBUF_RENDER ? gstate.FrameBufStride() : framebufferManager_->DisplayFramebufStride(); - GEBufferFormat format = type == GPU_DBG_FRAMEBUF_RENDER ? gstate.FrameBufFormat() : framebufferManager_->DisplayFramebufFormat(); + GEBufferFormat format = type == GPU_DBG_FRAMEBUF_RENDER ? gstate_c.framebufFormat : framebufferManager_->DisplayFramebufFormat(); return framebufferManager_->GetFramebuffer(fb_address, fb_stride, format, buffer, maxRes); } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 6b8f20fa8a65..181f8129c129 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -593,6 +593,8 @@ struct GPUStateCache { KnownVertexBounds vertBounds; + GEBufferFormat framebufFormat; + // TODO: These should be accessed from the current VFB object directly. u32 curRTWidth; u32 curRTHeight; diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index 56ef6941bd74..ef04d92dac8f 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -367,7 +367,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag } void DrawEngineVulkan::BindShaderBlendTex() { - // TODO: At this point, we know if the vertices are full alpha or not. + // TODO: At this point, we know if the vertices are full alpha or not. // Set the nearest/linear here (since we correctly know if alpha/color tests are needed)? if (!gstate.isModeClear()) { if (fboTexNeedsBind_) { From 462972f7ea4984c01d8b654b00f3b84145c8f629 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 24 Apr 2022 20:53:09 +0200 Subject: [PATCH 4/8] Add option to redirect blue to alpha if 565 mode is rendered and mask is 0x0FFFFF. This is used by several games to render to the alpha channel of RGBA4444 images, which cannot normally be done directly on the PSP. Can be used as a far more efficient replacement for ReinterpretFramebuffers/ShaderColorBitmask --- Core/Compatibility.cpp | 1 + Core/Compatibility.h | 2 ++ GPU/Common/FragmentShaderGenerator.cpp | 9 +++++++ GPU/Common/FramebufferManagerCommon.cpp | 3 +-- GPU/Common/FramebufferManagerCommon.h | 2 ++ GPU/Common/GPUStateUtils.cpp | 32 +++++++++++++++++++++++++ GPU/Common/GPUStateUtils.h | 3 +++ GPU/Common/ShaderId.cpp | 19 ++++++++++++--- GPU/Common/TextureCacheCommon.cpp | 2 +- GPU/GPUCommon.cpp | 15 +++++++++++- GPU/GPUState.h | 5 ++++ assets/compat.ini | 30 +++++++++++++++-------- 12 files changed, 106 insertions(+), 17 deletions(-) diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index b8de96d22bbd..7a27543ab4e6 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -79,6 +79,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "DisableFirstFrameReadback", &flags_.DisableFirstFrameReadback); CheckSetting(iniFile, gameID, "DisableRangeCulling", &flags_.DisableRangeCulling); CheckSetting(iniFile, gameID, "MpegAvcWarmUp", &flags_.MpegAvcWarmUp); + CheckSetting(iniFile, gameID, "BlueToAlpha", &flags_.BlueToAlpha); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index 48eeddc197f5..fd3e76d7435f 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -44,6 +44,7 @@ // // We already have the Action Replay-based cheat system for such use cases. +// TODO: Turn into bitfield for smaller mem footprint. Though I think it still fits in a cacheline... struct CompatFlags { bool VertexDepthRounding; bool PixelDepthRounding; @@ -77,6 +78,7 @@ struct CompatFlags { bool DisableFirstFrameReadback; bool DisableRangeCulling; bool MpegAvcWarmUp; + bool BlueToAlpha; }; class IniFile; diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 3fc0e16ffcc4..896fb9f43932 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -95,6 +95,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu ReplaceBlendType replaceBlend = static_cast(id.Bits(FS_BIT_REPLACE_BLEND, 3)); + bool blueToAlpha = false; + if (replaceBlend == ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA) { + blueToAlpha = true; + } + GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4); GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4); GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3); @@ -1025,6 +1030,10 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " %s = unpackUnorm4x8(v32);\n", compat.fragColor0); } + if (blueToAlpha) { + WRITE(p, " %s = vec4(0.0, 0.0, 0.0, %s.z); // blue to alpha\n", compat.fragColor0, compat.fragColor0); + } + if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { const double scale = DepthSliceFactor() * 65535.0; diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index efd16b39664d..8553243eaa17 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1642,8 +1642,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram // Create a new fbo if none was found for the size if (!nvfb) { - nvfb = new VirtualFramebuffer(); - memset(nvfb, 0, sizeof(VirtualFramebuffer)); + nvfb = new VirtualFramebuffer{}; nvfb->fbo = nullptr; nvfb->fb_address = vfb->fb_address; nvfb->fb_stride = vfb->fb_stride; diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 1182c73e3248..33d41992f9d9 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -100,6 +100,8 @@ struct VirtualFramebuffer { bool dirtyAfterDisplay; bool reallyDirtyAfterDisplay; // takes frame skipping into account + bool blueToAlphaUsed; + int last_frame_used; int last_frame_attached; int last_frame_render; diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index 5e860337455c..8a8b7c70ba01 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -191,6 +191,10 @@ ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) { } } + if (replaceBlend == ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA) { + return REPLACE_ALPHA_NO; // irrelevant + } + return REPLACE_ALPHA_YES; } @@ -254,6 +258,10 @@ StencilValueType ReplaceAlphaWithStencilType() { } ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferFormat bufferFormat) { + if (gstate_c.blueToAlpha) { + return REPLACE_BLEND_BLUE_TO_ALPHA; + } + if (!gstate.isAlphaBlendEnabled() || gstate.isModeClear()) { return REPLACE_BLEND_NO; } @@ -976,6 +984,11 @@ bool IsColorWriteMaskComplex(bool allowFramebufferRead) { return false; } + if (gstate_c.blueToAlpha) { + // We'll generate a simple ___A mask. + return false; + } + uint32_t colorMask = (gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24); for (int i = 0; i < 4; i++) { @@ -996,6 +1009,15 @@ bool IsColorWriteMaskComplex(bool allowFramebufferRead) { // When that's not enough, we fall back on a technique similar to shader blending, // we read from the framebuffer (or a copy of it). void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) { + if (gstate_c.blueToAlpha) { + maskState.applyFramebufferRead = false; + maskState.rgba[0] = false; + maskState.rgba[1] = false; + maskState.rgba[2] = false; + maskState.rgba[3] = true; + return; + } + // Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw. uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24)); @@ -1056,6 +1078,8 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead, ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend); bool usePreSrc = false; + bool blueToAlpha = false; + switch (replaceBlend) { case REPLACE_BLEND_NO: blendState.resetFramebufferRead = true; @@ -1063,6 +1087,10 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead, ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState); return; + case REPLACE_BLEND_BLUE_TO_ALPHA: + blueToAlpha = true; + break; + case REPLACE_BLEND_COPY_FBO: blendState.applyFramebufferRead = true; blendState.resetFramebufferRead = false; @@ -1303,6 +1331,10 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead, alphaEq = BlendEq::REVERSE_SUBTRACT; break; } + } else if (blueToAlpha) { + blendState.setFactors(BlendFactor::ZERO, BlendFactor::ZERO, glBlendFuncA, glBlendFuncB); + blendState.setEquation(BlendEq::ADD, colorEq); + return; } else { // Retain the existing value when stencil testing is off. blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE); diff --git a/GPU/Common/GPUStateUtils.h b/GPU/Common/GPUStateUtils.h index 037cc91da455..5f2f455e6004 100644 --- a/GPU/Common/GPUStateUtils.h +++ b/GPU/Common/GPUStateUtils.h @@ -39,6 +39,9 @@ enum ReplaceBlendType { // Full blend equation runs in shader. // We might have to make a copy of the framebuffer target to read from. REPLACE_BLEND_COPY_FBO, + + // Color blend mode and color gets copied to alpha blend mode. + REPLACE_BLEND_BLUE_TO_ALPHA, }; enum LogicOpReplaceType { diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 653f171e6669..dfaac9a67501 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -186,8 +186,19 @@ std::string FragmentShaderDesc(const FShaderID &id) { if (id.Bit(FS_BIT_CLAMP_T)) desc << "T"; desc << " "; } - if (id.Bits(FS_BIT_REPLACE_BLEND, 3)) { - desc << "ReplaceBlend_" << id.Bits(FS_BIT_REPLACE_BLEND, 3) << "A:" << id.Bits(FS_BIT_BLENDFUNC_A, 4) << "_B:" << id.Bits(FS_BIT_BLENDFUNC_B, 4) << "_Eq:" << id.Bits(FS_BIT_BLENDEQ, 3) << " "; + int blendBits = id.Bits(FS_BIT_REPLACE_BLEND, 3); + if (blendBits) { + switch (blendBits) { + case ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA: + desc << "BlueToAlpha"; + break; + default: + desc << "ReplaceBlend_" << id.Bits(FS_BIT_REPLACE_BLEND, 3) + << "A:" << id.Bits(FS_BIT_BLENDFUNC_A, 4) + << "_B:" << id.Bits(FS_BIT_BLENDFUNC_B, 4) + << "_Eq:" << id.Bits(FS_BIT_BLENDEQ, 3) << " "; + break; + } } switch (id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2)) { @@ -312,7 +323,9 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { id.SetBits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2, ReplaceLogicOpType()); // If replaceBlend == REPLACE_BLEND_STANDARD (or REPLACE_BLEND_NO) nothing is done, so we kill these bits. - if (replaceBlend > REPLACE_BLEND_STANDARD) { + if (replaceBlend == REPLACE_BLEND_BLUE_TO_ALPHA) { + id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend); + } else if (replaceBlend > REPLACE_BLEND_STANDARD) { // 3 bits. id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend); // 11 bits total. diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 4a651746ed9f..8273ff9cb8e3 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -915,7 +915,7 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer( } // NOTE: This check is okay because the first texture formats are the same as the buffer formats. if (IsTextureFormatBufferCompatible(entry.format)) { - if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format)) { + if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format) || framebuffer->blueToAlphaUsed) { return FramebufferMatchInfo{ FramebufferMatch::VALID }; } else if (IsTextureFormat16Bit(entry.format) && IsBufferFormat16Bit(framebuffer->format)) { WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with reinterpretable format: %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format)); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 307870a228ce..45427bab81b3 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1637,8 +1637,21 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { // We store it in the cache so it can be modified for blue-to-alpha, next. gstate_c.framebufFormat = gstate.FrameBufFormat(); + // See the documentation for gstate_c.blueToAlpha. + bool blueToAlpha = false; + if (gstate_c.framebufFormat == GEBufferFormat::GE_FORMAT_565 && gstate.getColorMask() == 0x0FFFFF && PSP_CoreParameter().compat.flags().BlueToAlpha) { + blueToAlpha = true; + } + if (blueToAlpha != gstate_c.blueToAlpha) { + gstate_c.blueToAlpha = blueToAlpha; + gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE); + } + // This also makes skipping drawing very effective. - framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + if (blueToAlpha) { + vfb->blueToAlphaUsed = true; + } if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { // Rough estimate, not sure what's correct. diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 181f8129c129..a4e157448d77 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -594,6 +594,11 @@ struct GPUStateCache { KnownVertexBounds vertBounds; GEBufferFormat framebufFormat; + // Some games use a very specific masking setup to draw into the alpha channel of a 4444 target using the blue channel of a 565 target. + // This is done because on PSP you can't write to destination alpha, other than stencil values, which can't be set from a texture. + // Examples of games that do this: Outrun, Split/Second. + // We detect this case and go into a special drawing mode. + bool blueToAlpha; // TODO: These should be accessed from the current VFB object directly. u32 curRTWidth; diff --git a/assets/compat.ini b/assets/compat.ini index c49b4c7490f9..214a470e8ab7 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -990,6 +990,16 @@ ULES01367 = true NPEH00029 = true ULUS10455 = true +[BlueToAlpha] +ULES01402 = true +ULUS10513 = true +ULJM05812 = true +NPJH50371 = true + +# Some games render first to RGB of a 4444 texture, then they switch to 565 and render masked to blue, +# just to be able to render to the alpha channel of the 4444. We can detect that and reroute rendering +# to avoid problems. + [DateLimited] # Car Jack Streets - issue #12698 NPUZ00043 = true @@ -1025,11 +1035,11 @@ ULES01441 = true ULJM05600 = true ULJM05775 = true -# Split/Second -ULES01402 = true -ULUS10513 = true -ULJM05812 = true -NPJH50371 = true +# Split/Second now uses BlueToAlpha instead. +# ULES01402 = true +# ULUS10513 = true +# ULJM05812 = true +# NPJH50371 = true [ShaderColorBitmask] # Outrun 2006: Coast to Coast - issue #11358 @@ -1043,11 +1053,11 @@ ULJM05533 = true NPJH50006 = true ULES01301 = true -# Split/Second -ULES01402 = true -ULUS10513 = true -ULJM05812 = true -NPJH50371 = true +# Split/Second now uses BlueToAlpha instead. +#ULES01402 = true +#ULUS10513 = true +#ULJM05812 = true +#NPJH50371 = true [DisableFirstFrameReadback] # Wipeout Pure: Temporary workaround for lens flare flicker. See #13344 From 0c66087cc7332d6808d9d636a84b1f15936e3492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 24 Apr 2022 21:19:48 +0200 Subject: [PATCH 5/8] BlueToAlpha hack to avoid clearing the envmap in split/second :( --- GPU/Common/FramebufferManagerCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 8553243eaa17..1f630c8d9bea 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -589,7 +589,7 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G // Games that are marked as doing reinterpret just ignore this - better to keep the data than to clear. // Fixes #13717. - if (!PSP_CoreParameter().compat.flags().ReinterpretFramebuffers) { + if (!PSP_CoreParameter().compat.flags().ReinterpretFramebuffers && !PSP_CoreParameter().compat.flags().BlueToAlpha) { draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "FakeReinterpret"); // Need to dirty anything that has command buffer dynamic state, in case we started a new pass above. // Should find a way to feed that information back, maybe... Or simply correct the issue in the rendermanager. From 6b2dec91b531dc96d2fe386bccdd4cadbb71ddd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 24 Apr 2022 22:54:08 +0200 Subject: [PATCH 6/8] Finish BlueToAlpha functionality --- GPU/Common/FragmentShaderGenerator.cpp | 4 +++- GPU/Common/GPUStateUtils.cpp | 2 +- GPU/Common/ShaderId.cpp | 3 ++- assets/compat.ini | 1 - 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 896fb9f43932..cb965bf78201 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -858,7 +858,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " v.rgb = v.rgb * 2.0;\n"); } - if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) { + // In some cases we need to replicate the first half of the blend equation here. + // In case of blue-to-alpha, it's since we overwrite alpha with blue before the actual blend equation runs. + if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA || replaceBlend == REPLACE_BLEND_BLUE_TO_ALPHA) { const char *srcFactor = "ERROR"; switch (replaceBlendFuncA) { case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break; diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index 8a8b7c70ba01..8b27be2d34b5 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -1332,7 +1332,7 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead, break; } } else if (blueToAlpha) { - blendState.setFactors(BlendFactor::ZERO, BlendFactor::ZERO, glBlendFuncA, glBlendFuncB); + blendState.setFactors(BlendFactor::ZERO, BlendFactor::ZERO, BlendFactor::ONE, glBlendFuncB); blendState.setEquation(BlendEq::ADD, colorEq); return; } else { diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index dfaac9a67501..e879cec1b9f4 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -190,7 +190,7 @@ std::string FragmentShaderDesc(const FShaderID &id) { if (blendBits) { switch (blendBits) { case ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA: - desc << "BlueToAlpha"; + desc << "BlueToAlpha_" << "A:" << id.Bits(FS_BIT_BLENDFUNC_A, 4); break; default: desc << "ReplaceBlend_" << id.Bits(FS_BIT_REPLACE_BLEND, 3) @@ -325,6 +325,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { // If replaceBlend == REPLACE_BLEND_STANDARD (or REPLACE_BLEND_NO) nothing is done, so we kill these bits. if (replaceBlend == REPLACE_BLEND_BLUE_TO_ALPHA) { id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend); + id.SetBits(FS_BIT_BLENDFUNC_A, 4, gstate.getBlendFuncA()); } else if (replaceBlend > REPLACE_BLEND_STANDARD) { // 3 bits. id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend); diff --git a/assets/compat.ini b/assets/compat.ini index 214a470e8ab7..fc17e3e9b1f3 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -995,7 +995,6 @@ ULES01402 = true ULUS10513 = true ULJM05812 = true NPJH50371 = true - # Some games render first to RGB of a 4444 texture, then they switch to 565 and render masked to blue, # just to be able to render to the alpha channel of the 4444. We can detect that and reroute rendering # to avoid problems. From f5e519ed4b7a3543b8a913093625f5cc2fd669bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 24 Apr 2022 23:23:54 +0200 Subject: [PATCH 7/8] Oops, gotta update the cached framebuffer format in the other drawing paths too --- GPU/GPUCommon.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 45427bab81b3..fcf97b96cc8e 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1862,6 +1862,8 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) { // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. gstate_c.Dirty(DIRTY_UVSCALEOFFSET); + gstate_c.framebufFormat = gstate.FrameBufFormat(); + // This also make skipping drawing very effective. framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { @@ -1930,6 +1932,8 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) { // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. gstate_c.Dirty(DIRTY_UVSCALEOFFSET); + gstate_c.framebufFormat = gstate.FrameBufFormat(); + // This also make skipping drawing very effective. framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { @@ -2034,6 +2038,8 @@ void GPUCommon::Execute_BlockTransferStart(u32 op, u32 diff) { PROFILE_THIS_SCOPE("block"); // don't include the flush in the profile, would be misleading. + gstate_c.framebufFormat = gstate.FrameBufFormat(); + // and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa. // Can we skip this on SkipDraw? DoBlockTransfer(gstate_c.skipDrawReason); From 5868cf0f1cc3f770780dd485fa1ddcddeb841739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 30 Apr 2022 18:13:24 +0200 Subject: [PATCH 8/8] Convert the blueToAlpha bool to a usageFlag. KEEP instead of CLEAR for depth during reinterpret. --- GPU/Common/FramebufferManagerCommon.cpp | 4 ++- GPU/Common/FramebufferManagerCommon.h | 3 +-- GPU/Common/TextureCacheCommon.cpp | 2 +- GPU/GPUCommon.cpp | 36 +++++++++++++------------ 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 1f630c8d9bea..05b1dc30a6f4 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -670,9 +670,11 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G // Copy to a temp framebuffer. Draw::Framebuffer *temp = GetTempFBO(TempFBO::REINTERPRET, vfb->renderWidth, vfb->renderHeight); + // Ideally on Vulkan this should be using the original framebuffer as an input attachment, allowing it to read from + // itself while writing. draw_->InvalidateCachedState(); draw_->CopyFramebufferImage(vfb->fbo, 0, 0, 0, 0, temp, 0, 0, 0, 0, vfb->renderWidth, vfb->renderHeight, 1, Draw::FBChannel::FB_COLOR_BIT, "reinterpret_prep"); - draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, reinterpretStrings[(int)oldFormat][(int)newFormat]); + draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, reinterpretStrings[(int)oldFormat][(int)newFormat]); draw_->BindPipeline(pipeline); draw_->BindFramebufferAsTexture(temp, 0, Draw::FBChannel::FB_COLOR_BIT, 0); draw_->BindSamplerStates(0, 1, &reinterpretSampler_); diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 33d41992f9d9..2c31d5e668fd 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -44,6 +44,7 @@ enum { FB_USAGE_CLUT = 8, FB_USAGE_DOWNLOAD = 16, FB_USAGE_DOWNLOAD_CLEAR = 32, + FB_USAGE_BLUE_TO_ALPHA = 64, }; enum { @@ -100,8 +101,6 @@ struct VirtualFramebuffer { bool dirtyAfterDisplay; bool reallyDirtyAfterDisplay; // takes frame skipping into account - bool blueToAlphaUsed; - int last_frame_used; int last_frame_attached; int last_frame_render; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 8273ff9cb8e3..b41aa69264ea 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -915,7 +915,7 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer( } // NOTE: This check is okay because the first texture formats are the same as the buffer formats. if (IsTextureFormatBufferCompatible(entry.format)) { - if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format) || framebuffer->blueToAlphaUsed) { + if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format) || (framebuffer->usageFlags & FB_USAGE_BLUE_TO_ALPHA)) { return FramebufferMatchInfo{ FramebufferMatch::VALID }; } else if (IsTextureFormat16Bit(entry.format) && IsBufferFormat16Bit(framebuffer->format)) { WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with reinterpretable format: %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format)); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index fcf97b96cc8e..354c85bd631e 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1637,22 +1637,6 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { // We store it in the cache so it can be modified for blue-to-alpha, next. gstate_c.framebufFormat = gstate.FrameBufFormat(); - // See the documentation for gstate_c.blueToAlpha. - bool blueToAlpha = false; - if (gstate_c.framebufFormat == GEBufferFormat::GE_FORMAT_565 && gstate.getColorMask() == 0x0FFFFF && PSP_CoreParameter().compat.flags().BlueToAlpha) { - blueToAlpha = true; - } - if (blueToAlpha != gstate_c.blueToAlpha) { - gstate_c.blueToAlpha = blueToAlpha; - gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE); - } - - // This also makes skipping drawing very effective. - VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); - if (blueToAlpha) { - vfb->blueToAlphaUsed = true; - } - if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { // Rough estimate, not sure what's correct. cyclesExecuted += EstimatePerVertexCost() * count; @@ -1667,6 +1651,24 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { return; } + // See the documentation for gstate_c.blueToAlpha. + bool blueToAlpha = false; + if (PSP_CoreParameter().compat.flags().BlueToAlpha) { + if (gstate_c.framebufFormat == GEBufferFormat::GE_FORMAT_565 && gstate.getColorMask() == 0x0FFFFF) { + blueToAlpha = true; + } + if (blueToAlpha != gstate_c.blueToAlpha) { + gstate_c.blueToAlpha = blueToAlpha; + gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE); + } + } + + // This also makes skipping drawing very effective. + VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + if (blueToAlpha) { + vfb->usageFlags |= FB_USAGE_BLUE_TO_ALPHA; + } + void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); void *inds = nullptr; u32 vertexType = gstate.vertType; @@ -2997,7 +2999,7 @@ bool GPUCommon::FramebufferReallyDirty() { void GPUCommon::UpdateUVScaleOffset() { #ifdef _M_SSE __m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *) & gstate.texscaleu), 8); - _mm_storeu_si128((__m128i *) & gstate_c.uv, values); + _mm_storeu_si128((__m128i *)&gstate_c.uv, values); #elif PPSSPP_ARCH(ARM_NEON) const uint32x4_t values = vshlq_n_u32(vld1q_u32((const u32 *)&gstate.texscaleu), 8); vst1q_u32((u32 *)&gstate_c.uv, values);