diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 15eefcf1b6db..5fe97f1faa32 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -15,16 +15,16 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include + +#include "Common/ColorConv.h" +#include "Core/Config.h" #include "GPU/Common/DrawEngineCommon.h" #include "GPU/Common/SplineCommon.h" #include "GPU/Common/VertexDecoderCommon.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" -#include "Core/Config.h" - -#include - #define QUAD_INDICES_MAX 65536 DrawEngineCommon::DrawEngineCommon() : dec_(nullptr) { @@ -116,6 +116,66 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, return DrawEngineCommon::NormalizeVertices(outPtr, bufPtr, inPtr, dec, lowerBound, upperBound, vertType); } +// Writes clearColor, converted to the current framebuffer format, into emulated memory at the framebuffer address for the rectangle x1 <= x < x2, y1 <= y < y2.
+void DrawEngineCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor) { + u8 *addr = Memory::GetPointer(gstate.getFrameBufAddress()); + const int bpp = gstate.FrameBufFormat() == GE_FORMAT_8888 ? 4 : 2; + const int stride = gstate.FrameBufStride(); + const int width = x2 - x1; + + // Convert up front so the fast-path test below looks at the pixel bits that are actually stored.
+ u16 clear16 = 0; + switch (gstate.FrameBufFormat()) { + case GE_FORMAT_565: ConvertRGBA8888ToRGB565(&clear16, &clearColor, 1); break; + case GE_FORMAT_5551: ConvertRGBA8888ToRGBA5551(&clear16, &clearColor, 1); break; + case GE_FORMAT_4444: ConvertRGBA8888ToRGBA4444(&clear16, &clearColor, 1); break; + } + // One 32-bit word of output: two 16-bit pixels or one 32-bit pixel. Widen before shifting so the u16 is not promoted to signed int.
+ const u32 clearBits = bpp == 2 ? ((u32)clear16 | ((u32)clear16 << 16)) : clearColor; + const bool singleByteClear = (clearBits >> 16) == (clearBits & 0xFFFF) && (clearBits >> 24) == (clearBits & 0xFF); + + // Simple, but often alpha is different and gums up the works. 
+ if (singleByteClear) { + const int byteStride = stride * bpp; + const int byteWidth = width * bpp; + addr += x1 * bpp; + for (int y = y1; y < y2; ++y) { + memset(addr + y * byteStride, clearBits, byteWidth); + } + } else { + // This will most often be true - rarely is the width not aligned. + if ((width & 3) == 0 && (x1 & 3) == 0) { + // Spread the 32-bit pattern to all eight bytes. + const u64 val64 = clearBits | ((u64)clearBits << 32); + const int xstride = bpp == 2 ? 4 : 2; + + u64 *addr64 = (u64 *)addr; + const int stride64 = stride / xstride; + const int x1_64 = x1 / xstride; + const int x2_64 = x2 / xstride; + for (int y = y1; y < y2; ++y) { + for (int x = x1_64; x < x2_64; ++x) { + addr64[y * stride64 + x] = val64; + } + } + } else if (bpp == 4) { + u32 *addr32 = (u32 *)addr; + for (int y = y1; y < y2; ++y) { + for (int x = x1; x < x2; ++x) { + addr32[y * stride + x] = clearColor; + } + } + } else if (bpp == 2) { + u16 *addr16 = (u16 *)addr; + for (int y = y1; y < y2; ++y) { + for (int x = x1; x < x2; ++x) { + addr16[y * stride + x] = clear16; + } + } + } + } +} + // This code is HIGHLY unoptimized! 
// // It does the simplest and safest test possible: If all points of a bbox is outside a single of diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index e566e4226c1c..0c42ddec3396 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -60,6 +60,8 @@ class DrawEngineCommon { // Preprocessing for spline/bezier u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType); + void ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor); + VertexDecoder *GetVertexDecoder(u32 vtype); inline int IndexSize(u32 vtype) const { diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index 03459e3aa0d6..ae8bfe0dfa15 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -876,9 +876,15 @@ void DrawEngineDX9::DoFlush() { dxstate.colorMask.set((mask & D3DCLEAR_TARGET) != 0, (mask & D3DCLEAR_TARGET) != 0, (mask & D3DCLEAR_TARGET) != 0, (mask & D3DCLEAR_STENCIL) != 0); pD3Ddevice->Clear(0, NULL, mask, clearColor, clearDepth, clearColor >> 24); + int scissorX1 = gstate.getScissorX1(); + int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; framebufferManager_->SetSafeSize(scissorX2, scissorY2); + + if (g_Config.bBlockTransferGPU && gstate.isClearModeColorMask() && gstate.isClearModeAlphaMask()) { + ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor); + } } } diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index c77d945bce09..8ee1e5084ccf 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -987,9 +987,15 @@ void DrawEngineGLES::DoFlush() { glClear(target); framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); + int scissorX1 = gstate.getScissorX1(); + int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; 
+ framebufferManager_->SetSafeSize(scissorX2, scissorY2); + + if (g_Config.bBlockTransferGPU && colorMask && alphaMask) { + ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor); + } } } diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index bf2d175c4ab4..7b37a187f6b2 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -817,9 +817,15 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { // We let the framebuffer manager handle the clear. It can use renderpasses to optimize on tilers. framebufferManager_->NotifyClear(gstate.isClearModeColorMask(), gstate.isClearModeAlphaMask(), gstate.isClearModeDepthMask(), result.color, result.depth); + int scissorX1 = gstate.getScissorX1(); + int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; framebufferManager_->SetSafeSize(scissorX2, scissorY2); + + if (g_Config.bBlockTransferGPU && gstate.isClearModeColorMask() && gstate.isClearModeAlphaMask()) { + ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, result.color); + } } }