From ee0bc6c04d01017b8064a1f2b102f7b351676ea1 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 18 Sep 2016 19:40:44 -0700 Subject: [PATCH 1/3] Clear memory when clearing drawing. This should help synchronize block transfers better. Should improve #8973. --- GPU/Common/DrawEngineCommon.cpp | 69 +++++++++++++++++++++++++++++++-- GPU/Common/DrawEngineCommon.h | 2 + GPU/Directx9/DrawEngineDX9.cpp | 6 +++ GPU/GLES/DrawEngineGLES.cpp | 6 +++ GPU/Vulkan/DrawEngineVulkan.cpp | 6 +++ 5 files changed, 85 insertions(+), 4 deletions(-) diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 15eefcf1b6db..5fe97f1faa32 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -15,16 +15,16 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include + +#include "Common/ColorConv.h" +#include "Core/Config.h" #include "GPU/Common/DrawEngineCommon.h" #include "GPU/Common/SplineCommon.h" #include "GPU/Common/VertexDecoderCommon.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" -#include "Core/Config.h" - -#include - #define QUAD_INDICES_MAX 65536 DrawEngineCommon::DrawEngineCommon() : dec_(nullptr) { @@ -116,6 +116,67 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, return DrawEngineCommon::NormalizeVertices(outPtr, bufPtr, inPtr, dec, lowerBound, upperBound, vertType); } +void DrawEngineCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor) { + u8 *addr = Memory::GetPointer(gstate.getFrameBufAddress()); + const bool singleByteClear = (clearColor >> 16) == (clearColor & 0xFFFF) && (clearColor >> 24) == (clearColor & 0xFF); + const int bpp = gstate.FrameBufFormat() == GE_FORMAT_8888 ? 4 : 2; + const int stride = gstate.FrameBufStride(); + const int width = x2 - x1; + + // Simple, but often alpha is different and gums up the works. + if (singleByteClear) { + const int byteStride = stride * bpp; + const int byteWidth = width * bpp; + addr += x1 * bpp; + for (int y = y1; y < y2; ++y) { + memset(addr + y * byteStride, clearColor, byteWidth); + } + } else { + u16 clear16 = 0; + switch (gstate.FrameBufFormat()) { + case GE_FORMAT_565: ConvertRGBA8888ToRGB565(&clear16, &clearColor, 1); break; + case GE_FORMAT_5551: ConvertRGBA8888ToRGBA5551(&clear16, &clearColor, 1); break; + case GE_FORMAT_4444: ConvertRGBA8888ToRGBA4444(&clear16, &clearColor, 1); break; + } + + // This will most often be true - rarely is the width not aligned. + if ((width & 3) == 0 && (x1 & 3) == 0) { + u64 val64 = clearColor | ((u64)clearColor << 32); + int xstride = 2; + if (bpp == 2) { + // Spread to all eight bytes. + u64 c2 = clear16 | (clear16 << 16); + val64 = c2 | (c2 << 32); + xstride = 4; + } + + u64 *addr64 = (u64 *)addr; + const int stride64 = stride / xstride; + const int x1_64 = x1 / xstride; + const int x2_64 = x2 / xstride; + for (int y = y1; y < y2; ++y) { + for (int x = x1_64; x < x2_64; ++x) { + addr64[y * stride64 + x] = val64; + } + } + } else if (bpp == 4) { + u32 *addr32 = (u32 *)addr; + for (int y = y1; y < y2; ++y) { + for (int x = x1; x < x2; ++x) { + addr32[y * stride + x] = clearColor; + } + } + } else if (bpp == 2) { + u16 *addr16 = (u16 *)addr; + for (int y = y1; y < y2; ++y) { + for (int x = x1; x < x2; ++x) { + addr16[y * stride + x] = clear16; + } + } + } + } +} + // This code is HIGHLY unoptimized! // // It does the simplest and safest test possible: If all points of a bbox is outside a single of diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index e566e4226c1c..0c42ddec3396 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -60,6 +60,8 @@ class DrawEngineCommon { // Preprocessing for spline/bezier u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType); + void ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor); + VertexDecoder *GetVertexDecoder(u32 vtype); inline int IndexSize(u32 vtype) const { diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index 03459e3aa0d6..ae8bfe0dfa15 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -876,9 +876,15 @@ void DrawEngineDX9::DoFlush() { dxstate.colorMask.set((mask & D3DCLEAR_TARGET) != 0, (mask & D3DCLEAR_TARGET) != 0, (mask & D3DCLEAR_TARGET) != 0, (mask & D3DCLEAR_STENCIL) != 0); pD3Ddevice->Clear(0, NULL, mask, clearColor, clearDepth, clearColor >> 24); + int scissorX1 = gstate.getScissorX1(); + int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; framebufferManager_->SetSafeSize(scissorX2, scissorY2); + + if (g_Config.bBlockTransferGPU && gstate.isClearModeColorMask() && gstate.isClearModeAlphaMask()) { + ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor); + } } } diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index c77d945bce09..8ee1e5084ccf 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -987,9 +987,15 @@ void DrawEngineGLES::DoFlush() { glClear(target); framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); + int scissorX1 = gstate.getScissorX1(); + int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; framebufferManager_->SetSafeSize(scissorX2, scissorY2); + + if (g_Config.bBlockTransferGPU && colorMask && alphaMask) { + ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor); + } } } diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index bf2d175c4ab4..7b37a187f6b2 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -817,9 +817,15 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { // We let the framebuffer manager handle the clear. It can use renderpasses to optimize on tilers. framebufferManager_->NotifyClear(gstate.isClearModeColorMask(), gstate.isClearModeAlphaMask(), gstate.isClearModeDepthMask(), result.color, result.depth); + int scissorX1 = gstate.getScissorX1(); + int scissorY1 = gstate.getScissorY1(); int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; framebufferManager_->SetSafeSize(scissorX2, scissorY2); + + if (g_Config.bBlockTransferGPU && gstate.isClearModeColorMask() && gstate.isClearModeAlphaMask()) { + ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, result.color); + } } } From f5f9b5227dc1d9b54f6d3e5ea271eb9b6eae6161 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 18 Sep 2016 19:48:21 -0700 Subject: [PATCH 2/3] Tweak 565 buffer clearing. --- GPU/Directx9/DrawEngineDX9.cpp | 2 +- GPU/GLES/DrawEngineGLES.cpp | 2 +- GPU/Vulkan/DrawEngineVulkan.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index ae8bfe0dfa15..3ef045e5f584 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -882,7 +882,7 @@ void DrawEngineDX9::DoFlush() { int scissorY2 = gstate.getScissorY2() + 1; framebufferManager_->SetSafeSize(scissorX2, scissorY2); - if (g_Config.bBlockTransferGPU && gstate.isClearModeColorMask() && gstate.isClearModeAlphaMask()) { + if (g_Config.bBlockTransferGPU && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) { ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor); } } diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 8ee1e5084ccf..1c05b253aa0d 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -993,7 +993,7 @@ void DrawEngineGLES::DoFlush() { int scissorY2 = gstate.getScissorY2() + 1; framebufferManager_->SetSafeSize(scissorX2, scissorY2); - if (g_Config.bBlockTransferGPU && colorMask && alphaMask) { + if (g_Config.bBlockTransferGPU && colorMask && (alphaMask || gstate.FrameBufFormat() == GE_FORMAT_565)) { ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor); } } diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 7b37a187f6b2..d2ca8b65eef0 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -823,7 +823,7 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { int scissorY2 = gstate.getScissorY2() + 1; framebufferManager_->SetSafeSize(scissorX2, scissorY2); - if (g_Config.bBlockTransferGPU && gstate.isClearModeColorMask() && gstate.isClearModeAlphaMask()) { + if (g_Config.bBlockTransferGPU && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) { ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, result.color); } } From 8c9ab09fe23a8445e5c11acc3d3458dbaceb92e3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 18 Sep 2016 20:18:55 -0700 Subject: [PATCH 3/3] Download 154000 each frame for Katamari. It textures from it in a weird way, maybe we can find a better fix. --- GPU/Common/FramebufferCommon.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 2f2bbb7f9851..ae7f6e70a682 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -129,6 +129,8 @@ void FramebufferManagerCommon::Init() { // The game draws solid colors to a small framebuffer, and then reads this directly in VRAM. // We force this framebuffer to 1x and force download it automatically. hackForce04154000Download_ = gameId == "NPJH50631" || gameId == "NPJH50372" || gameId == "NPJH90164" || gameId == "NPJH50515"; + // Let's also apply to Me & My Katamari. + hackForce04154000Download_ = hackForce04154000Download_ || gameId == "ULUS10094" || gameId == "ULES00339" || gameId == "ULJS00033" || gameId == "UCKS45022" || gameId == "ULJS19009" || gameId == "NPJH50141"; // And an initial clear. We don't clear per frame as the games are supposed to handle that // by themselves. @@ -1032,4 +1034,4 @@ void FramebufferManagerCommon::ShowScreenResolution() { messageStream << PSP_CoreParameter().pixelWidth << "x" << PSP_CoreParameter().pixelHeight; host->NotifyUserMessage(messageStream.str(), 2.0f, 0xFFFFFF, "resize"); -} \ No newline at end of file +}