From cfc353ee81cec2a39508de4a606660f74d006414 Mon Sep 17 00:00:00 2001 From: zhang wei Date: Fri, 27 Apr 2018 16:33:35 +0800 Subject: [PATCH 1/6] handle cull mode by indices, improve performance for GVGNP, refer to #10172 --- GPU/Common/DrawEngineCommon.cpp | 21 +++++++++++++++++---- GPU/Common/DrawEngineCommon.h | 5 +++-- GPU/Common/IndexGenerator.cpp | 16 ++++++++-------- GPU/Common/IndexGenerator.h | 8 ++++---- GPU/GPUCommon.cpp | 19 +++++++++++++++++-- 5 files changed, 49 insertions(+), 20 deletions(-) diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 8903137d0e8e..5eb4c1fdf731 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -544,17 +544,29 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) { switch (dc.indexType) { case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT: for (int j = i; j <= lastMatch; j++) { - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound); + int cullMode = drawCalls[j].cullMode; + if (cullMode != -1 && gstate.isCullEnabled()) { + cullMode = gstate.getCullMode() == cullMode ? 0 : 1; + } + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound, cullMode); } break; case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT: for (int j = i; j <= lastMatch; j++) { - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound); + int cullMode = drawCalls[j].cullMode; + if (cullMode != -1 && gstate.isCullEnabled()) { + cullMode = gstate.getCullMode() == cullMode ? 0 : 1; + } + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound, cullMode); } break; case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT: for (int j = i; j <= lastMatch; j++) { - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound); + int cullMode = drawCalls[j].cullMode; + if (cullMode != -1 && gstate.isCullEnabled()) { + cullMode = gstate.getCullMode() == cullMode ? 0 : 1; + } + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound, cullMode); } break; } @@ -659,7 +671,7 @@ ReliableHashType DrawEngineCommon::ComputeHash() { } // vertTypeID is the vertex type but with the UVGen mode smashed into the top bits. -void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) { +void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) { if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) { DispatchFlush(); } @@ -697,6 +709,7 @@ void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, dc.prim = prim; dc.vertexCount = vertexCount; dc.uvScale = gstate_c.uv; + dc.cullMode = cullMode; if (inds) { GetIndexBounds(inds, vertexCount, vertTypeID, &dc.indexLowerBound, &dc.indexUpperBound); diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index 45d9a185abdc..7e7f09443be2 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -67,12 +67,12 @@ class DrawEngineCommon { // is different. Should probably refactor that. // Note that vertTypeID should be computed using GetVertTypeID(). virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) { - SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, bytesRead); + SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, -1, bytesRead); } bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead); - void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead); + void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead); void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead); void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead); @@ -143,6 +143,7 @@ class DrawEngineCommon { u16 indexLowerBound; u16 indexUpperBound; UVScale uvScale; + int cullMode; }; enum { MAX_DEFERRED_DRAW_CALLS = 128 }; diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index 46f076cee173..347d712c8f68 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -235,8 +235,8 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf } template -void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset) { - int wind = 1; +void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, int cullMode) { + int wind = cullMode == 1 ? 2 : 1; indexOffset = index_ - indexOffset; int numTris = numInds - 2; u16 *outInds = inds_; @@ -286,37 +286,37 @@ inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds } // Could template this too, but would have to define in header. -void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset) { +void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, int cullMode) { switch (prim) { case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, cullMode); break; case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } -void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset) { +void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, int cullMode) { switch (prim) { case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, cullMode); break; case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } -void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset) { +void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, int cullMode) { switch (prim) { case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, cullMode); break; case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } diff --git a/GPU/Common/IndexGenerator.h b/GPU/Common/IndexGenerator.h index 23185c7215ec..040c52626c75 100644 --- a/GPU/Common/IndexGenerator.h +++ b/GPU/Common/IndexGenerator.h @@ -50,9 +50,9 @@ class IndexGenerator { GEPrimitiveType Prim() const { return prim_; } void AddPrim(int prim, int vertexCount); - void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset); - void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset); - void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset); + void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, int cullMode); + void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, int cullMode); + void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, int cullMode); void Advance(int numVerts) { index_ += numVerts; @@ -95,7 +95,7 @@ class IndexGenerator { inline void TranslateLineStrip(int numVerts, const ITypeLE *inds, int indexOffset); template - void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset); + void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset, int cullMode); template void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 262ed460a783..6335c931303a 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1530,8 +1530,11 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { int bytesRead = 0; UpdateUVScaleOffset(); + // cull mode + int cullMode = gstate.isCullEnabled() ? gstate.getCullMode() : -1; + uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode()); - drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, &bytesRead); + drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead); // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed). // Some games rely on this, they don't bother reloading VADDR and IADDR. // The VADDR/IADDR registers are NOT updated. @@ -1577,7 +1580,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { inds = Memory::GetPointerUnchecked(gstate_c.indexAddr); } - drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, &bytesRead); + drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, cullMode, &bytesRead); AdvanceVerts(vertexType, count, bytesRead); totalVertCount += count; break; @@ -1604,6 +1607,10 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { case GE_CMD_BASE: gstate.cmdmem[GE_CMD_BASE] = data; break; + case GE_CMD_CULL: + // flip face by indices for GE_PRIM_TRIANGLE_STRIP + cullMode = data & 1; + break; case GE_CMD_NOP: case GE_CMD_NOP_FF: break; @@ -1618,6 +1625,14 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { gstate.cmdmem[GE_CMD_TEXSCALEV] = data; gstate_c.uv.vScale = getFloat24(data); break; + case GE_CMD_TEXOFFSETU: + gstate.cmdmem[GE_CMD_TEXOFFSETU] = data; + gstate_c.uv.uOff = getFloat24(data); + break; + case GE_CMD_TEXOFFSETV: + gstate.cmdmem[GE_CMD_TEXOFFSETV] = data; + gstate_c.uv.vOff = getFloat24(data); + break; case GE_CMD_TEXLEVEL: // Same Gran Turismo hack from Execute_TexLevel if ((data & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & data) != 0) { From 3419197ee9997a22caa67dc11f8bf0ac6d7edf2a Mon Sep 17 00:00:00 2001 From: zhang wei Date: Fri, 27 Apr 2018 21:49:43 +0800 Subject: [PATCH 2/6] flush back cull mode --- GPU/Common/DrawEngineCommon.cpp | 24 ++++++++++++------------ GPU/Common/IndexGenerator.cpp | 16 ++++++++-------- GPU/Common/IndexGenerator.h | 8 ++++---- GPU/GPUCommon.cpp | 6 ++++++ 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 5eb4c1fdf731..cf3553770d72 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -544,29 +544,29 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) { switch (dc.indexType) { case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT: for (int j = i; j <= lastMatch; j++) { - int cullMode = drawCalls[j].cullMode; - if (cullMode != -1 && gstate.isCullEnabled()) { - cullMode = gstate.getCullMode() == cullMode ? 0 : 1; + bool clockwise = true; + if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) { + clockwise = false; } - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound, cullMode); + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound, clockwise); } break; case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT: for (int j = i; j <= lastMatch; j++) { - int cullMode = drawCalls[j].cullMode; - if (cullMode != -1 && gstate.isCullEnabled()) { - cullMode = gstate.getCullMode() == cullMode ? 0 : 1; + bool clockwise = true; + if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) { + clockwise = false; } - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound, cullMode); + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound, clockwise); } break; case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT: for (int j = i; j <= lastMatch; j++) { - int cullMode = drawCalls[j].cullMode; - if (cullMode != -1 && gstate.isCullEnabled()) { - cullMode = gstate.getCullMode() == cullMode ? 0 : 1; + bool clockwise = true; + if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) { + clockwise = false; } - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound, cullMode); + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound, clockwise); } break; } diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index 347d712c8f68..f8de86c9058e 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -235,8 +235,8 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf } template -void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, int cullMode) { - int wind = cullMode == 1 ? 2 : 1; +void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) { + int wind = clockwise ? 1 : 2; indexOffset = index_ - indexOffset; int numTris = numInds - 2; u16 *outInds = inds_; @@ -286,37 +286,37 @@ inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds } // Could template this too, but would have to define in header. -void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, int cullMode) { +void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise) { switch (prim) { case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, cullMode); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } -void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, int cullMode) { +void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise) { switch (prim) { case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, cullMode); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } -void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, int cullMode) { +void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise) { switch (prim) { case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, cullMode); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } diff --git a/GPU/Common/IndexGenerator.h b/GPU/Common/IndexGenerator.h index 040c52626c75..ab33ff4f514a 100644 --- a/GPU/Common/IndexGenerator.h +++ b/GPU/Common/IndexGenerator.h @@ -50,9 +50,9 @@ class IndexGenerator { GEPrimitiveType Prim() const { return prim_; } void AddPrim(int prim, int vertexCount); - void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, int cullMode); - void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, int cullMode); - void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, int cullMode); + void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise); + void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise); + void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise); void Advance(int numVerts) { index_ += numVerts; @@ -95,7 +95,7 @@ class IndexGenerator { inline void TranslateLineStrip(int numVerts, const ITypeLE *inds, int indexOffset); template - void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset, int cullMode); + void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise); template void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 6335c931303a..c2a966cda5de 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1669,6 +1669,12 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { if (cmdCount > 0) { UpdatePC(currentList->pc, currentList->pc + cmdCount * 4); currentList->pc += cmdCount * 4; + // flush back cull mode + if (cullMode != -1 && cullMode != gstate.getCullMode()) { + drawEngineCommon_->DispatchFlush(); + gstate.cmdmem[GE_CMD_CULL] ^= 1; + gstate_c.Dirty(DIRTY_RASTER_STATE); + } } gpuStats.vertexGPUCycles += vertexCost_ * totalVertCount; From 51388ded228a742111f564cde419493a1c64e65e Mon Sep 17 00:00:00 2001 From: zhang wei Date: Sat, 28 Apr 2018 14:59:12 +0800 Subject: [PATCH 3/6] cullmode fallback --- GPU/GPUCommon.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index c2a966cda5de..1ab2977c8086 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1580,6 +1580,13 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { inds = Memory::GetPointerUnchecked(gstate_c.indexAddr); } + if (newPrim != GE_PRIM_TRIANGLE_STRIP && cullMode != -1 && cullMode != gstate.getCullMode()) { + DEBUG_LOG(G3D, "flush cull mode before prim: %d", newPrim); + drawEngineCommon_->DispatchFlush(); + gstate.cmdmem[GE_CMD_CULL] ^= 1; + gstate_c.Dirty(DIRTY_RASTER_STATE); + } + drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, cullMode, &bytesRead); AdvanceVerts(vertexType, count, bytesRead); totalVertCount += count; From d029bbacc7e7758804726a55a0c47f60a31a267d Mon Sep 17 00:00:00 2001 From: weihuoya Date: Sun, 3 Jun 2018 22:29:50 +0800 Subject: [PATCH 4/6] triangle fan --- GPU/Common/DrawEngineCommon.cpp | 7 ++++++- GPU/Common/IndexGenerator.cpp | 28 ++++++++++++++++------------ GPU/Common/IndexGenerator.h | 8 ++++---- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index cf3553770d72..95d6d5ad2eb4 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -520,7 +520,12 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) { dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride, dc.verts, indexLowerBound, indexUpperBound); decodedVerts += indexUpperBound - indexLowerBound + 1; - indexGen.AddPrim(dc.prim, dc.vertexCount); + + bool clockwise = true; + if (dc.cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != dc.cullMode) { + clockwise = false; + } + indexGen.AddPrim(dc.prim, dc.vertexCount, clockwise); } else { // It's fairly common that games issue long sequences of PRIM calls, with differing // inds pointer but the same base vertex pointer. We'd like to reuse vertices between diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index f8de86c9058e..068a5ced710f 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -36,14 +36,14 @@ void IndexGenerator::Setup(u16 *inds) { Reset(); } -void IndexGenerator::AddPrim(int prim, int vertexCount) { +void IndexGenerator::AddPrim(int prim, int vertexCount, bool clockwise) { switch (prim) { case GE_PRIM_POINTS: AddPoints(vertexCount); break; case GE_PRIM_LINES: AddLineList(vertexCount); break; case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount); break; case GE_PRIM_TRIANGLES: AddList(vertexCount); break; - case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount); break; - case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount); break; + case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount, clockwise); break; + case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount, clockwise); break; case GE_PRIM_RECTANGLES: AddRectangles(vertexCount); break; // Same } } @@ -77,8 +77,8 @@ void IndexGenerator::AddList(int numVerts) { seenPrims_ |= 1 << GE_PRIM_TRIANGLES; } -void IndexGenerator::AddStrip(int numVerts) { - int wind = 1; +void IndexGenerator::AddStrip(int numVerts, bool clockwise) { + int wind = clockwise ? 1 : 2; const int numTris = numVerts - 2; u16 *outInds = inds_; int ibase = index_; @@ -105,14 +105,16 @@ void IndexGenerator::AddStrip(int numVerts) { } } -void IndexGenerator::AddFan(int numVerts) { +void IndexGenerator::AddFan(int numVerts, bool clockwise) { const int numTris = numVerts - 2; u16 *outInds = inds_; const int startIndex = index_; + const int v1 = clockwise ? 1 : 2; + const int v2 = clockwise ? 2 : 1; for (int i = 0; i < numTris; i++) { *outInds++ = startIndex; - *outInds++ = startIndex + i + 1; - *outInds++ = startIndex + i + 2; + *outInds++ = startIndex + i + v1; + *outInds++ = startIndex + i + v2; } inds_ = outInds; index_ += numVerts; @@ -253,11 +255,13 @@ void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexO } template -void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset) { +void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) { if (numInds <= 0) return; indexOffset = index_ - indexOffset; int numTris = numInds - 2; u16 *outInds = inds_; + const int v1 = clockwise ? 1 : 2; + const int v2 = clockwise ? 2 : 1; for (int i = 0; i < numTris; i++) { *outInds++ = indexOffset + inds[0]; *outInds++ = indexOffset + inds[i + 1]; @@ -293,7 +297,7 @@ void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int in case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } @@ -305,7 +309,7 @@ void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, in case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } @@ -317,7 +321,7 @@ void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, in case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; - case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } diff --git a/GPU/Common/IndexGenerator.h b/GPU/Common/IndexGenerator.h index ab33ff4f514a..877808700c73 100644 --- a/GPU/Common/IndexGenerator.h +++ b/GPU/Common/IndexGenerator.h @@ -49,7 +49,7 @@ class IndexGenerator { GEPrimitiveType Prim() const { return prim_; } - void AddPrim(int prim, int vertexCount); + void AddPrim(int prim, int vertexCount, bool clockwise = true); void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise); void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise); void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise); @@ -76,8 +76,8 @@ class IndexGenerator { void AddPoints(int numVerts); // Triangles void AddList(int numVerts); - void AddStrip(int numVerts); - void AddFan(int numVerts); + void AddStrip(int numVerts, bool clockwise); + void AddFan(int numVerts, bool clockwise); // Lines void AddLineList(int numVerts); void AddLineStrip(int numVerts); @@ -97,7 +97,7 @@ class IndexGenerator { template void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise); template - void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset); + void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise); template inline void TranslateRectangles(int numVerts, const ITypeLE *inds, int indexOffset); From 9f6922221d1c9773ec6b6f23d8e007f968e8a4b7 Mon Sep 17 00:00:00 2001 From: weihuoya Date: Sun, 3 Jun 2018 22:50:35 +0800 Subject: [PATCH 5/6] fx --- GPU/Common/IndexGenerator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index 068a5ced710f..c481bfe957b2 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -264,8 +264,8 @@ void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOff const int v2 = clockwise ? 2 : 1; for (int i = 0; i < numTris; i++) { *outInds++ = indexOffset + inds[0]; - *outInds++ = indexOffset + inds[i + 1]; - *outInds++ = indexOffset + inds[i + 2]; + *outInds++ = indexOffset + inds[i + v1]; + *outInds++ = indexOffset + inds[i + v2]; } inds_ = outInds; count_ += numTris * 3; From f2e34767416be82d1a2bd4f2070817188679c307 Mon Sep 17 00:00:00 2001 From: zhang wei Date: Tue, 5 Jun 2018 10:39:40 +0800 Subject: [PATCH 6/6] triangles --- GPU/Common/IndexGenerator.cpp | 24 ++++++++++++++---------- GPU/Common/IndexGenerator.h | 4 ++-- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index c481bfe957b2..9c1b13d42db8 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -41,7 +41,7 @@ void IndexGenerator::AddPrim(int prim, int vertexCount, bool clockwise) { case GE_PRIM_POINTS: AddPoints(vertexCount); break; case GE_PRIM_LINES: AddLineList(vertexCount); break; case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount); break; - case GE_PRIM_TRIANGLES: AddList(vertexCount); break; + case GE_PRIM_TRIANGLES: AddList(vertexCount, clockwise); break; case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount, clockwise); break; case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount, clockwise); break; case GE_PRIM_RECTANGLES: AddRectangles(vertexCount); break; // Same @@ -61,13 +61,15 @@ void IndexGenerator::AddPoints(int numVerts) { seenPrims_ |= 1 << GE_PRIM_POINTS; } -void IndexGenerator::AddList(int numVerts) { +void IndexGenerator::AddList(int numVerts, bool clockwise) { u16 *outInds = inds_; const int startIndex = index_; + const int v1 = clockwise ? 1 : 2; + const int v2 = clockwise ? 2 : 1; for (int i = 0; i < numVerts; i += 3) { *outInds++ = startIndex + i; - *outInds++ = startIndex + i + 1; - *outInds++ = startIndex + i + 2; + *outInds++ = startIndex + i + v1; + *outInds++ = startIndex + i + v2; } inds_ = outInds; // ignore overflow verts @@ -212,7 +214,7 @@ void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int in } template -void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset) { +void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) { indexOffset = index_ - indexOffset; // We only bother doing this minor optimization in triangle list, since it's by far the most // common operation that can benefit. @@ -224,10 +226,12 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf u16 *outInds = inds_; int numTris = numInds / 3; // Round to whole triangles numInds = numTris * 3; + const int v1 = clockwise ? 1 : 2; + const int v2 = clockwise ? 2 : 1; for (int i = 0; i < numInds; i += 3) { *outInds++ = indexOffset + inds[i]; - *outInds++ = indexOffset + inds[i + 1]; - *outInds++ = indexOffset + inds[i + 2]; + *outInds++ = indexOffset + inds[i + v1]; + *outInds++ = indexOffset + inds[i + v2]; } inds_ = outInds; count_ += numInds; @@ -295,7 +299,7 @@ void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int in case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same @@ -307,7 +311,7 @@ void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, in case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same @@ -319,7 +323,7 @@ void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, in case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same diff --git a/GPU/Common/IndexGenerator.h b/GPU/Common/IndexGenerator.h index 877808700c73..6fb24930eef4 100644 --- a/GPU/Common/IndexGenerator.h +++ b/GPU/Common/IndexGenerator.h @@ -75,7 +75,7 @@ class IndexGenerator { // Points (why index these? code simplicity) void AddPoints(int numVerts); // Triangles - void AddList(int numVerts); + void AddList(int numVerts, bool clockwise); void AddStrip(int numVerts, bool clockwise); void AddFan(int numVerts, bool clockwise); // Lines @@ -88,7 +88,7 @@ class IndexGenerator { template void TranslatePoints(int numVerts, const ITypeLE *inds, int indexOffset); template - void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset); + void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise); template inline void TranslateLineList(int numVerts, const ITypeLE *inds, int indexOffset); template