Skip to content

Commit

Permalink
Merge pull request #10973 from weihuoya/cullmode
Browse files Browse the repository at this point in the history
Handle cull mode by indices; refer to issue #10172.
  • Loading branch information
hrydgard authored Jun 10, 2018
2 parents 6c494c3 + f2e3476 commit e98ea81
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 51 deletions.
28 changes: 23 additions & 5 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,12 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts += indexUpperBound - indexLowerBound + 1;
indexGen.AddPrim(dc.prim, dc.vertexCount);

bool clockwise = true;
if (dc.cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != dc.cullMode) {
clockwise = false;
}
indexGen.AddPrim(dc.prim, dc.vertexCount, clockwise);
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
Expand All @@ -544,17 +549,29 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
switch (dc.indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
bool clockwise = true;
if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) {
clockwise = false;
}
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound, clockwise);
}
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound);
bool clockwise = true;
if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) {
clockwise = false;
}
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound, clockwise);
}
break;
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound);
bool clockwise = true;
if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) {
clockwise = false;
}
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound, clockwise);
}
break;
}
Expand Down Expand Up @@ -659,7 +676,7 @@ ReliableHashType DrawEngineCommon::ComputeHash() {
}

// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
DispatchFlush();
}
Expand Down Expand Up @@ -697,6 +714,7 @@ void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
dc.prim = prim;
dc.vertexCount = vertexCount;
dc.uvScale = gstate_c.uv;
dc.cullMode = cullMode;

if (inds) {
GetIndexBounds(inds, vertexCount, vertTypeID, &dc.indexLowerBound, &dc.indexUpperBound);
Expand Down
5 changes: 3 additions & 2 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ class DrawEngineCommon {
// is different. Should probably refactor that.
// Note that vertTypeID should be computed using GetVertTypeID().
// Default dispatch hook: forwards every argument straight to SubmitPrim().
// This entry point has no cull-mode parameter, so the updated call passes -1 for
// cullMode. The decode code shown in this commit only reverses triangle winding when
// a draw call's cullMode is not -1, so -1 leaves the index order untouched.
// NOTE(review): this is a rendered diff with the +/- markers stripped; the first
// SubmitPrim call below appears to be the pre-change line and the second its
// replacement — confirm against the real header before editing.
virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, bytesRead);
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, -1, bytesRead);
}

bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead);

void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead);
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead);
void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);

Expand Down Expand Up @@ -143,6 +143,7 @@ class DrawEngineCommon {
u16 indexLowerBound;
u16 indexUpperBound;
UVScale uvScale;
int cullMode;
};

enum { MAX_DEFERRED_DRAW_CALLS = 128 };
Expand Down
72 changes: 40 additions & 32 deletions GPU/Common/IndexGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ void IndexGenerator::Setup(u16 *inds) {
Reset();
}

// Generates indices for `vertexCount` vertices of primitive type `prim` by dispatching
// to the matching Add* helper. Primitive types not listed in the switch are ignored.
// `clockwise` is forwarded only to the triangle list/strip/fan helpers; in the helpers
// visible in this commit, passing false swaps the second and third index of each
// triangle. Points, lines and rectangles do not receive it.
// NOTE(review): this is a rendered diff with the +/- markers stripped, so the old
// two-argument signature and the old triangle cases appear directly above their
// replacements — confirm against the real file before editing.
void IndexGenerator::AddPrim(int prim, int vertexCount) {
void IndexGenerator::AddPrim(int prim, int vertexCount, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: AddPoints(vertexCount); break;
case GE_PRIM_LINES: AddLineList(vertexCount); break;
case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount); break;
case GE_PRIM_TRIANGLES: AddList(vertexCount); break;
case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount); break;
case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount); break;
// Triangle primitives: the variants below also pass the requested winding.
case GE_PRIM_TRIANGLES: AddList(vertexCount, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount, clockwise); break;
case GE_PRIM_RECTANGLES: AddRectangles(vertexCount); break; // Same
}
}
Expand All @@ -61,13 +61,15 @@ void IndexGenerator::AddPoints(int numVerts) {
seenPrims_ |= 1 << GE_PRIM_POINTS;
}

void IndexGenerator::AddList(int numVerts) {
void IndexGenerator::AddList(int numVerts, bool clockwise) {
u16 *outInds = inds_;
const int startIndex = index_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numVerts; i += 3) {
*outInds++ = startIndex + i;
*outInds++ = startIndex + i + 1;
*outInds++ = startIndex + i + 2;
*outInds++ = startIndex + i + v1;
*outInds++ = startIndex + i + v2;
}
inds_ = outInds;
// ignore overflow verts
Expand All @@ -77,8 +79,8 @@ void IndexGenerator::AddList(int numVerts) {
seenPrims_ |= 1 << GE_PRIM_TRIANGLES;
}

void IndexGenerator::AddStrip(int numVerts) {
int wind = 1;
void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
int wind = clockwise ? 1 : 2;
const int numTris = numVerts - 2;
u16 *outInds = inds_;
int ibase = index_;
Expand All @@ -105,14 +107,16 @@ void IndexGenerator::AddStrip(int numVerts) {
}
}

void IndexGenerator::AddFan(int numVerts) {
void IndexGenerator::AddFan(int numVerts, bool clockwise) {
const int numTris = numVerts - 2;
u16 *outInds = inds_;
const int startIndex = index_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numTris; i++) {
*outInds++ = startIndex;
*outInds++ = startIndex + i + 1;
*outInds++ = startIndex + i + 2;
*outInds++ = startIndex + i + v1;
*outInds++ = startIndex + i + v2;
}
inds_ = outInds;
index_ += numVerts;
Expand Down Expand Up @@ -210,7 +214,7 @@ void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int in
}

template <class ITypeLE, int flag>
void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset) {
void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
indexOffset = index_ - indexOffset;
// We only bother doing this minor optimization in triangle list, since it's by far the most
// common operation that can benefit.
Expand All @@ -222,10 +226,12 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf
u16 *outInds = inds_;
int numTris = numInds / 3; // Round to whole triangles
numInds = numTris * 3;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numInds; i += 3) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i + 1];
*outInds++ = indexOffset + inds[i + 2];
*outInds++ = indexOffset + inds[i + v1];
*outInds++ = indexOffset + inds[i + v2];
}
inds_ = outInds;
count_ += numInds;
Expand All @@ -235,8 +241,8 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf
}

template <class ITypeLE, int flag>
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset) {
int wind = 1;
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
int wind = clockwise ? 1 : 2;
indexOffset = index_ - indexOffset;
int numTris = numInds - 2;
u16 *outInds = inds_;
Expand All @@ -253,15 +259,17 @@ void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexO
}

template <class ITypeLE, int flag>
void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset) {
void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
if (numInds <= 0) return;
indexOffset = index_ - indexOffset;
int numTris = numInds - 2;
u16 *outInds = inds_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numTris; i++) {
*outInds++ = indexOffset + inds[0];
*outInds++ = indexOffset + inds[i + 1];
*outInds++ = indexOffset + inds[i + 2];
*outInds++ = indexOffset + inds[i + v1];
*outInds++ = indexOffset + inds[i + v2];
}
inds_ = outInds;
count_ += numTris * 3;
Expand All @@ -286,38 +294,38 @@ inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds
}

// Could template this too, but would have to define in header.
// 8-bit index overload: dispatches on `prim` to the matching Translate* template,
// instantiated with <u8, SEEN_INDEX8>. `numInds`, `inds` and `indexOffset` are passed
// through unchanged. `clockwise` is forwarded only to the triangle list/strip/fan
// translators; points, lines and rectangles do not receive it.
// NOTE(review): this is a rendered diff with the +/- markers stripped, so the old
// signature and old triangle cases appear directly above their replacements.
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset) {
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
// Triangle primitives: the variants below also pass the requested winding.
case GE_PRIM_TRIANGLES: TranslateList<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break; // Same
}
}

// 16-bit little-endian index overload: dispatches on `prim` to the matching Translate*
// template, instantiated with <u16_le, SEEN_INDEX16>. `numInds`, `inds` and
// `indexOffset` are passed through unchanged. `clockwise` is forwarded only to the
// triangle list/strip/fan translators; points, lines and rectangles do not receive it.
// NOTE(review): this is a rendered diff with the +/- markers stripped, so the old
// signature and old triangle cases appear directly above their replacements.
void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset) {
void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
// Triangle primitives: the variants below also pass the requested winding.
case GE_PRIM_TRIANGLES: TranslateList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break; // Same
}
}

// 32-bit little-endian index overload: dispatches on `prim` to the matching Translate*
// template, instantiated with <u32_le, SEEN_INDEX32>. `numInds`, `inds` and
// `indexOffset` are passed through unchanged. `clockwise` is forwarded only to the
// triangle list/strip/fan translators; points, lines and rectangles do not receive it.
// NOTE(review): this is a rendered diff with the +/- markers stripped, so the old
// signature and old triangle cases appear directly above their replacements.
void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset) {
void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
// Triangle primitives: the variants below also pass the requested winding.
case GE_PRIM_TRIANGLES: TranslateList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break; // Same
}
}
20 changes: 10 additions & 10 deletions GPU/Common/IndexGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ class IndexGenerator {

GEPrimitiveType Prim() const { return prim_; }

void AddPrim(int prim, int vertexCount);
void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset);
void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset);
void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset);
void AddPrim(int prim, int vertexCount, bool clockwise = true);
void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise);
void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise);
void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise);

void Advance(int numVerts) {
index_ += numVerts;
Expand All @@ -75,9 +75,9 @@ class IndexGenerator {
// Points (why index these? code simplicity)
void AddPoints(int numVerts);
// Triangles
void AddList(int numVerts);
void AddStrip(int numVerts);
void AddFan(int numVerts);
void AddList(int numVerts, bool clockwise);
void AddStrip(int numVerts, bool clockwise);
void AddFan(int numVerts, bool clockwise);
// Lines
void AddLineList(int numVerts);
void AddLineStrip(int numVerts);
Expand All @@ -88,16 +88,16 @@ class IndexGenerator {
template <class ITypeLE, int flag>
void TranslatePoints(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset);
void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise);
template <class ITypeLE, int flag>
inline void TranslateLineList(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
inline void TranslateLineStrip(int numVerts, const ITypeLE *inds, int indexOffset);

template <class ITypeLE, int flag>
void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset);
void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise);
template <class ITypeLE, int flag>
void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset);
void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise);

template <class ITypeLE, int flag>
inline void TranslateRectangles(int numVerts, const ITypeLE *inds, int indexOffset);
Expand Down
32 changes: 30 additions & 2 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1531,8 +1531,11 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
int bytesRead = 0;
UpdateUVScaleOffset();

// cull mode
int cullMode = gstate.isCullEnabled() ? gstate.getCullMode() : -1;

uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode());
drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, &bytesRead);
drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead);
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
// Some games rely on this, they don't bother reloading VADDR and IADDR.
// The VADDR/IADDR registers are NOT updated.
Expand Down Expand Up @@ -1578,7 +1581,14 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}

drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, &bytesRead);
if (newPrim != GE_PRIM_TRIANGLE_STRIP && cullMode != -1 && cullMode != gstate.getCullMode()) {
DEBUG_LOG(G3D, "flush cull mode before prim: %d", newPrim);
drawEngineCommon_->DispatchFlush();
gstate.cmdmem[GE_CMD_CULL] ^= 1;
gstate_c.Dirty(DIRTY_RASTER_STATE);
}

drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, cullMode, &bytesRead);
AdvanceVerts(vertexType, count, bytesRead);
totalVertCount += count;
break;
Expand All @@ -1605,6 +1615,10 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
case GE_CMD_BASE:
gstate.cmdmem[GE_CMD_BASE] = data;
break;
case GE_CMD_CULL:
// flip face by indices for GE_PRIM_TRIANGLE_STRIP
cullMode = data & 1;
break;
case GE_CMD_NOP:
case GE_CMD_NOP_FF:
break;
Expand All @@ -1619,6 +1633,14 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
gstate.cmdmem[GE_CMD_TEXSCALEV] = data;
gstate_c.uv.vScale = getFloat24(data);
break;
case GE_CMD_TEXOFFSETU:
gstate.cmdmem[GE_CMD_TEXOFFSETU] = data;
gstate_c.uv.uOff = getFloat24(data);
break;
case GE_CMD_TEXOFFSETV:
gstate.cmdmem[GE_CMD_TEXOFFSETV] = data;
gstate_c.uv.vOff = getFloat24(data);
break;
case GE_CMD_TEXLEVEL:
// Same Gran Turismo hack from Execute_TexLevel
if ((data & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & data) != 0) {
Expand Down Expand Up @@ -1655,6 +1677,12 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
if (cmdCount > 0) {
UpdatePC(currentList->pc, currentList->pc + cmdCount * 4);
currentList->pc += cmdCount * 4;
// flush back cull mode
if (cullMode != -1 && cullMode != gstate.getCullMode()) {
drawEngineCommon_->DispatchFlush();
gstate.cmdmem[GE_CMD_CULL] ^= 1;
gstate_c.Dirty(DIRTY_RASTER_STATE);
}
}

gpuStats.vertexGPUCycles += vertexCost_ * totalVertCount;
Expand Down

0 comments on commit e98ea81

Please sign in to comment.