Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle cull mode by indices, refer to issue #10172 #10973

Merged
merged 6 commits into from
Jun 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,12 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts += indexUpperBound - indexLowerBound + 1;
indexGen.AddPrim(dc.prim, dc.vertexCount);

bool clockwise = true;
if (dc.cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != dc.cullMode) {
clockwise = false;
}
indexGen.AddPrim(dc.prim, dc.vertexCount, clockwise);
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
Expand All @@ -544,17 +549,29 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
switch (dc.indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
bool clockwise = true;
if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) {
clockwise = false;
}
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound, clockwise);
}
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound);
bool clockwise = true;
if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) {
clockwise = false;
}
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound, clockwise);
}
break;
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound);
bool clockwise = true;
if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) {
clockwise = false;
}
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound, clockwise);
}
break;
}
Expand Down Expand Up @@ -659,7 +676,7 @@ ReliableHashType DrawEngineCommon::ComputeHash() {
}

// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
DispatchFlush();
}
Expand Down Expand Up @@ -697,6 +714,7 @@ void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
dc.prim = prim;
dc.vertexCount = vertexCount;
dc.uvScale = gstate_c.uv;
dc.cullMode = cullMode;

if (inds) {
GetIndexBounds(inds, vertexCount, vertTypeID, &dc.indexLowerBound, &dc.indexUpperBound);
Expand Down
5 changes: 3 additions & 2 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ class DrawEngineCommon {
// is different. Should probably refactor that.
// Note that vertTypeID should be computed using GetVertTypeID().
virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, bytesRead);
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, -1, bytesRead);
}

bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead);

void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead);
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead);
void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);

Expand Down Expand Up @@ -143,6 +143,7 @@ class DrawEngineCommon {
u16 indexLowerBound;
u16 indexUpperBound;
UVScale uvScale;
int cullMode;
};

enum { MAX_DEFERRED_DRAW_CALLS = 128 };
Expand Down
72 changes: 40 additions & 32 deletions GPU/Common/IndexGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ void IndexGenerator::Setup(u16 *inds) {
Reset();
}

void IndexGenerator::AddPrim(int prim, int vertexCount) {
void IndexGenerator::AddPrim(int prim, int vertexCount, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: AddPoints(vertexCount); break;
case GE_PRIM_LINES: AddLineList(vertexCount); break;
case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount); break;
case GE_PRIM_TRIANGLES: AddList(vertexCount); break;
case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount); break;
case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount); break;
case GE_PRIM_TRIANGLES: AddList(vertexCount, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount, clockwise); break;
case GE_PRIM_RECTANGLES: AddRectangles(vertexCount); break; // Same
}
}
Expand All @@ -61,13 +61,15 @@ void IndexGenerator::AddPoints(int numVerts) {
seenPrims_ |= 1 << GE_PRIM_POINTS;
}

void IndexGenerator::AddList(int numVerts) {
void IndexGenerator::AddList(int numVerts, bool clockwise) {
u16 *outInds = inds_;
const int startIndex = index_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numVerts; i += 3) {
*outInds++ = startIndex + i;
*outInds++ = startIndex + i + 1;
*outInds++ = startIndex + i + 2;
*outInds++ = startIndex + i + v1;
*outInds++ = startIndex + i + v2;
}
inds_ = outInds;
// ignore overflow verts
Expand All @@ -77,8 +79,8 @@ void IndexGenerator::AddList(int numVerts) {
seenPrims_ |= 1 << GE_PRIM_TRIANGLES;
}

void IndexGenerator::AddStrip(int numVerts) {
int wind = 1;
void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
int wind = clockwise ? 1 : 2;
const int numTris = numVerts - 2;
u16 *outInds = inds_;
int ibase = index_;
Expand All @@ -105,14 +107,16 @@ void IndexGenerator::AddStrip(int numVerts) {
}
}

void IndexGenerator::AddFan(int numVerts) {
void IndexGenerator::AddFan(int numVerts, bool clockwise) {
const int numTris = numVerts - 2;
u16 *outInds = inds_;
const int startIndex = index_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numTris; i++) {
*outInds++ = startIndex;
*outInds++ = startIndex + i + 1;
*outInds++ = startIndex + i + 2;
*outInds++ = startIndex + i + v1;
*outInds++ = startIndex + i + v2;
}
inds_ = outInds;
index_ += numVerts;
Expand Down Expand Up @@ -210,7 +214,7 @@ void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int in
}

template <class ITypeLE, int flag>
void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset) {
void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
indexOffset = index_ - indexOffset;
// We only bother doing this minor optimization in triangle list, since it's by far the most
// common operation that can benefit.
Expand All @@ -222,10 +226,12 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf
u16 *outInds = inds_;
int numTris = numInds / 3; // Round to whole triangles
numInds = numTris * 3;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numInds; i += 3) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i + 1];
*outInds++ = indexOffset + inds[i + 2];
*outInds++ = indexOffset + inds[i + v1];
*outInds++ = indexOffset + inds[i + v2];
}
inds_ = outInds;
count_ += numInds;
Expand All @@ -235,8 +241,8 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf
}

template <class ITypeLE, int flag>
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset) {
int wind = 1;
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
int wind = clockwise ? 1 : 2;
indexOffset = index_ - indexOffset;
int numTris = numInds - 2;
u16 *outInds = inds_;
Expand All @@ -253,15 +259,17 @@ void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexO
}

template <class ITypeLE, int flag>
void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset) {
void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
if (numInds <= 0) return;
indexOffset = index_ - indexOffset;
int numTris = numInds - 2;
u16 *outInds = inds_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't use these below?

-[Unknown]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will fix it now. :flushed:

for (int i = 0; i < numTris; i++) {
*outInds++ = indexOffset + inds[0];
*outInds++ = indexOffset + inds[i + 1];
*outInds++ = indexOffset + inds[i + 2];
*outInds++ = indexOffset + inds[i + v1];
*outInds++ = indexOffset + inds[i + v2];
}
inds_ = outInds;
count_ += numTris * 3;
Expand All @@ -286,38 +294,38 @@ inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds
}

// Could template this too, but would have to define in header.
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset) {
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break; // Same
}
}

void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset) {
void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break; // Same
}
}

void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset) {
void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break; // Same
}
}
20 changes: 10 additions & 10 deletions GPU/Common/IndexGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ class IndexGenerator {

GEPrimitiveType Prim() const { return prim_; }

void AddPrim(int prim, int vertexCount);
void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset);
void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset);
void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset);
void AddPrim(int prim, int vertexCount, bool clockwise = true);
void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise);
void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise);
void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise);

void Advance(int numVerts) {
index_ += numVerts;
Expand All @@ -75,9 +75,9 @@ class IndexGenerator {
// Points (why index these? code simplicity)
void AddPoints(int numVerts);
// Triangles
void AddList(int numVerts);
void AddStrip(int numVerts);
void AddFan(int numVerts);
void AddList(int numVerts, bool clockwise);
void AddStrip(int numVerts, bool clockwise);
void AddFan(int numVerts, bool clockwise);
// Lines
void AddLineList(int numVerts);
void AddLineStrip(int numVerts);
Expand All @@ -88,16 +88,16 @@ class IndexGenerator {
template <class ITypeLE, int flag>
void TranslatePoints(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset);
void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise);
template <class ITypeLE, int flag>
inline void TranslateLineList(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
inline void TranslateLineStrip(int numVerts, const ITypeLE *inds, int indexOffset);

template <class ITypeLE, int flag>
void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset);
void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise);
template <class ITypeLE, int flag>
void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset);
void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise);

template <class ITypeLE, int flag>
inline void TranslateRectangles(int numVerts, const ITypeLE *inds, int indexOffset);
Expand Down
32 changes: 30 additions & 2 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1530,8 +1530,11 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
int bytesRead = 0;
UpdateUVScaleOffset();

// cull mode
int cullMode = gstate.isCullEnabled() ? gstate.getCullMode() : -1;

uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode());
drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, &bytesRead);
drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead);
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
// Some games rely on this, they don't bother reloading VADDR and IADDR.
// The VADDR/IADDR registers are NOT updated.
Expand Down Expand Up @@ -1577,7 +1580,14 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}

drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, &bytesRead);
if (newPrim != GE_PRIM_TRIANGLE_STRIP && cullMode != -1 && cullMode != gstate.getCullMode()) {
DEBUG_LOG(G3D, "flush cull mode before prim: %d", newPrim);
drawEngineCommon_->DispatchFlush();
gstate.cmdmem[GE_CMD_CULL] ^= 1;
gstate_c.Dirty(DIRTY_RASTER_STATE);
}

drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, cullMode, &bytesRead);
AdvanceVerts(vertexType, count, bytesRead);
totalVertCount += count;
break;
Expand All @@ -1604,6 +1614,10 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
case GE_CMD_BASE:
gstate.cmdmem[GE_CMD_BASE] = data;
break;
case GE_CMD_CULL:
// flip face by indices for GE_PRIM_TRIANGLE_STRIP
cullMode = data & 1;
break;
case GE_CMD_NOP:
case GE_CMD_NOP_FF:
break;
Expand All @@ -1618,6 +1632,14 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
gstate.cmdmem[GE_CMD_TEXSCALEV] = data;
gstate_c.uv.vScale = getFloat24(data);
break;
case GE_CMD_TEXOFFSETU:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tomb Raider uses GE_CMD_TEXOFFSETU and GE_CMD_TEXOFFSETV frequently, but it's not quite useful. :grimacing:

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's some engine; there are other games that do that too (basically the whole point of the gstate_c.uv optimization). Makes sense to add them here.

-[Unknown]

gstate.cmdmem[GE_CMD_TEXOFFSETU] = data;
gstate_c.uv.uOff = getFloat24(data);
break;
case GE_CMD_TEXOFFSETV:
gstate.cmdmem[GE_CMD_TEXOFFSETV] = data;
gstate_c.uv.vOff = getFloat24(data);
break;
case GE_CMD_TEXLEVEL:
// Same Gran Turismo hack from Execute_TexLevel
if ((data & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & data) != 0) {
Expand Down Expand Up @@ -1654,6 +1676,12 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
if (cmdCount > 0) {
UpdatePC(currentList->pc, currentList->pc + cmdCount * 4);
currentList->pc += cmdCount * 4;
// flush back cull mode
if (cullMode != -1 && cullMode != gstate.getCullMode()) {
drawEngineCommon_->DispatchFlush();
gstate.cmdmem[GE_CMD_CULL] ^= 1;
gstate_c.Dirty(DIRTY_RASTER_STATE);
}
}

gpuStats.vertexGPUCycles += vertexCost_ * totalVertCount;
Expand Down