Handle cull mode by indices to improve performance for GVGNP; refer to #10172

This commit is contained in:
zhang wei 2018-04-27 16:33:35 +08:00
parent 386c9d4fcf
commit cfc353ee81
5 changed files with 49 additions and 20 deletions

View File

@ -544,17 +544,29 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
// Translate the indices of every draw call in the run j = i..lastMatch, choosing the
// TranslatePrim overload that matches the index width (8, 16 or 32 bit) of dc.indexType.
switch (dc.indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound);
// Start from the cull mode recorded on the draw call. A value of -1 is passed through
// untouched. Otherwise, while culling is currently enabled, the value is reduced to
// 0 (matches the current gstate cull mode) or 1 (differs from it). If culling is not
// enabled here, the recorded value is forwarded as-is.
// NOTE(review): this normalisation is repeated verbatim for each index width below;
// a small shared helper would remove the triplication.
int cullMode = drawCalls[j].cullMode;
if (cullMode != -1 && gstate.isCullEnabled()) {
cullMode = gstate.getCullMode() == cullMode ? 0 : 1;
}
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound, cullMode);
}
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound);
// Same cull-mode normalisation as the 8-bit case, applied to 16-bit indices.
int cullMode = drawCalls[j].cullMode;
if (cullMode != -1 && gstate.isCullEnabled()) {
cullMode = gstate.getCullMode() == cullMode ? 0 : 1;
}
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound, cullMode);
}
break;
case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound);
// Same cull-mode normalisation as the 8-bit case, applied to 32-bit indices.
int cullMode = drawCalls[j].cullMode;
if (cullMode != -1 && gstate.isCullEnabled()) {
cullMode = gstate.getCullMode() == cullMode ? 0 : 1;
}
indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound, cullMode);
}
break;
}
@ -659,7 +671,7 @@ ReliableHashType DrawEngineCommon::ComputeHash() {
}
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
DispatchFlush();
}
@ -697,6 +709,7 @@ void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
dc.prim = prim;
dc.vertexCount = vertexCount;
dc.uvScale = gstate_c.uv;
dc.cullMode = cullMode;
if (inds) {
GetIndexBounds(inds, vertexCount, vertTypeID, &dc.indexLowerBound, &dc.indexUpperBound);

View File

@ -67,12 +67,12 @@ public:
// is different. Should probably refactor that.
// Note that vertTypeID should be computed using GetVertTypeID().
// Default dispatch: forwards every argument to SubmitPrim unchanged. The SubmitPrim
// overload that takes a cullMode is given -1 here, the same value
// GPUCommon::Execute_Prim passes when culling is disabled.
virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, bytesRead);
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, -1, bytesRead);
}
bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead);
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead);
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead);
void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
@ -143,6 +143,7 @@ protected:
u16 indexLowerBound;
u16 indexUpperBound;
UVScale uvScale;
int cullMode;
};
enum { MAX_DEFERRED_DRAW_CALLS = 128 };

View File

@ -235,8 +235,8 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf
}
template <class ITypeLE, int flag>
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset) {
int wind = 1;
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, int cullMode) {
int wind = cullMode == 1 ? 2 : 1;
indexOffset = index_ - indexOffset;
int numTris = numInds - 2;
u16 *outInds = inds_;
@ -286,37 +286,37 @@ inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds
}
// Could template this too, but would have to define in header.
// 8-bit index overload: dispatches on prim to the matching Translate* helper,
// instantiated for u8 indices with the SEEN_INDEX8 flag.
// cullMode is forwarded only to TranslateStrip (GE_PRIM_TRIANGLE_STRIP); every other
// primitive type ignores it. A prim value with no case label falls out of the switch
// without doing anything.
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset) {
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, int cullMode) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset, cullMode); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break; // Same
}
}
// 16-bit index overload: dispatches on prim to the matching Translate* helper,
// instantiated for u16_le indices with the SEEN_INDEX16 flag.
// cullMode is forwarded only to TranslateStrip (GE_PRIM_TRIANGLE_STRIP); every other
// primitive type ignores it. A prim value with no case label falls out of the switch
// without doing anything.
void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset) {
void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, int cullMode) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, cullMode); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break; // Same
}
}
void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset) {
void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, int cullMode) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, cullMode); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break; // Same
}

View File

@ -50,9 +50,9 @@ public:
GEPrimitiveType Prim() const { return prim_; }
void AddPrim(int prim, int vertexCount);
void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset);
void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset);
void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset);
void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, int cullMode);
void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, int cullMode);
void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, int cullMode);
void Advance(int numVerts) {
index_ += numVerts;
@ -95,7 +95,7 @@ private:
inline void TranslateLineStrip(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset);
void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset, int cullMode);
template <class ITypeLE, int flag>
void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset);

View File

@ -1530,8 +1530,11 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
int bytesRead = 0;
UpdateUVScaleOffset();
// Snapshot the cull state for this draw: the current cull mode when culling is
// enabled, otherwise -1. The value is handed to SubmitPrim with each prim.
int cullMode = gstate.isCullEnabled() ? gstate.getCullMode() : -1;
uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode());
drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, &bytesRead);
drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead);
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
// Some games rely on this, they don't bother reloading VADDR and IADDR.
// The VADDR/IADDR registers are NOT updated.
@ -1577,7 +1580,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}
drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, &bytesRead);
drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, cullMode, &bytesRead);
AdvanceVerts(vertexType, count, bytesRead);
totalVertCount += count;
break;
@ -1604,6 +1607,10 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
case GE_CMD_BASE:
gstate.cmdmem[GE_CMD_BASE] = data;
break;
case GE_CMD_CULL:
// Keep only the low bit of the command data as the local cullMode, which is passed
// to the SubmitPrim calls in this function. The index generator uses it to flip the
// face of GE_PRIM_TRIANGLE_STRIP through the generated indices.
// NOTE(review): unlike the neighbouring cases (e.g. GE_CMD_BASE), this case does not
// write data into gstate.cmdmem - confirm the register state is updated elsewhere.
cullMode = data & 1;
break;
case GE_CMD_NOP:
case GE_CMD_NOP_FF:
break;
@ -1618,6 +1625,14 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
gstate.cmdmem[GE_CMD_TEXSCALEV] = data;
gstate_c.uv.vScale = getFloat24(data);
break;
case GE_CMD_TEXOFFSETU:
gstate.cmdmem[GE_CMD_TEXOFFSETU] = data;
gstate_c.uv.uOff = getFloat24(data);
break;
case GE_CMD_TEXOFFSETV:
gstate.cmdmem[GE_CMD_TEXOFFSETV] = data;
gstate_c.uv.vOff = getFloat24(data);
break;
case GE_CMD_TEXLEVEL:
// Same Gran Turismo hack from Execute_TexLevel
if ((data & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & data) != 0) {