Avoid decoding indices when we don't need them.

This commit is contained in:
Henrik Rydgård 2023-10-06 12:43:10 +02:00
parent d9c6d0931b
commit 61acce195c
9 changed files with 150 additions and 206 deletions

View File

@ -816,6 +816,10 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
_dbg_assert_(numDrawInds_ <= MAX_DEFERRED_DRAW_INDS); // if it's equal, the check below will take care of it before any action is taken.
_dbg_assert_(numDrawVerts_ > 0);
if (!clockwise) {
anyCCWOrIndexed_ = true;
}
int seenPrims = 0;
while (cmd != stall) {
uint32_t data = *cmd;
if ((data & 0xFFF80000) != 0x04000000) {
@ -831,6 +835,7 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
DeferredInds &di = drawInds_[numDrawInds_++];
di.indexType = 0;
di.prim = newPrim;
seenPrims |= (1 << newPrim);
di.clockwise = clockwise;
di.vertexCount = vertexCount;
di.vertDecodeIndex = prevDrawVerts;
@ -839,6 +844,10 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
cmd++;
}
seenPrims_ |= seenPrims;
_dbg_assert_(cmd != start);
int totalCount = offset - dv.vertexCount;
dv.vertexCount = offset;
dv.indexUpperBound = dv.vertexCount - 1;
@ -910,9 +919,16 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
DeferredInds &di = drawInds_[numDrawInds_++];
di.inds = inds;
di.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
int indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
if (indexType) {
anyCCWOrIndexed_ = true;
}
di.indexType = indexType;
di.prim = prim;
di.clockwise = clockwise;
if (!clockwise) {
anyCCWOrIndexed_ = true;
}
di.vertexCount = vertexCount;
di.vertDecodeIndex = numDrawVerts_;
di.offset = 0;
@ -942,6 +958,7 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
}
vertexCountInDrawCalls_ += vertexCount;
seenPrims_ |= (1 << prim);
if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
// This prevents issues with consecutive self-renders in Ridge Racer.
@ -952,6 +969,8 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
}
void DrawEngineCommon::DecodeVerts(u8 *dest) {
// Note that this should be able to continue a partial decode - we don't necessarily start from zero here (although we do most of the time).
int i = decodeVertsCounter_;
int stride = (int)dec_->GetDecVtxFmt().stride;
for (; i < numDrawVerts_; i++) {
@ -968,7 +987,9 @@ void DrawEngineCommon::DecodeVerts(u8 *dest) {
decodeVertsCounter_ = i;
}
void DrawEngineCommon::DecodeInds() {
int DrawEngineCommon::DecodeInds() {
// Note that this should be able to continue a partial decode - we don't necessarily start from zero here (although we do most of the time).
int i = decodeIndsCounter_;
for (; i < numDrawInds_; i++) {
const DeferredInds &di = drawInds_[i];
@ -994,12 +1015,7 @@ void DrawEngineCommon::DecodeInds() {
}
decodeIndsCounter_ = i;
// Sanity check
if (indexGen.Prim() < 0) {
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
// Force to points (0)
indexGen.AddPrim(GE_PRIM_POINTS, 0, 0, true);
}
return indexGen.VertexCount();
}
bool DrawEngineCommon::CanUseHardwareTransform(int prim) {

View File

@ -152,7 +152,7 @@ protected:
void UpdatePlanes();
void DecodeVerts(u8 *dest);
void DecodeInds();
int DecodeInds();
// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);
@ -202,6 +202,8 @@ protected:
vertexCountInDrawCalls_ = 0;
decodeIndsCounter_ = 0;
decodeVertsCounter_ = 0;
seenPrims_ = 0;
anyCCWOrIndexed_ = false;
gstate_c.vertexFullAlpha = true;
// Now seems as good a time as any to reset the min/max coords, which we may examine later.
@ -211,6 +213,34 @@ protected:
gstate_c.vertBounds.maxV = 0;
}
// Reports whether the draws collected so far can be issued without an index buffer.
// seenPrims_ is a bitmask holding (1 << prim) for every primitive type submitted, and
// anyCCWOrIndexed_ is raised as soon as any submitted draw is counter-clockwise or indexed.
inline bool CollectedPureDraw() const {
	// A single counter-clockwise or indexed draw rules the whole batch out.
	if (anyCCWOrIndexed_) {
		return false;
	}
	// A strip only qualifies when it is the one and only collected draw
	// (presumably because separate strips can't be joined without indices).
	if (seenPrims_ == (1 << GE_PRIM_TRIANGLE_STRIP)) {
		return numDrawInds_ == 1;
	}
	// Otherwise exactly one of the list-style primitive types must have been seen.
	return seenPrims_ == (1 << GE_PRIM_LINES) ||
		seenPrims_ == (1 << GE_PRIM_POINTS) ||
		seenPrims_ == (1 << GE_PRIM_TRIANGLES);
}
// Chooses between a non-indexed and an indexed draw and fills in the draw parameters.
//   prim         - receives the primitive type to draw with.
//   numVerts     - receives the element count: decoded vertices for a non-indexed draw,
//                  or the value returned by DecodeInds() for an indexed one.
//   maxIndex     - receives numDecodedVerts_ in both cases.
//   useElements  - receives true when the index buffer must be used.
//   forceIndexed - when true, indices are always decoded, even if the collected
//                  draws would qualify as pure.
inline void DecodeIndsAndGetData(GEPrimitiveType *prim, int *numVerts, int *maxIndex, bool *useElements, bool forceIndexed) {
	const bool needsIndexBuffer = forceIndexed || !CollectedPureDraw();
	if (needsIndexBuffer) {
		// Decode first, then read the remaining state, so the order of side effects is fixed.
		*numVerts = DecodeInds();
		*maxIndex = numDecodedVerts_;
		// The type of the first collected draw, mapped through GeneralPrim(), is reported.
		*prim = IndexGenerator::GeneralPrim(static_cast<GEPrimitiveType>(drawInds_[0].prim));
		*useElements = true;
		return;
	}

	// Pure draw: index decoding is skipped entirely and the first draw's type is used as-is.
	*prim = drawInds_[0].prim;
	*numVerts = numDecodedVerts_;
	*maxIndex = numDecodedVerts_;
	*useElements = false;
}
uint32_t ComputeDrawcallsHash() const;
bool useHWTransform_ = false;
@ -227,9 +257,7 @@ protected:
u16 *decIndex_ = nullptr;
// Cached vertex decoders
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
DenseHashMap<u32, VertexDecoder *> decoderMap_;
VertexDecoder *dec_ = nullptr;
VertexDecoderJitCache *decJitCache_ = nullptr;
VertexDecoderOptions decOptions_{};
@ -239,10 +267,10 @@ protected:
// Defer all vertex decoding to a "Flush" (except when software skinning)
struct DeferredVerts {
const void *verts;
UVScale uvScale;
u32 vertexCount;
u16 indexLowerBound;
u16 indexUpperBound;
UVScale uvScale;
};
struct DeferredInds {
@ -250,7 +278,7 @@ protected:
u32 vertexCount;
u8 vertDecodeIndex; // index into the drawVerts_ array to look up the vertexOffset.
u8 indexType;
s8 prim;
GEPrimitiveType prim;
bool clockwise;
u16 offset;
};
@ -261,6 +289,8 @@ protected:
uint32_t drawVertexOffsets_[MAX_DEFERRED_DRAW_VERTS];
DeferredInds drawInds_[MAX_DEFERRED_DRAW_INDS];
VertexDecoder *dec_ = nullptr;
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
int numDrawVerts_ = 0;
int numDrawInds_ = 0;
int vertexCountInDrawCalls_ = 0;
@ -268,6 +298,10 @@ protected:
int decodeVertsCounter_ = 0;
int decodeIndsCounter_ = 0;
int seenPrims_ = 0;
bool anyCCWOrIndexed_ = 0;
bool anyIndexed_ = 0;
// Vertex collector state
IndexGenerator indexGen;
int numDecodedVerts_ = 0;

View File

@ -67,9 +67,6 @@ void IndexGenerator::AddPoints(int numVerts, int indexOffset) {
for (int i = 0; i < numVerts; i++)
*outInds++ = indexOffset + i;
inds_ = outInds;
// ignore overflow verts
prim_ = GE_PRIM_POINTS;
seenPrims_ |= 1 << GE_PRIM_POINTS;
}
void IndexGenerator::AddList(int numVerts, int indexOffset, bool clockwise) {
@ -82,13 +79,6 @@ void IndexGenerator::AddList(int numVerts, int indexOffset, bool clockwise) {
*outInds++ = indexOffset + i + v2;
}
inds_ = outInds;
// ignore overflow verts
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= 1 << GE_PRIM_TRIANGLES;
if (!clockwise) {
// Make sure we don't treat this as pure.
seenPrims_ |= 1 << GE_PRIM_TRIANGLE_STRIP;
}
}
alignas(16) static const u16 offsets_clockwise[24] = {
@ -203,17 +193,6 @@ void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) {
}
inds_ = outInds;
#endif
// This is so we can detect one single strip by just looking at seenPrims_.
if (!seenPrims_ && clockwise) {
seenPrims_ = 1 << GE_PRIM_TRIANGLE_STRIP;
prim_ = GE_PRIM_TRIANGLE_STRIP;
pureCount_ = numVerts;
} else {
seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | (1 << GE_PRIM_TRIANGLES);
prim_ = GE_PRIM_TRIANGLES;
pureCount_ = 0;
}
}
void IndexGenerator::AddFan(int numVerts, int indexOffset, bool clockwise) {
@ -227,12 +206,6 @@ void IndexGenerator::AddFan(int numVerts, int indexOffset, bool clockwise) {
*outInds++ = indexOffset + i + v2;
}
inds_ = outInds;
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= 1 << GE_PRIM_TRIANGLE_FAN;
if (!clockwise) {
// Make sure we don't treat this as pure.
seenPrims_ |= 1 << GE_PRIM_TRIANGLE_STRIP;
}
}
//Lines
@ -243,8 +216,6 @@ void IndexGenerator::AddLineList(int numVerts, int indexOffset) {
*outInds++ = indexOffset + i + 1;
}
inds_ = outInds;
prim_ = GE_PRIM_LINES;
seenPrims_ |= 1 << prim_;
}
void IndexGenerator::AddLineStrip(int numVerts, int indexOffset) {
@ -255,8 +226,6 @@ void IndexGenerator::AddLineStrip(int numVerts, int indexOffset) {
*outInds++ = indexOffset + i + 1;
}
inds_ = outInds;
prim_ = GE_PRIM_LINES;
seenPrims_ |= 1 << GE_PRIM_LINE_STRIP;
}
void IndexGenerator::AddRectangles(int numVerts, int indexOffset) {
@ -268,21 +237,17 @@ void IndexGenerator::AddRectangles(int numVerts, int indexOffset) {
*outInds++ = indexOffset + i + 1;
}
inds_ = outInds;
prim_ = GE_PRIM_RECTANGLES;
seenPrims_ |= 1 << GE_PRIM_RECTANGLES;
}
template <class ITypeLE, int flag>
template <class ITypeLE>
void IndexGenerator::TranslatePoints(int numInds, const ITypeLE *inds, int indexOffset) {
u16 *outInds = inds_;
for (int i = 0; i < numInds; i++)
*outInds++ = indexOffset + inds[i];
inds_ = outInds;
prim_ = GE_PRIM_POINTS;
seenPrims_ |= (1 << GE_PRIM_POINTS) | flag;
}
template <class ITypeLE, int flag>
template <class ITypeLE>
void IndexGenerator::TranslateLineList(int numInds, const ITypeLE *inds, int indexOffset) {
u16 *outInds = inds_;
numInds = numInds & ~1;
@ -291,11 +256,9 @@ void IndexGenerator::TranslateLineList(int numInds, const ITypeLE *inds, int ind
*outInds++ = indexOffset + inds[i + 1];
}
inds_ = outInds;
prim_ = GE_PRIM_LINES;
seenPrims_ |= (1 << GE_PRIM_LINES) | flag;
}
template <class ITypeLE, int flag>
template <class ITypeLE>
void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int indexOffset) {
int numLines = numInds - 1;
u16 *outInds = inds_;
@ -304,11 +267,9 @@ void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int in
*outInds++ = indexOffset + inds[i + 1];
}
inds_ = outInds;
prim_ = GE_PRIM_LINES;
seenPrims_ |= (1 << GE_PRIM_LINE_STRIP) | flag;
}
template <class ITypeLE, int flag>
template <class ITypeLE>
void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
// We only bother doing this minor optimization in triangle list, since it's by far the most
// common operation that can benefit.
@ -329,11 +290,9 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf
}
inds_ = outInds;
}
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= (1 << GE_PRIM_TRIANGLES) | flag;
}
template <class ITypeLE, int flag>
template <class ITypeLE>
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
int wind = clockwise ? 1 : 2;
int numTris = numInds - 2;
@ -345,11 +304,9 @@ void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexO
*outInds++ = indexOffset + inds[i + wind];
}
inds_ = outInds;
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | flag;
}
template <class ITypeLE, int flag>
template <class ITypeLE>
void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
if (numInds <= 0) return;
int numTris = numInds - 2;
@ -362,11 +319,9 @@ void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOff
*outInds++ = indexOffset + inds[i + v2];
}
inds_ = outInds;
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= (1 << GE_PRIM_TRIANGLE_FAN) | flag;
}
template <class ITypeLE, int flag>
template <class ITypeLE>
inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds, int indexOffset) {
u16 *outInds = inds_;
//rectangles always need 2 vertices, disregard the last one if there's an odd number
@ -376,43 +331,41 @@ inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds
*outInds++ = indexOffset + inds[i+1];
}
inds_ = outInds;
prim_ = GE_PRIM_RECTANGLES;
seenPrims_ |= (1 << GE_PRIM_RECTANGLES) | flag;
}
// Could template this too, but would have to define in header.
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break; // Same
case GE_PRIM_POINTS: TranslatePoints<u8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u8>(numInds, inds, indexOffset); break; // Same
}
}
void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break; // Same
case GE_PRIM_POINTS: TranslatePoints<u16_le>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u16_le>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u16_le>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u16_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u16_le>(numInds, inds, indexOffset); break; // Same
}
}
void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break; // Same
case GE_PRIM_POINTS: TranslatePoints<u32_le>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u32_le>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u32_le>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u32_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u32_le>(numInds, inds, indexOffset); break; // Same
}
}

View File

@ -26,32 +26,22 @@ class IndexGenerator {
public:
void Setup(u16 *indexptr);
void Reset() {
prim_ = GE_PRIM_INVALID;
seenPrims_ = 0;
pureCount_ = 0;
this->inds_ = indsBase_;
}
bool PrimCompatible(int prim1, int prim2) {
static bool PrimCompatible(int prim1, int prim2) {
if (prim1 == GE_PRIM_INVALID || prim2 == GE_PRIM_KEEP_PREVIOUS)
return true;
return indexedPrimitiveType[prim1] == indexedPrimitiveType[prim2];
}
bool PrimCompatible(int prim) const {
if (prim_ == GE_PRIM_INVALID || prim == GE_PRIM_KEEP_PREVIOUS)
return true;
return indexedPrimitiveType[prim] == prim_;
}
GEPrimitiveType Prim() const { return prim_; }
GEPrimitiveType GeneralPrim() const {
switch (prim_) {
static GEPrimitiveType GeneralPrim(GEPrimitiveType prim) {
switch (prim) {
case GE_PRIM_LINE_STRIP: return GE_PRIM_LINES; break;
case GE_PRIM_TRIANGLE_STRIP:
case GE_PRIM_TRIANGLE_FAN: return GE_PRIM_TRIANGLES; break;
default:
return prim_;
return prim;
}
}
@ -60,15 +50,8 @@ public:
void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise);
void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise);
// Despite the name, this counts generated indices: the distance (in u16 elements)
// between the start of the index buffer and the current write position.
int VertexCount() const {
	return static_cast<int>(inds_ - indsBase_);
}
int SeenPrims() const { return seenPrims_; }
int PureCount() const { return pureCount_; }
bool SeenOnlyPurePrims() const {
return seenPrims_ == (1 << GE_PRIM_TRIANGLES) ||
seenPrims_ == (1 << GE_PRIM_LINES) ||
seenPrims_ == (1 << GE_PRIM_POINTS) ||
seenPrims_ == (1 << GE_PRIM_TRIANGLE_STRIP);
}
private:
// Points (why index these? code simplicity)
@ -84,34 +67,25 @@ private:
void AddRectangles(int numVerts, int indexOffset);
// These translate already indexed lists
template <class ITypeLE, int flag>
template <class ITypeLE>
void TranslatePoints(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
template <class ITypeLE>
void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise);
template <class ITypeLE, int flag>
template <class ITypeLE>
inline void TranslateLineList(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
template <class ITypeLE>
inline void TranslateLineStrip(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
template <class ITypeLE>
void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise);
template <class ITypeLE, int flag>
template <class ITypeLE>
void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise);
template <class ITypeLE, int flag>
template <class ITypeLE>
inline void TranslateRectangles(int numVerts, const ITypeLE *inds, int indexOffset);
enum {
SEEN_INDEX8 = 1 << 16,
SEEN_INDEX16 = 1 << 17,
SEEN_INDEX32 = 1 << 18,
};
u16 *indsBase_;
u16 *inds_;
int pureCount_;
GEPrimitiveType prim_;
int seenPrims_;
static const u8 indexedPrimitiveType[7];
};

View File

@ -286,16 +286,12 @@ void DrawEngineD3D11::DoFlush() {
ID3D11Buffer *vb_ = nullptr;
ID3D11Buffer *ib_ = nullptr;
int vertexCount;
int maxIndex;
bool useElements;
DecodeVerts(decoded_);
DecodeInds();
bool useElements = !indexGen.SeenOnlyPurePrims() || prim == GE_PRIM_TRIANGLE_FAN;
int vertexCount = indexGen.VertexCount();
DecodeIndsAndGetData(&prim, &vertexCount, &maxIndex, &useElements, false);
gpuStats.numUncachedVertsDrawn += vertexCount;
if (!useElements && indexGen.PureCount()) {
vertexCount = indexGen.PureCount();
}
prim = indexGen.Prim();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
@ -336,7 +332,7 @@ void DrawEngineD3D11::DoFlush() {
context_->IASetVertexBuffers(0, 1, &buf, &stride, &vOffset);
if (useElements) {
UINT iOffset;
int iSize = 2 * indexGen.VertexCount();
int iSize = 2 * vertexCount;
uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
memcpy(iptr, decIndex_, iSize);
pushInds_->EndPush(context_);
@ -363,7 +359,8 @@ void DrawEngineD3D11::DoFlush() {
dec_ = GetVertexDecoder(lastVType_);
}
DecodeVerts(decoded_);
DecodeInds();
int vertexCount = DecodeInds();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
@ -371,12 +368,9 @@ void DrawEngineD3D11::DoFlush() {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
prim = indexGen.Prim();
// Undo the strip optimization, not supported by the SW code yet.
if (prim == GE_PRIM_TRIANGLE_STRIP)
prim = GE_PRIM_TRIANGLES;
VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());
gpuStats.numUncachedVertsDrawn += vertexCount;
prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim);
VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, vertexCount);
u16 *inds = decIndex_;
SoftwareTransformResult result{};
@ -424,7 +418,7 @@ void DrawEngineD3D11::DoFlush() {
ApplyDrawState(prim);
if (result.action == SW_NOT_READY)
swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, numDecodedVerts_, &result);
swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, numDecodedVerts_, &result);
if (result.setSafeSize)
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);

View File

@ -258,16 +258,12 @@ void DrawEngineDX9::DoFlush() {
LPDIRECT3DVERTEXBUFFER9 vb_ = nullptr;
LPDIRECT3DINDEXBUFFER9 ib_ = nullptr;
int vertexCount;
int maxIndex;
bool useElements;
DecodeVerts(decoded_);
DecodeInds();
bool useElements = !indexGen.SeenOnlyPurePrims();
int vertexCount = indexGen.VertexCount();
DecodeIndsAndGetData(&prim, &vertexCount, &maxIndex, &useElements, false);
gpuStats.numUncachedVertsDrawn += vertexCount;
if (!useElements && indexGen.PureCount()) {
vertexCount = indexGen.PureCount();
}
prim = indexGen.Prim();
_dbg_assert_((int)prim > 0);
@ -315,7 +311,8 @@ void DrawEngineDX9::DoFlush() {
dec_ = GetVertexDecoder(lastVType_);
}
DecodeVerts(decoded_);
DecodeInds();
int vertexCount = DecodeInds();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
@ -323,12 +320,9 @@ void DrawEngineDX9::DoFlush() {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
prim = indexGen.Prim();
// Undo the strip optimization, not supported by the SW code yet.
if (prim == GE_PRIM_TRIANGLE_STRIP)
prim = GE_PRIM_TRIANGLES;
VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());
gpuStats.numUncachedVertsDrawn += vertexCount;
prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim);
VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, vertexCount);
u16 *inds = decIndex_;
SoftwareTransformResult result{};
@ -354,6 +348,7 @@ void DrawEngineDX9::DoFlush() {
UpdateCachedViewportState(vpAndScissor);
}
int maxIndex = numDecodedVerts_;
SoftwareTransform swTransform(params);
// Half pixel offset hack.
@ -379,7 +374,7 @@ void DrawEngineDX9::DoFlush() {
ApplyDrawState(prim);
if (result.action == SW_NOT_READY)
swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, numDecodedVerts_, &result);
swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, numDecodedVerts_, &result);
if (result.setSafeSize)
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);

View File

@ -284,19 +284,20 @@ void DrawEngineGLES::DoFlush() {
u8 *dest = (u8 *)frameData.pushVertex->Allocate(vertsToDecode * dec_->GetDecVtxFmt().stride, 4, &vertexBuffer, &vertexBufferOffset);
DecodeVerts(dest);
}
DecodeInds();
// If there's only been one primitive type, and it's either TRIANGLES, LINES or POINTS,
// there is no need for the index buffer we built. We can then use glDrawArrays instead
// for a very minor speed boost. TODO: We can probably detect this case earlier, like before
// actually doing any vertex decoding (unless we're doing soft skinning and pre-decode on submit).
bool useElements = !indexGen.SeenOnlyPurePrims();
int vertexCount = indexGen.VertexCount();
gpuStats.numUncachedVertsDrawn += vertexCount;
if (!useElements && indexGen.PureCount()) {
vertexCount = indexGen.PureCount();
int vertexCount;
int maxIndex;
bool useElements;
DecodeVerts(decoded_);
DecodeIndsAndGetData(&prim, &vertexCount, &maxIndex, &useElements, false);
if (useElements) {
uint32_t esz = sizeof(uint16_t) * vertexCount;
void *dest = frameData.pushIndex->Allocate(esz, 2, &indexBuffer, &indexBufferOffset);
// TODO: When we need to apply an index offset, we can apply it directly when copying the indices here.
// Of course, minding the maximum value of 65535...
memcpy(dest, decIndex_, esz);
}
prim = indexGen.Prim();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
@ -316,11 +317,6 @@ void DrawEngineGLES::DoFlush() {
LinkedShader *program = shaderManager_->ApplyFragmentShader(vsid, vshader, pipelineState_, framebufferManager_->UseBufferedRendering());
GLRInputLayout *inputLayout = SetupDecFmtForDraw(dec_->GetDecVtxFmt());
if (useElements) {
uint32_t esz = sizeof(uint16_t) * indexGen.VertexCount();
void *dest = frameData.pushIndex->Allocate(esz, 2, &indexBuffer, &indexBufferOffset);
// TODO: When we need to apply an index offset, we can apply it directly when copying the indices here.
// Of course, minding the maximum value of 65535...
memcpy(dest, decIndex_, esz);
render_->DrawIndexed(inputLayout,
vertexBuffer, vertexBufferOffset,
indexBuffer, indexBufferOffset,
@ -338,7 +334,7 @@ void DrawEngineGLES::DoFlush() {
dec_ = GetVertexDecoder(lastVType_);
}
DecodeVerts(decoded_);
DecodeInds();
int vertexCount = DecodeInds();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
@ -347,11 +343,8 @@ void DrawEngineGLES::DoFlush() {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
prim = indexGen.Prim();
// Undo the strip optimization, not supported by the SW code yet.
if (prim == GE_PRIM_TRIANGLE_STRIP)
prim = GE_PRIM_TRIANGLES;
gpuStats.numUncachedVertsDrawn += vertexCount;
prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim);
u16 *inds = decIndex_;
SoftwareTransformResult result{};
@ -377,7 +370,7 @@ void DrawEngineGLES::DoFlush() {
UpdateCachedViewportState(vpAndScissor_);
}
int vertexCount = indexGen.VertexCount();
int maxIndex = numDecodedVerts_;
// TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts.
if (gl_extensions.IsGLES && !gl_extensions.GLES3) {

View File

@ -258,21 +258,11 @@ void DrawEngineVulkan::DoFlush() {
u8 *dest = pushVertex_->Allocate(vertsToDecode * dec_->GetDecVtxFmt().stride, 4, &vbuf, &vbOffset);
DecodeVerts(dest);
}
DecodeInds();
int vertexCount;
int maxIndex;
bool useElements;
int vertexCount = indexGen.VertexCount();
gpuStats.numUncachedVertsDrawn += vertexCount;
if (forceIndexed) {
useElements = true;
prim = indexGen.GeneralPrim();
} else {
useElements = !indexGen.SeenOnlyPurePrims();
if (!useElements && indexGen.PureCount()) {
vertexCount = indexGen.PureCount();
}
prim = indexGen.Prim();
}
DecodeIndsAndGetData(&prim, &vertexCount, &maxIndex, &useElements, false);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
@ -363,7 +353,7 @@ void DrawEngineVulkan::DoFlush() {
};
if (useElements) {
if (!ibuf) {
ibOffset = (uint32_t)pushIndex_->Push(decIndex_, sizeof(uint16_t) * indexGen.VertexCount(), 4, &ibuf);
ibOffset = (uint32_t)pushIndex_->Push(decIndex_, sizeof(uint16_t) * vertexCount, 4, &ibuf);
}
renderManager->DrawIndexed(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1);
} else {
@ -379,7 +369,7 @@ void DrawEngineVulkan::DoFlush() {
int prevDecodedVerts = numDecodedVerts_;
DecodeVerts(decoded_);
DecodeInds();
int vertexCount = DecodeInds();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
@ -388,12 +378,8 @@ void DrawEngineVulkan::DoFlush() {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
prim = indexGen.Prim();
// Undo the strip optimization, not supported by the SW code yet.
if (prim == GE_PRIM_TRIANGLE_STRIP)
prim = GE_PRIM_TRIANGLES;
_dbg_assert_(prim != GE_PRIM_INVALID);
gpuStats.numUncachedVertsDrawn += vertexCount;
prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim);
u16 *inds = decIndex_;
SoftwareTransformResult result{};
@ -436,7 +422,7 @@ void DrawEngineVulkan::DoFlush() {
result.action = SW_NOT_READY;
if (result.action == SW_NOT_READY) {
swTransform.DetectOffsetTexture(numDecodedVerts_);
swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, numDecodedVerts_, &result);
swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, numDecodedVerts_, &result);
}
if (result.setSafeSize)

View File

@ -583,8 +583,7 @@ enum GETexProjMapMode
GE_PROJMAP_NORMAL = 3,
};
enum GEPrimitiveType
{
enum GEPrimitiveType : int8_t {
GE_PRIM_POINTS = 0,
GE_PRIM_LINES = 1,
GE_PRIM_LINE_STRIP = 2,