mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-27 15:30:35 +00:00
Merge pull request #10691 from hrydgard/more-drawprim-opt
More DrawPrim optimizations
This commit is contained in:
commit
90dbd9a725
@ -93,7 +93,7 @@ int DrawEngineCommon::ComputeNumVertsToDecode() const {
|
||||
void DrawEngineCommon::DecodeVerts(u8 *dest) {
|
||||
const UVScale origUV = gstate_c.uv;
|
||||
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
|
||||
gstate_c.uv = uvScale[decodeCounter_];
|
||||
gstate_c.uv = drawCalls[decodeCounter_].uvScale;
|
||||
DecodeVertsStep(dest, decodeCounter_, decodedVerts_); // NOTE! DecodeVertsStep can modify decodeCounter_!
|
||||
}
|
||||
gstate_c.uv = origUV;
|
||||
@ -601,11 +601,12 @@ ReliableHashType DrawEngineCommon::ComputeHash() {
|
||||
}
|
||||
}
|
||||
|
||||
fullhash += DoReliableHash(&uvScale[0], sizeof(uvScale[0]) * numDrawCalls, 0x0123e658);
|
||||
fullhash += DoReliableHash(&drawCalls[0].uvScale, sizeof(drawCalls[0].uvScale) * numDrawCalls, 0x0123e658);
|
||||
return fullhash;
|
||||
}
|
||||
|
||||
void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
|
||||
void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
|
||||
DispatchFlush();
|
||||
}
|
||||
@ -617,9 +618,6 @@ void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
|
||||
prevPrim_ = prim;
|
||||
}
|
||||
|
||||
// As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it
|
||||
// into the top of the verttype where there are unused bits.
|
||||
const u32 vertTypeID = (vertType & 0xFFFFFF) | (gstate.getUVGenMode() << 24);
|
||||
// If vtype has changed, setup the vertex decoder.
|
||||
if (vertTypeID != lastVType_) {
|
||||
dec_ = GetVertexDecoder(vertTypeID);
|
||||
@ -630,35 +628,34 @@ void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
|
||||
if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
|
||||
return;
|
||||
|
||||
DeferredDrawCall &dc = drawCalls[numDrawCalls];
|
||||
dc.verts = verts;
|
||||
dc.inds = inds;
|
||||
dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
dc.prim = prim;
|
||||
dc.vertexCount = vertexCount;
|
||||
|
||||
if (g_Config.bVertexCache) {
|
||||
u32 dhash = dcid_;
|
||||
dhash = __rotl(dhash ^ (u32)(uintptr_t)verts, 13);
|
||||
dhash = __rotl(dhash ^ (u32)(uintptr_t)inds, 13);
|
||||
dhash = __rotl(dhash ^ (u32)vertType, 13);
|
||||
dhash = __rotl(dhash ^ (u32)vertTypeID, 13);
|
||||
dhash = __rotl(dhash ^ (u32)vertexCount, 13);
|
||||
dcid_ = dhash ^ (u32)prim;
|
||||
}
|
||||
|
||||
DeferredDrawCall &dc = drawCalls[numDrawCalls];
|
||||
dc.verts = verts;
|
||||
dc.inds = inds;
|
||||
dc.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
dc.prim = prim;
|
||||
dc.vertexCount = vertexCount;
|
||||
dc.uvScale = gstate_c.uv;
|
||||
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
GetIndexBounds(inds, vertexCount, vertTypeID, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
} else {
|
||||
dc.indexLowerBound = 0;
|
||||
dc.indexUpperBound = vertexCount - 1;
|
||||
}
|
||||
|
||||
uvScale[numDrawCalls] = gstate_c.uv;
|
||||
|
||||
numDrawCalls++;
|
||||
vertexCountInDrawCalls_ += vertexCount;
|
||||
|
||||
if (vertType & GE_VTYPE_WEIGHT_MASK) {
|
||||
if (vertTypeID & GE_VTYPE_WEIGHT_MASK) {
|
||||
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
|
||||
decodeCounter_++;
|
||||
}
|
||||
|
@ -44,6 +44,12 @@ typedef u64 ReliableHashType;
|
||||
typedef u32 ReliableHashType;
|
||||
#endif
|
||||
|
||||
// Builds the vertex type ID passed to SubmitPrim()/DispatchSubmitPrim(): the low 24 bits
// of vertType, with uvGenMode stored in the bits from 24 upwards.
//
// As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it
// into the top of the verttype where there are unused bits.
inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode) {
	// Shift in unsigned arithmetic so the result is well defined for any uvGenMode
	// (a left shift of a signed int can be undefined before C++20).
	return (vertType & 0xFFFFFF) | (static_cast<uint32_t>(uvGenMode) << 24);
}
|
||||
|
||||
class DrawEngineCommon {
|
||||
public:
|
||||
DrawEngineCommon();
|
||||
@ -59,13 +65,14 @@ public:
|
||||
|
||||
// This would seem to be unnecessary now, but is still required for splines/beziers to work in the software backend since SubmitPrim
|
||||
// is different. Should probably refactor that.
|
||||
virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead);
|
||||
// Note that vertTypeID should be computed using GetVertTypeID().
|
||||
virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
|
||||
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, bytesRead);
|
||||
}
|
||||
|
||||
bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead);
|
||||
|
||||
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
|
||||
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead);
|
||||
void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
|
||||
void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
|
||||
|
||||
@ -135,13 +142,13 @@ protected:
|
||||
u32 vertexCount;
|
||||
u16 indexLowerBound;
|
||||
u16 indexUpperBound;
|
||||
UVScale uvScale;
|
||||
};
|
||||
|
||||
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
|
||||
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
|
||||
int numDrawCalls = 0;
|
||||
int vertexCountInDrawCalls_ = 0;
|
||||
UVScale uvScale[MAX_DEFERRED_DRAW_CALLS];
|
||||
|
||||
int decimationCounter_ = 0;
|
||||
int decodeCounter_ = 0;
|
||||
|
@ -949,8 +949,10 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi
|
||||
gstate_c.uv.vOff = 0.0f;
|
||||
}
|
||||
|
||||
uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode());
|
||||
|
||||
int generatedBytesRead;
|
||||
DispatchSubmitPrim(splineBuffer, quadIndices_, primType[prim_type], count, vertTypeWithIndex16, &generatedBytesRead);
|
||||
DispatchSubmitPrim(splineBuffer, quadIndices_, primType[prim_type], count, vertTypeID, &generatedBytesRead);
|
||||
|
||||
DispatchFlush();
|
||||
|
||||
@ -1091,8 +1093,9 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
|
||||
gstate_c.uv.vOff = 0;
|
||||
}
|
||||
|
||||
uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode());
|
||||
int generatedBytesRead;
|
||||
DispatchSubmitPrim(splineBuffer, quadIndices_, primType[prim_type], count, vertTypeWithIndex16, &generatedBytesRead);
|
||||
DispatchSubmitPrim(splineBuffer, quadIndices_, primType[prim_type], count, vertTypeID, &generatedBytesRead);
|
||||
|
||||
DispatchFlush();
|
||||
|
||||
|
@ -39,6 +39,13 @@ alignas(16) static const float by32768[4] = {
|
||||
1.0f / 32768.0f, 1.0f / 32768.0f, 1.0f / 32768.0f, 1.0f / 32768.0f,
|
||||
};
|
||||
|
||||
// Per-lane multipliers: 1/128 in the first two lanes and 1.0 in the last two, so a packed
// multiply rescales only the first two floats of a four-float vector.
// Referenced by the prescale setup in Compile() for GE_VTYPE_TC_8BIT.
alignas(16) static const float by128_11[4] = {
|
||||
1.0f / 128.0f, 1.0f / 128.0f, 1.0f, 1.0f,
|
||||
};
|
||||
// Per-lane multipliers: 1/32768 in the first two lanes and 1.0 in the last two, so a packed
// multiply rescales only the first two floats of a four-float vector.
// Referenced by the prescale setup in Compile() for GE_VTYPE_TC_16BIT.
alignas(16) static const float by32768_11[4] = {
|
||||
1.0f / 32768.0f, 1.0f / 32768.0f, 1.0f, 1.0f,
|
||||
};
|
||||
|
||||
alignas(16) static const u32 threeMasks[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0 };
|
||||
alignas(16) static const u32 aOne[4] = {0, 0, 0, 0x3F800000};
|
||||
|
||||
@ -222,20 +229,14 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
||||
// Keep the scale/offset in a few fp registers if we need it.
|
||||
if (prescaleStep) {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.uv));
|
||||
MOVSS(fpScaleOffsetReg, MDisp(tempReg1, 0));
|
||||
MOVSS(fpScratchReg, MDisp(tempReg1, 4));
|
||||
UNPCKLPS(fpScaleOffsetReg, R(fpScratchReg));
|
||||
MOVUPS(fpScaleOffsetReg, MatR(tempReg1));
|
||||
if ((dec.VertexType() & GE_VTYPE_TC_MASK) == GE_VTYPE_TC_8BIT) {
|
||||
MOV(PTRBITS, R(tempReg2), ImmPtr(&by128));
|
||||
MOV(PTRBITS, R(tempReg2), ImmPtr(&by128_11));
|
||||
MULPS(fpScaleOffsetReg, MatR(tempReg2));
|
||||
} else if ((dec.VertexType() & GE_VTYPE_TC_MASK) == GE_VTYPE_TC_16BIT) {
|
||||
MOV(PTRBITS, R(tempReg2), ImmPtr(&by32768));
|
||||
MOV(PTRBITS, R(tempReg2), ImmPtr(&by32768_11));
|
||||
MULPS(fpScaleOffsetReg, MatR(tempReg2));
|
||||
}
|
||||
MOVSS(fpScratchReg, MDisp(tempReg1, 8));
|
||||
MOVSS(fpScratchReg2, MDisp(tempReg1, 12));
|
||||
UNPCKLPS(fpScratchReg, R(fpScratchReg2));
|
||||
UNPCKLPD(fpScaleOffsetReg, R(fpScratchReg));
|
||||
}
|
||||
|
||||
// Let's not bother with a proper stack frame. We just grab the arguments and go.
|
||||
|
@ -1498,7 +1498,8 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
|
||||
int bytesRead = 0;
|
||||
UpdateUVScaleOffset();
|
||||
|
||||
drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
|
||||
uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode());
|
||||
drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, &bytesRead);
|
||||
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
|
||||
// Some games rely on this, they don't bother reloading VADDR and IADDR.
|
||||
// The VADDR/IADDR registers are NOT updated.
|
||||
@ -1527,6 +1528,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
|
||||
{
|
||||
u32 count = data & 0xFFFF;
|
||||
if (count == 0) {
|
||||
// Ignore.
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1535,34 +1537,65 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
|
||||
verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
|
||||
inds = 0;
|
||||
if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
|
||||
u32 indexAddr = gstate_c.indexAddr;
|
||||
if (!Memory::IsValidAddress(indexAddr)) {
|
||||
ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
|
||||
return;
|
||||
}
|
||||
inds = Memory::GetPointerUnchecked(indexAddr);
|
||||
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
}
|
||||
|
||||
drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertexType, &bytesRead);
|
||||
drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, &bytesRead);
|
||||
AdvanceVerts(vertexType, count, bytesRead);
|
||||
totalVertCount += count;
|
||||
break;
|
||||
}
|
||||
case GE_CMD_VERTEXTYPE:
|
||||
// Some games spam redundant GE_CMD_VERTEXTYPE
|
||||
if (data != vertexType) { // don't mask data, vertexType is unmasked
|
||||
{
|
||||
uint32_t diff = data ^ vertexType;
|
||||
// don't mask upper bits, vertexType is unmasked
|
||||
if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) {
|
||||
goto bail;
|
||||
} else {
|
||||
vertexType = data;
|
||||
vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case GE_CMD_VADDR:
|
||||
gstate_c.vertexAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF);
|
||||
break;
|
||||
case GE_CMD_OFFSETADDR:
|
||||
gstate.cmdmem[GE_CMD_OFFSETADDR] = data;
|
||||
gstate_c.offsetAddr = data << 8;
|
||||
break;
|
||||
case GE_CMD_BASE:
|
||||
gstate.cmdmem[GE_CMD_BASE] = data;
|
||||
break;
|
||||
case GE_CMD_NOP:
|
||||
case GE_CMD_NOP_FF:
|
||||
break;
|
||||
case GE_CMD_BONEMATRIXNUMBER:
|
||||
gstate.cmdmem[GE_CMD_BONEMATRIXNUMBER] = data;
|
||||
break;
|
||||
case GE_CMD_TEXSCALEU:
|
||||
gstate.cmdmem[GE_CMD_TEXSCALEU] = data;
|
||||
gstate_c.uv.uScale = getFloat24(data);
|
||||
break;
|
||||
case GE_CMD_TEXSCALEV:
|
||||
gstate.cmdmem[GE_CMD_TEXSCALEV] = data;
|
||||
gstate_c.uv.vScale = getFloat24(data);
|
||||
break;
|
||||
case GE_CMD_CALL:
|
||||
{
|
||||
// A bone matrix probably. If not we bail.
|
||||
const u32 target = gstate_c.getRelativeAddress(data & 0x00FFFFFC);
|
||||
if ((Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA &&
|
||||
(Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
|
||||
(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
|
||||
(target > currentList->stall || target + 12 * 4 < currentList->stall)) {
|
||||
FastLoadBoneMatrix(target);
|
||||
} else {
|
||||
goto bail;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
// All other commands might need a flush or something, stop this inner loop.
|
||||
goto bail;
|
||||
@ -1572,6 +1605,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
|
||||
}
|
||||
|
||||
bail:
|
||||
gstate.cmdmem[GE_CMD_VERTEXTYPE] = vertexType;
|
||||
// Skip over the commands we just read out manually.
|
||||
if (cmdCount > 0) {
|
||||
UpdatePC(currentList->pc, currentList->pc + cmdCount * 4);
|
||||
@ -2047,7 +2081,8 @@ void GPUCommon::FlushImm() {
|
||||
int vtype = GE_VTYPE_POS_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_THROUGH;
|
||||
|
||||
int bytesRead;
|
||||
drawEngineCommon_->DispatchSubmitPrim(temp, nullptr, immPrim_, immCount_, vtype, &bytesRead);
|
||||
uint32_t vertTypeID = GetVertTypeID(vtype, 0);
|
||||
drawEngineCommon_->DispatchSubmitPrim(temp, nullptr, immPrim_, immCount_, vertTypeID, &bytesRead);
|
||||
drawEngineCommon_->DispatchFlush();
|
||||
// TODO: In the future, make a special path for these.
|
||||
// drawEngineCommon_->DispatchSubmitImm(immBuffer_, immCount_);
|
||||
|
@ -29,7 +29,7 @@ class NullDrawEngine : public DrawEngineCommon {
|
||||
public:
|
||||
void DispatchFlush() override {
|
||||
}
|
||||
void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override {
|
||||
void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) override {
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -53,8 +53,8 @@ SoftwareDrawEngine::~SoftwareDrawEngine() {
|
||||
void SoftwareDrawEngine::DispatchFlush() {
|
||||
}
|
||||
|
||||
void SoftwareDrawEngine::DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertType, bytesRead, this);
|
||||
void SoftwareDrawEngine::DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) {
|
||||
transformUnit.SubmitPrimitive(verts, inds, prim, vertexCount, vertTypeID, bytesRead, this);
|
||||
}
|
||||
|
||||
VertexDecoder *SoftwareDrawEngine::FindVertexDecoder(u32 vtype) {
|
||||
|
Loading…
Reference in New Issue
Block a user