mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-17 04:39:34 +00:00
Move a few GE cmds to separate funcs.
2% improvement in God Eater 2.
This commit is contained in:
parent
e06c135be4
commit
9851400681
@ -60,9 +60,9 @@ struct CommandTableEntry {
|
||||
|
||||
static const CommandTableEntry commandTable[] = {
|
||||
// Changes that dirty the framebuffer
|
||||
{GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
|
||||
{GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
|
||||
{GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
|
||||
{GE_CMD_ZBUFPTR, FLAG_FLUSHBEFOREONCHANGE},
|
||||
{GE_CMD_ZBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE},
|
||||
|
||||
@ -305,18 +305,18 @@ static const CommandTableEntry commandTable[] = {
|
||||
// From Common. No flushing but definitely need execute.
|
||||
{GE_CMD_OFFSETADDR, FLAG_EXECUTE},
|
||||
{GE_CMD_ORIGIN, FLAG_EXECUTE | FLAG_READS_PC}, // Really?
|
||||
{GE_CMD_PRIM, FLAG_EXECUTE},
|
||||
{GE_CMD_PRIM, FLAG_EXECUTE, &GLES_GPU::Execute_Prim},
|
||||
{GE_CMD_JUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
|
||||
{GE_CMD_CALL, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
|
||||
{GE_CMD_RET, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
|
||||
{GE_CMD_END, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC}, // Flush?
|
||||
{GE_CMD_VADDR, FLAG_EXECUTE},
|
||||
{GE_CMD_IADDR, FLAG_EXECUTE},
|
||||
{GE_CMD_VADDR, FLAG_EXECUTE, &GLES_GPU::Execute_Vaddr},
|
||||
{GE_CMD_IADDR, FLAG_EXECUTE, &GLES_GPU::Execute_Iaddr},
|
||||
{GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC}, // EXECUTE
|
||||
{GE_CMD_BOUNDINGBOX, FLAG_EXECUTE}, // + FLUSHBEFORE when we implement... or not, do we need to?
|
||||
|
||||
// Changing the vertex type requires us to flush.
|
||||
{GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
|
||||
{GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_VertexType},
|
||||
|
||||
{GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE},
|
||||
{GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE},
|
||||
@ -686,6 +686,111 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
|
||||
}
|
||||
}
|
||||
|
||||
void GLES_GPU::Execute_Vaddr(u32 op, u32 diff) {
|
||||
gstate_c.vertexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
|
||||
}
|
||||
|
||||
void GLES_GPU::Execute_Iaddr(u32 op, u32 diff) {
|
||||
gstate_c.indexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
|
||||
}
|
||||
|
||||
void GLES_GPU::Execute_Prim(u32 op, u32 diff) {
|
||||
// This drives all drawing. All other state we just buffer up, then we apply it only
|
||||
// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
|
||||
|
||||
u32 data = op & 0xFFFFFF;
|
||||
u32 count = data & 0xFFFF;
|
||||
GEPrimitiveType prim = static_cast<GEPrimitiveType>(data >> 16);
|
||||
|
||||
if (count == 0)
|
||||
return;
|
||||
|
||||
// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
|
||||
|
||||
if (gstate.isAntiAliasEnabled()) {
|
||||
// Discard AA lines in DOA
|
||||
if (prim == GE_PRIM_LINE_STRIP)
|
||||
return;
|
||||
// Discard AA lines in Summon Night 5
|
||||
if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
|
||||
return;
|
||||
}
|
||||
|
||||
// This also make skipping drawing very effective.
|
||||
framebufferManager_.SetRenderFrameBuffer();
|
||||
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
|
||||
transformDraw_.SetupVertexDecoder(gstate.vertType);
|
||||
// Rough estimate, not sure what's correct.
|
||||
int vertexCost = transformDraw_.EstimatePerVertexCost();
|
||||
cyclesExecuted += vertexCost * count;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
|
||||
ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
|
||||
// on platforms where draw calls are expensive like mobile and D3D
|
||||
void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
|
||||
void *inds = 0;
|
||||
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
|
||||
if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
|
||||
ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
|
||||
return;
|
||||
}
|
||||
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
}
|
||||
|
||||
#ifndef MOBILE_DEVICE
|
||||
if (prim > GE_PRIM_RECTANGLES) {
|
||||
ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
|
||||
}
|
||||
#endif
|
||||
|
||||
int bytesRead;
|
||||
transformDraw_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
|
||||
|
||||
int vertexCost = transformDraw_.EstimatePerVertexCost();
|
||||
gpuStats.vertexGPUCycles += vertexCost * count;
|
||||
cyclesExecuted += vertexCost * count;
|
||||
|
||||
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
|
||||
// Some games rely on this, they don't bother reloading VADDR and IADDR.
|
||||
// Q: Are these changed reflected in the real registers? Needs testing.
|
||||
if (inds) {
|
||||
int indexSize = 1;
|
||||
if ((gstate.vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT)
|
||||
indexSize = 2;
|
||||
gstate_c.indexAddr += count * indexSize;
|
||||
} else {
|
||||
gstate_c.vertexAddr += bytesRead;
|
||||
}
|
||||
}
|
||||
|
||||
void GLES_GPU::Execute_VertexType(u32 op, u32 diff) {
|
||||
if (!g_Config.bSoftwareSkinning) {
|
||||
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
|
||||
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
|
||||
} else {
|
||||
// Don't flush when weight count changes, unless morph is enabled.
|
||||
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
|
||||
// Restore and flush
|
||||
gstate.vertType ^= diff;
|
||||
Flush();
|
||||
gstate.vertType ^= diff;
|
||||
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
|
||||
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GLES_GPU::Execute_FramebufType(u32 op, u32 diff) {
|
||||
gstate_c.framebufChanged = true;
|
||||
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
|
||||
}
|
||||
|
||||
void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
||||
u32 cmd = op >> 24;
|
||||
u32 data = op & 0xFFFFFF;
|
||||
@ -696,87 +801,15 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
||||
break;
|
||||
|
||||
case GE_CMD_VADDR:
|
||||
gstate_c.vertexAddr = gstate_c.getRelativeAddress(data);
|
||||
Execute_Vaddr(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_IADDR:
|
||||
gstate_c.indexAddr = gstate_c.getRelativeAddress(data);
|
||||
Execute_Iaddr(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_PRIM:
|
||||
{
|
||||
// This drives all drawing. All other state we just buffer up, then we apply it only
|
||||
// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
|
||||
|
||||
u32 count = data & 0xFFFF;
|
||||
GEPrimitiveType prim = static_cast<GEPrimitiveType>(data >> 16);
|
||||
|
||||
if (count == 0)
|
||||
break;
|
||||
|
||||
// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
|
||||
|
||||
if (gstate.isAntiAliasEnabled()) {
|
||||
// Discard AA lines in DOA
|
||||
if (prim == GE_PRIM_LINE_STRIP)
|
||||
break;
|
||||
// Discard AA lines in Summon Night 5
|
||||
if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
|
||||
break;
|
||||
}
|
||||
|
||||
// This also make skipping drawing very effective.
|
||||
framebufferManager_.SetRenderFrameBuffer();
|
||||
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
|
||||
transformDraw_.SetupVertexDecoder(gstate.vertType);
|
||||
// Rough estimate, not sure what's correct.
|
||||
int vertexCost = transformDraw_.EstimatePerVertexCost();
|
||||
cyclesExecuted += vertexCost * count;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
|
||||
ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
|
||||
// on platforms where draw calls are expensive like mobile and D3D
|
||||
void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
|
||||
void *inds = 0;
|
||||
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
|
||||
if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
|
||||
ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
|
||||
break;
|
||||
}
|
||||
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
}
|
||||
|
||||
#ifndef MOBILE_DEVICE
|
||||
if (prim > GE_PRIM_RECTANGLES) {
|
||||
ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
|
||||
}
|
||||
#endif
|
||||
|
||||
int bytesRead;
|
||||
transformDraw_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
|
||||
|
||||
int vertexCost = transformDraw_.EstimatePerVertexCost();
|
||||
gpuStats.vertexGPUCycles += vertexCost * count;
|
||||
cyclesExecuted += vertexCost * count;
|
||||
|
||||
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
|
||||
// Some games rely on this, they don't bother reloading VADDR and IADDR.
|
||||
// Q: Are these changed reflected in the real registers? Needs testing.
|
||||
if (inds) {
|
||||
int indexSize = 1;
|
||||
if ((gstate.vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT)
|
||||
indexSize = 2;
|
||||
gstate_c.indexAddr += count * indexSize;
|
||||
} else {
|
||||
gstate_c.vertexAddr += bytesRead;
|
||||
}
|
||||
}
|
||||
Execute_Prim(op, diff);
|
||||
break;
|
||||
|
||||
// The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
|
||||
@ -894,20 +927,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
||||
break;
|
||||
|
||||
case GE_CMD_VERTEXTYPE:
|
||||
if (!g_Config.bSoftwareSkinning) {
|
||||
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
|
||||
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
|
||||
} else {
|
||||
// Don't flush when weight count changes, unless morph is enabled.
|
||||
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (data & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
|
||||
// Restore and flush
|
||||
gstate.vertType ^= diff;
|
||||
Flush();
|
||||
gstate.vertType ^= diff;
|
||||
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
|
||||
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
|
||||
}
|
||||
}
|
||||
Execute_VertexType(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_REGION1:
|
||||
@ -986,8 +1006,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
|
||||
case GE_CMD_FRAMEBUFPTR:
|
||||
case GE_CMD_FRAMEBUFWIDTH:
|
||||
case GE_CMD_FRAMEBUFPIXFORMAT:
|
||||
gstate_c.framebufChanged = true;
|
||||
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
|
||||
Execute_FramebufType(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_TEXADDR0:
|
||||
|
@ -76,6 +76,12 @@ public:
|
||||
|
||||
typedef void (GLES_GPU::*CmdFunc)(u32 op, u32 diff);
|
||||
|
||||
void Execute_Vaddr(u32 op, u32 diff);
|
||||
void Execute_Iaddr(u32 op, u32 diff);
|
||||
void Execute_Prim(u32 op, u32 diff);
|
||||
void Execute_VertexType(u32 op, u32 diff);
|
||||
void Execute_FramebufType(u32 op, u32 diff);
|
||||
|
||||
protected:
|
||||
virtual void FastRunLoop(DisplayList &list);
|
||||
virtual void ProcessEvent(GPUEvent ev);
|
||||
|
Loading…
x
Reference in New Issue
Block a user