Move a few GE cmds to separate funcs.

2% improvement in God Eater 2.
This commit is contained in:
Unknown W. Brackets 2014-04-16 08:01:56 -07:00
parent e06c135be4
commit 9851400681
2 changed files with 123 additions and 98 deletions

View File

@ -60,9 +60,9 @@ struct CommandTableEntry {
static const CommandTableEntry commandTable[] = {
// Changes that dirty the framebuffer
{GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
{GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
{GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_FramebufType},
{GE_CMD_ZBUFPTR, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_ZBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE},
@ -305,18 +305,18 @@ static const CommandTableEntry commandTable[] = {
// From Common. No flushing but definitely need execute.
{GE_CMD_OFFSETADDR, FLAG_EXECUTE},
{GE_CMD_ORIGIN, FLAG_EXECUTE | FLAG_READS_PC}, // Really?
{GE_CMD_PRIM, FLAG_EXECUTE},
{GE_CMD_PRIM, FLAG_EXECUTE, &GLES_GPU::Execute_Prim},
{GE_CMD_JUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
{GE_CMD_CALL, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
{GE_CMD_RET, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
{GE_CMD_END, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC}, // Flush?
{GE_CMD_VADDR, FLAG_EXECUTE},
{GE_CMD_IADDR, FLAG_EXECUTE},
{GE_CMD_VADDR, FLAG_EXECUTE, &GLES_GPU::Execute_Vaddr},
{GE_CMD_IADDR, FLAG_EXECUTE, &GLES_GPU::Execute_Iaddr},
{GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC}, // EXECUTE
{GE_CMD_BOUNDINGBOX, FLAG_EXECUTE}, // + FLUSHBEFORE when we implement... or not, do we need to?
// Changing the vertex type requires us to flush.
{GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_VertexType},
{GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE},
{GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE},
@ -686,6 +686,111 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
}
}
// Handles GE_CMD_VADDR: stores the vertex address for subsequent draws.
// The low 24 bits of op are passed through gstate_c.getRelativeAddress() and the
// result is written to gstate_c.vertexAddr. diff is not used here.
void GLES_GPU::Execute_Vaddr(u32 op, u32 diff) {
	const u32 payload = op & 0x00FFFFFF;
	gstate_c.vertexAddr = gstate_c.getRelativeAddress(payload);
}
// Handles GE_CMD_IADDR: stores the index address for subsequent indexed draws.
// The low 24 bits of op are passed through gstate_c.getRelativeAddress() and the
// result is written to gstate_c.indexAddr. diff is not used here.
void GLES_GPU::Execute_Iaddr(u32 op, u32 diff) {
	const u32 payload = op & 0x00FFFFFF;
	gstate_c.indexAddr = gstate_c.getRelativeAddress(payload);
}
// Handles GE_CMD_PRIM, the command that drives all drawing. All other state is just
// buffered up and only applied when it's time to draw. As most PSP games set state
// redundantly ALL THE TIME, this is a huge optimization.
//
// op:   command word. Within its low 24 bits, bits 0-15 are the vertex count and the
//       bits above that are the primitive type.
// diff: not used by this handler.
//
// On a successful submit, gstate_c.indexAddr (indexed draws) or gstate_c.vertexAddr
// (non-indexed draws) is advanced past the data that was consumed.
void GLES_GPU::Execute_Prim(u32 op, u32 diff) {
	u32 data = op & 0xFFFFFF;
	u32 count = data & 0xFFFF;
	GEPrimitiveType prim = static_cast<GEPrimitiveType>(data >> 16);

	// Zero-sized draws are ignored entirely.
	if (count == 0)
		return;

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
	if (gstate.isAntiAliasEnabled()) {
		// Discard AA lines in DOA
		if (prim == GE_PRIM_LINE_STRIP)
			return;
		// Discard AA lines in Summon Night 5
		if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
			return;
	}

	// This also makes skipping drawing very effective.
	framebufferManager_.SetRenderFrameBuffer();
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// The draw is skipped, but its estimated cost is still charged to cyclesExecuted.
		// Note that neither vertexAddr nor indexAddr is advanced on this path.
		transformDraw_.SetupVertexDecoder(gstate.vertType);
		// Rough estimate, not sure what's correct.
		int vertexCost = transformDraw_.EstimatePerVertexCost();
		cyclesExecuted += vertexCost * count;
		return;
	}

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}

	// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
	// on platforms where draw calls are expensive like mobile and D3D
	void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
	void *inds = 0;
	// The index pointer is only resolved (and validated) when the vertex type uses indices.
	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

#ifndef MOBILE_DEVICE
	// Only reported; the primitive is still submitted below.
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// Initialized so that the vertexAddr advance below is a no-op, rather than a read of
	// an indeterminate value, should SubmitPrim ever return without writing it.
	int bytesRead = 0;
	transformDraw_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);

	int vertexCost = transformDraw_.EstimatePerVertexCost();
	gpuStats.vertexGPUCycles += vertexCost * count;
	cyclesExecuted += vertexCost * count;

	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// Q: Are these changes reflected in the real registers? Needs testing.
	if (inds) {
		// One byte per index unless the vertex type selects 16-bit indices.
		int indexSize = 1;
		if ((gstate.vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT)
			indexSize = 2;
		gstate_c.indexAddr += count * indexSize;
	} else {
		gstate_c.vertexAddr += bytesRead;
	}
}
// Handles GE_CMD_VERTEXTYPE.
//
// op:   command word carrying the new vertex type bits.
// diff: bits XORed into gstate.vertType around the flush below.
//       NOTE(review): presumably old value XOR new value - confirm against the caller.
//
// Without software skinning, the only work is dirtying the UV scale/offset uniform when
// the texcoord or through-mode bits are part of diff. With software skinning, nothing
// happens when only the weight count changed and morph is not enabled; otherwise we
// flush first and then apply the same uniform check.
void GLES_GPU::Execute_VertexType(u32 op, u32 diff) {
	const bool uvScaleAffected = (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) != 0;

	if (g_Config.bSoftwareSkinning) {
		const bool nonWeightBitsChanged = (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) != 0;
		const bool morphEnabled = (op & GE_VTYPE_MORPHCOUNT_MASK) != 0;

		// Don't flush when weight count changes, unless morph is enabled.
		if (!nonWeightBitsChanged && !morphEnabled)
			return;

		// Restore and flush: toggle the diff bits, flush, then toggle them back.
		gstate.vertType ^= diff;
		Flush();
		gstate.vertType ^= diff;
	}

	if (uvScaleAffected)
		shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
// Shared handler for GE_CMD_FRAMEBUFPTR, GE_CMD_FRAMEBUFWIDTH and GE_CMD_FRAMEBUFPIXFORMAT.
// It only records the change: the TEXCHANGE_PARAMSONLY bit is set in
// gstate_c.textureChanged and gstate_c.framebufChanged is raised. Neither op nor diff
// is inspected.
void GLES_GPU::Execute_FramebufType(u32 op, u32 diff) {
	gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
	gstate_c.framebufChanged = true;
}
void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
u32 cmd = op >> 24;
u32 data = op & 0xFFFFFF;
@ -696,87 +801,15 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_VADDR:
gstate_c.vertexAddr = gstate_c.getRelativeAddress(data);
Execute_Vaddr(op, diff);
break;
case GE_CMD_IADDR:
gstate_c.indexAddr = gstate_c.getRelativeAddress(data);
Execute_Iaddr(op, diff);
break;
case GE_CMD_PRIM:
{
// This drives all drawing. All other state we just buffer up, then we apply it only
// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
u32 count = data & 0xFFFF;
GEPrimitiveType prim = static_cast<GEPrimitiveType>(data >> 16);
if (count == 0)
break;
// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
if (gstate.isAntiAliasEnabled()) {
// Discard AA lines in DOA
if (prim == GE_PRIM_LINE_STRIP)
break;
// Discard AA lines in Summon Night 5
if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
break;
}
// This also make skipping drawing very effective.
framebufferManager_.SetRenderFrameBuffer();
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
transformDraw_.SetupVertexDecoder(gstate.vertType);
// Rough estimate, not sure what's correct.
int vertexCost = transformDraw_.EstimatePerVertexCost();
cyclesExecuted += vertexCost * count;
return;
}
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
break;
}
// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
// on platforms where draw calls are expensive like mobile and D3D
void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
void *inds = 0;
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
break;
}
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}
#ifndef MOBILE_DEVICE
if (prim > GE_PRIM_RECTANGLES) {
ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
}
#endif
int bytesRead;
transformDraw_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
int vertexCost = transformDraw_.EstimatePerVertexCost();
gpuStats.vertexGPUCycles += vertexCost * count;
cyclesExecuted += vertexCost * count;
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
// Some games rely on this, they don't bother reloading VADDR and IADDR.
// Q: Are these changed reflected in the real registers? Needs testing.
if (inds) {
int indexSize = 1;
if ((gstate.vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT)
indexSize = 2;
gstate_c.indexAddr += count * indexSize;
} else {
gstate_c.vertexAddr += bytesRead;
}
}
Execute_Prim(op, diff);
break;
// The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
@ -894,20 +927,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_VERTEXTYPE:
if (!g_Config.bSoftwareSkinning) {
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
} else {
// Don't flush when weight count changes, unless morph is enabled.
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (data & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
// Restore and flush
gstate.vertType ^= diff;
Flush();
gstate.vertType ^= diff;
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
}
Execute_VertexType(op, diff);
break;
case GE_CMD_REGION1:
@ -986,8 +1006,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_FRAMEBUFPTR:
case GE_CMD_FRAMEBUFWIDTH:
case GE_CMD_FRAMEBUFPIXFORMAT:
gstate_c.framebufChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
Execute_FramebufType(op, diff);
break;
case GE_CMD_TEXADDR0:

View File

@ -76,6 +76,12 @@ public:
// Pointer-to-member type matching the signature of the Execute_* command handlers below.
typedef void (GLES_GPU::*CmdFunc)(u32 op, u32 diff);
// Per-command handlers. Each receives the command word (op) and a diff value.
// NOTE(review): diff presumably holds the bits that changed versus the previous value - confirm.

// GE_CMD_VADDR: sets gstate_c.vertexAddr from the low 24 bits of op.
void Execute_Vaddr(u32 op, u32 diff);
// GE_CMD_IADDR: sets gstate_c.indexAddr from the low 24 bits of op.
void Execute_Iaddr(u32 op, u32 diff);
// GE_CMD_PRIM: submits a draw and advances the vertex or index address afterwards.
void Execute_Prim(u32 op, u32 diff);
// GE_CMD_VERTEXTYPE: flushes and/or dirties the UV scale/offset uniform as needed.
void Execute_VertexType(u32 op, u32 diff);
// GE_CMD_FRAMEBUFPTR / FRAMEBUFWIDTH / FRAMEBUFPIXFORMAT: flags the framebuffer as changed.
void Execute_FramebufType(u32 op, u32 diff);
protected:
virtual void FastRunLoop(DisplayList &list);
virtual void ProcessEvent(GPUEvent ev);