Unify Execute_Prim

This commit is contained in:
Henrik Rydgård 2018-02-26 11:18:52 +01:00
parent 23980065ba
commit c7f8f4c5ca
9 changed files with 89 additions and 345 deletions

View File

@ -75,16 +75,6 @@ struct D3D11CommandTableEntry {
// This table gets crunched into a faster form by init. // This table gets crunched into a faster form by init.
static const D3D11CommandTableEntry commandTable[] = { static const D3D11CommandTableEntry commandTable[] = {
// Changes that dirty the current texture.
{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
// Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_D3D11::Execute_Prim },
{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack... // Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_D3D11::Execute_LoadClut }, { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_D3D11::Execute_LoadClut },
}; };
@ -435,82 +425,9 @@ void GPU_D3D11::ExecuteOp(u32 op, u32 diff) {
} }
} }
// Handler for GE_CMD_PRIM, the command that drives all drawing. Other state is only
// buffered up and applied when a draw arrives; since most PSP games set state
// redundantly all the time, that is a huge optimization.
//
// op:   the command word. The low 16 bits are the vertex count and bits 16-18 the
//       primitive type.
// diff: not used by this handler.
void GPU_D3D11::Execute_Prim(u32 op, u32 diff) {
	// Nothing to do for an empty draw.
	u32 vertexCount = op & 0xFFFF;
	if (vertexCount == 0)
		return;

	// Only three bits select the primitive type, the upper bits are ignored.
	GEPrimitiveType prim = static_cast<GEPrimitiveType>((op >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// We can't do anything that makes sense with AA lines, so discard them (the SW plugin
	// might, though). Line strips are discarded for DOA, skinned line lists for Summon Night 5.
	if (gstate.isAntiAliasEnabled() &&
		(prim == GE_PRIM_LINE_STRIP || (prim == GE_PRIM_LINES && gstate.isSkinningEnabled()))) {
		return;
	}

	// Doing this before anything else also makes skipping drawing very effective.
	framebufferManagerD3D11_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);

	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		drawEngine_.SetupVertexDecoder(gstate.vertType);
		// Rough estimate of the cost, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * vertexCount;
		return;
	}

	// Validate the vertex address before turning it into a host pointer.
	u32 vaddr = gstate_c.vertexAddr;
	if (!Memory::IsValidAddress(vaddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vaddr);
		return;
	}
	void *vertexData = Memory::GetPointerUnchecked(vaddr);

	// Indices are only looked up when the vertex type asks for an index format.
	u32 vtype = gstate.vertType;
	void *indexData = nullptr;
	if ((vtype & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		u32 iaddr = gstate_c.indexAddr;
		if (!Memory::IsValidAddress(iaddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", iaddr);
			return;
		}
		indexData = Memory::GetPointerUnchecked(iaddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// The per-vertex cost is cached and only re-estimated when the vertex shader state is dirty.
	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}
	const auto drawCycles = vertexCost_ * vertexCount;
	gpuStats.vertexGPUCycles += drawCycles;
	cyclesExecuted += drawCycles;

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngine_.SubmitPrim(vertexData, indexData, prim, vertexCount, vtype, &bytesRead);

	// Once the draw is submitted, vertexAddr (non-indexed) or indexAddr (indexed) is advanced.
	// Some games depend on that and never reload VADDR and IADDR.
	// The VADDR/IADDR registers themselves are NOT updated.
	AdvanceVerts(vtype, vertexCount, bytesRead);
}
void GPU_D3D11::Execute_LoadClut(u32 op, u32 diff) { void GPU_D3D11::Execute_LoadClut(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
textureCacheD3D11_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
// This could be used to "dirty" textures with clut. // This could be used to "dirty" textures with clut.
} }

View File

@ -64,7 +64,6 @@ public:
GPU_D3D11::CmdFunc func; GPU_D3D11::CmdFunc func;
}; };
void Execute_Prim(u32 op, u32 diff);
void Execute_LoadClut(u32 op, u32 diff); void Execute_LoadClut(u32 op, u32 diff);
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.

View File

@ -59,16 +59,6 @@ struct D3D9CommandTableEntry {
// This table gets crunched into a faster form by init. // This table gets crunched into a faster form by init.
static const D3D9CommandTableEntry commandTable[] = { static const D3D9CommandTableEntry commandTable[] = {
// Changes that dirty the current texture.
{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
// Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_DX9::Execute_Prim },
{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack... // Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_DX9::Execute_LoadClut }, { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_DX9::Execute_LoadClut },
}; };
@ -410,84 +400,6 @@ void GPU_DX9::ExecuteOp(u32 op, u32 diff) {
} }
} }
// Handler for GE_CMD_PRIM. This is what drives all drawing: the rest of the state is
// merely buffered and gets applied when it is time to draw. Because most PSP games set
// state redundantly all the time, that is a huge optimization.
//
// op:   the command word. Vertex count in the low 16 bits, primitive type in bits 16-18.
// diff: not used by this handler.
void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
	u32 numVerts = op & 0xFFFF;
	if (numVerts == 0)
		return;  // Empty draw, nothing to submit.

	// Bits above the three primitive-type bits are ignored.
	GEPrimitiveType prim = static_cast<GEPrimitiveType>((op >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// AA lines get discarded since we can't do anything that makes sense with them
	// (the SW plugin might, though).
	if (gstate.isAntiAliasEnabled()) {
		// DOA uses AA line strips, Summon Night 5 uses skinned AA lines.
		bool discard = prim == GE_PRIM_LINE_STRIP;
		if (!discard && prim == GE_PRIM_LINES)
			discard = gstate.isSkinningEnabled();
		if (discard)
			return;
	}

	// Doing this up front also makes skipping drawing very effective.
	framebufferManagerDX9_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);

	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		drawEngine_.SetupVertexDecoder(gstate.vertType);
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * numVerts;
		return;
	}

	// Reject an invalid vertex address before fetching the host pointer for it.
	u32 vertAddr = gstate_c.vertexAddr;
	if (!Memory::IsValidAddress(vertAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vertAddr);
		return;
	}
	void *vertPtr = Memory::GetPointerUnchecked(vertAddr);

	// The index pointer stays null unless the vertex type specifies an index format.
	void *idxPtr = nullptr;
	u32 vertType = gstate.vertType;
	bool usesIndices = (vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
	if (usesIndices) {
		u32 idxAddr = gstate_c.indexAddr;
		if (!Memory::IsValidAddress(idxAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", idxAddr);
			return;
		}
		idxPtr = Memory::GetPointerUnchecked(idxAddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// Re-estimate the cached per-vertex cost only when the vertex shader state is dirty.
	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}
	const auto cycles = vertexCost_ * numVerts;
	gpuStats.vertexGPUCycles += cycles;
	cyclesExecuted += cycles;

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngine_.SubmitPrim(vertPtr, idxPtr, prim, numVerts, vertType, &bytesRead);

	// After the draw, advance vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this and don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertType, numVerts, bytesRead);
}
// Handler for GE_CMD_LOADCLUT: marks the texture params dirty, then asks the texture
// cache to load the CLUT from the address and byte count currently held in gstate.
void GPU_DX9::Execute_LoadClut(u32 op, u32 diff) {
	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);

	const auto clutAddress = gstate.getClutAddress();
	const auto clutBytes = gstate.getClutLoadBytes();
	textureCacheDX9_->LoadClut(clutAddress, clutBytes);
	// This could be used to "dirty" textures with clut.
}
void GPU_DX9::GetStats(char *buffer, size_t bufsize) { void GPU_DX9::GetStats(char *buffer, size_t bufsize) {
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
snprintf(buffer, bufsize - 1, snprintf(buffer, bufsize - 1,

View File

@ -65,8 +65,6 @@ public:
GPU_DX9::CmdFunc func; GPU_DX9::CmdFunc func;
}; };
void Execute_Prim(u32 op, u32 diff);
void Execute_TexSize0(u32 op, u32 diff);
void Execute_LoadClut(u32 op, u32 diff); void Execute_LoadClut(u32 op, u32 diff);
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.

View File

@ -61,16 +61,6 @@ struct GLESCommandTableEntry {
// This table gets crunched into a faster form by init. // This table gets crunched into a faster form by init.
// TODO: Share this table between the backends. Will have to make another indirection for the function pointers though.. // TODO: Share this table between the backends. Will have to make another indirection for the function pointers though..
static const GLESCommandTableEntry commandTable[] = { static const GLESCommandTableEntry commandTable[] = {
// Changes that dirty the current texture.
{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
// Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_GLES::Execute_Prim },
{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack... // Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_GLES::Execute_LoadClut }, { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_GLES::Execute_LoadClut },
}; };
@ -625,79 +615,9 @@ void GPU_GLES::ExecuteOp(u32 op, u32 diff) {
} }
} }
// Handler for GE_CMD_PRIM, which drives all drawing. Any other state is just buffered
// and then applied when it's time to draw. Most PSP games set state redundantly all the
// time, so this is a huge optimization.
//
// op:   the command word. Low 16 bits give the vertex count, bits 16-18 the primitive type.
// diff: not used by this handler.
void GPU_GLES::Execute_Prim(u32 op, u32 diff) {
	u32 primCount = op & 0xFFFF;
	if (primCount == 0)
		return;

	// The primitive type is three bits wide; the upper bits are ignored.
	GEPrimitiveType prim = static_cast<GEPrimitiveType>((op >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// There's nothing sensible we can do with AA lines, so they get discarded
	// (the SW plugin might do something with them, though).
	if (gstate.isAntiAliasEnabled()) {
		switch (prim) {
		case GE_PRIM_LINE_STRIP:
			// AA lines in DOA.
			return;
		case GE_PRIM_LINES:
			// AA lines in Summon Night 5.
			if (gstate.isSkinningEnabled())
				return;
			break;
		default:
			break;
		}
	}

	// This also makes skipping drawing very effective. This function can change the framebuffer.
	framebufferManagerGL_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);

	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		drawEngine_.SetupVertexDecoder(gstate.vertType);
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * primCount;
		return;
	}

	// Bail out on an invalid vertex address rather than fetching a pointer for it.
	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}
	void *vertexPtr = Memory::GetPointerUnchecked(gstate_c.vertexAddr);

	// Indices are only fetched when the vertex type specifies an index format.
	void *indexPtr = nullptr;
	bool hasIndices = (gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
	if (hasIndices) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indexPtr = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// The per-vertex cost is cached; refresh it only if the vertex shader state is dirty.
	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}
	const auto totalCost = vertexCost_ * primCount;
	gpuStats.vertexGPUCycles += totalCost;
	cyclesExecuted += totalCost;

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngine_.SubmitPrim(vertexPtr, indexPtr, prim, primCount, gstate.vertType, &bytesRead);

	// With the draw submitted, vertexAddr (non indexed) or indexAddr (indexed) gets advanced.
	// Some games rely on this and never reload VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(gstate.vertType, primCount, bytesRead);
}
void GPU_GLES::Execute_LoadClut(u32 op, u32 diff) { void GPU_GLES::Execute_LoadClut(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
textureCacheGL_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
} }
void GPU_GLES::GetStats(char *buffer, size_t bufsize) { void GPU_GLES::GetStats(char *buffer, size_t bufsize) {

View File

@ -42,6 +42,13 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE { GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE
{ GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommon::Execute_BoundingBox }, // + FLUSHBEFORE when we implement... or not, do we need to? { GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommon::Execute_BoundingBox }, // + FLUSHBEFORE when we implement... or not, do we need to?
{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPUCommon::Execute_Prim },
{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
// Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
// These two are actually processed in CMD_END. Not sure if FLAG_FLUSHBEFORE matters. // These two are actually processed in CMD_END. Not sure if FLAG_FLUSHBEFORE matters.
{ GE_CMD_SIGNAL, FLAG_FLUSHBEFORE }, { GE_CMD_SIGNAL, FLAG_FLUSHBEFORE },
{ GE_CMD_FINISH, FLAG_FLUSHBEFORE }, { GE_CMD_FINISH, FLAG_FLUSHBEFORE },
@ -121,7 +128,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_TEXOFFSETU }, { GE_CMD_TEXOFFSETU },
{ GE_CMD_TEXOFFSETV }, { GE_CMD_TEXOFFSETV },
// TEXSIZE0 is handled by each backend. { GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
{ GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
{ GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
{ GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
@ -1368,6 +1375,81 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE); gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE);
} }
// Shared handler for GE_CMD_PRIM. This drives all drawing: all other state is only
// buffered up and applied when it's time to draw. As most PSP games set state
// redundantly all the time, this is a huge optimization.
//
// op:   the command word. The low 16 bits are the vertex count and bits 16-18 select
//       the primitive type.
// diff: not used by this handler.
void GPUCommon::Execute_Prim(u32 op, u32 diff) {
	PROFILE_THIS_SCOPE("execprim");

	// A draw without vertices is a no-op.
	u32 vertexCount = op & 0xFFFF;
	if (vertexCount == 0)
		return;

	// Only three bits of primitive type are used, the upper bits are ignored.
	GEPrimitiveType prim = static_cast<GEPrimitiveType>((op >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// AA lines are discarded, as we can't do anything that makes sense with them anyway
	// (the SW plugin might, though). Line strips: DOA. Skinned line lists: Summon Night 5.
	if (gstate.isAntiAliasEnabled() &&
		(prim == GE_PRIM_LINE_STRIP || (prim == GE_PRIM_LINES && gstate.isSkinningEnabled()))) {
		return;
	}

	// This also makes skipping drawing very effective.
	framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);

	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		drawEngineCommon_->SetupVertexDecoder(gstate.vertType);  // Do we still need to do this?
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * vertexCount;
		return;
	}

	// Check the vertex address before converting it to a host pointer.
	u32 vertexAddr = gstate_c.vertexAddr;
	if (!Memory::IsValidAddress(vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vertexAddr);
		return;
	}
	void *vertexData = Memory::GetPointerUnchecked(vertexAddr);

	// The index pointer is only filled in when the vertex type specifies an index format.
	u32 vertexType = gstate.vertType;
	void *indexData = nullptr;
	if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		u32 indexAddr = gstate_c.indexAddr;
		if (!Memory::IsValidAddress(indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
			return;
		}
		indexData = Memory::GetPointerUnchecked(indexAddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// vertexCost_ caches the per-vertex estimate; refresh it only when the vertex shader
	// state has been marked dirty.
	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}
	const auto drawCycles = vertexCost_ * vertexCount;
	gpuStats.vertexGPUCycles += drawCycles;
	cyclesExecuted += drawCycles;

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngineCommon_->SubmitPrim(vertexData, indexData, prim, vertexCount, vertexType, &bytesRead);

	// After drawing, vertexAddr (when non indexed) or indexAddr (when indexed) is advanced.
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertexType, vertexCount, bytesRead);
}
void GPUCommon::Execute_Bezier(u32 op, u32 diff) { void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
drawEngineCommon_->DispatchFlush(); drawEngineCommon_->DispatchFlush();

View File

@ -129,6 +129,7 @@ public:
void Execute_VertexType(u32 op, u32 diff); void Execute_VertexType(u32 op, u32 diff);
void Execute_VertexTypeSkinning(u32 op, u32 diff); void Execute_VertexTypeSkinning(u32 op, u32 diff);
void Execute_Prim(u32 op, u32 diff);
void Execute_Bezier(u32 op, u32 diff); void Execute_Bezier(u32 op, u32 diff);
void Execute_Spline(u32 op, u32 diff); void Execute_Spline(u32 op, u32 diff);
void Execute_BoundingBox(u32 op, u32 diff); void Execute_BoundingBox(u32 op, u32 diff);
@ -316,6 +317,8 @@ protected:
DrawType lastDraw_; DrawType lastDraw_;
GEPrimitiveType lastPrim_; GEPrimitiveType lastPrim_;
int vertexCost_ = 0;
// No idea how big this buffer needs to be. // No idea how big this buffer needs to be.
enum { enum {
MAX_IMMBUFFER_SIZE = 32, MAX_IMMBUFFER_SIZE = 32,

View File

@ -57,16 +57,6 @@ GPU_Vulkan::CommandInfo GPU_Vulkan::cmdInfo_[256];
// This table gets crunched into a faster form by init. // This table gets crunched into a faster form by init.
static const VulkanCommandTableEntry commandTable[] = { static const VulkanCommandTableEntry commandTable[] = {
// Changes that dirty the current texture.
{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
// Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_Prim },
{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack... // Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_LoadClut }, { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_LoadClut },
}; };
@ -520,83 +510,9 @@ void GPU_Vulkan::ExecuteOp(u32 op, u32 diff) {
} }
} }
// Handler for GE_CMD_PRIM, the command that drives all drawing. All other state is
// buffered and only applied at draw time, which is a huge optimization since most PSP
// games set state redundantly all the time.
//
// op:   the command word. Vertex count in the low 16 bits, primitive type in bits 16-18.
// diff: not used by this handler.
void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) {
	PROFILE_THIS_SCOPE("execprim");

	u32 numVertices = op & 0xFFFF;
	if (numVertices == 0)
		return;  // Nothing to draw.

	// Bits above the three primitive-type bits are ignored.
	GEPrimitiveType prim = static_cast<GEPrimitiveType>((op >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// We can't do anything that makes sense with AA lines, so throw them away
	// (the SW plugin might, though).
	if (gstate.isAntiAliasEnabled()) {
		// AA line strips show up in DOA, skinned AA lines in Summon Night 5.
		bool dropLine = prim == GE_PRIM_LINE_STRIP;
		if (!dropLine && prim == GE_PRIM_LINES)
			dropLine = gstate.isSkinningEnabled();
		if (dropLine)
			return;
	}

	// This also makes skipping drawing very effective.
	framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);

	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		drawEngine_.SetupVertexDecoder(gstate.vertType);  // Do we still need to do this?
		// Rough estimate, not sure what's correct.
		cyclesExecuted += EstimatePerVertexCost() * numVertices;
		return;
	}

	// An invalid vertex address is reported and the draw dropped.
	u32 vertexAddr = gstate_c.vertexAddr;
	if (!Memory::IsValidAddress(vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vertexAddr);
		return;
	}
	void *vertexData = Memory::GetPointerUnchecked(vertexAddr);

	// Indices are resolved only if the vertex type specifies an index format.
	void *indexData = nullptr;
	u32 vertexType = gstate.vertType;
	bool indexedDraw = (vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
	if (indexedDraw) {
		u32 indexAddr = gstate_c.indexAddr;
		if (!Memory::IsValidAddress(indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
			return;
		}
		indexData = Memory::GetPointerUnchecked(indexAddr);
	}

#ifndef MOBILE_DEVICE
	if (prim > GE_PRIM_RECTANGLES) {
		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
	}
#endif

	// The cached per-vertex cost is only re-estimated when the vertex shader state is dirty.
	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}
	const auto cycleCost = vertexCost_ * numVertices;
	gpuStats.vertexGPUCycles += cycleCost;
	cyclesExecuted += cycleCost;

	int bytesRead = 0;
	UpdateUVScaleOffset();
	drawEngine_.SubmitPrim(vertexData, indexData, prim, numVertices, vertexType, &bytesRead);

	// Following the draw, vertexAddr (when non indexed) or indexAddr (when indexed) is advanced.
	// Some games rely on this and don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertexType, numVertices, bytesRead);
}
void GPU_Vulkan::Execute_LoadClut(u32 op, u32 diff) { void GPU_Vulkan::Execute_LoadClut(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
textureCacheVulkan_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
} }
void GPU_Vulkan::InitDeviceObjects() { void GPU_Vulkan::InitDeviceObjects() {

View File

@ -68,7 +68,6 @@ public:
GPU_Vulkan::CmdFunc func; GPU_Vulkan::CmdFunc func;
}; };
void Execute_Prim(u32 op, u32 diff);
void Execute_LoadClut(u32 op, u32 diff); void Execute_LoadClut(u32 op, u32 diff);
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
@ -112,8 +111,6 @@ private:
// Manages state and pipeline objects // Manages state and pipeline objects
PipelineManagerVulkan *pipelineManager_; PipelineManagerVulkan *pipelineManager_;
int vertexCost_ = 0;
std::string reportingPrimaryInfo_; std::string reportingPrimaryInfo_;
std::string reportingFullInfo_; std::string reportingFullInfo_;