mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 13:30:02 +00:00
Merge pull request #9228 from hrydgard/centralize-matrix-loads
Centralize matrix loads
This commit is contained in:
commit
58f731d734
@ -58,35 +58,6 @@ public:
|
||||
std::vector<std::string> DebugGetVertexLoaderIDs();
|
||||
std::string DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType);
|
||||
|
||||
int EstimatePerVertexCost() {
|
||||
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
|
||||
// runs in parallel with transform.
|
||||
|
||||
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
|
||||
|
||||
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
|
||||
// went too fast and starts doing all the work over again).
|
||||
|
||||
int cost = 20;
|
||||
if (gstate.isLightingEnabled()) {
|
||||
cost += 10;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (gstate.isLightChanEnabled(i))
|
||||
cost += 10;
|
||||
}
|
||||
}
|
||||
|
||||
if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
|
||||
cost += 20;
|
||||
}
|
||||
int morphCount = gstate.getNumMorphWeights();
|
||||
if (morphCount > 1) {
|
||||
cost += 5 * morphCount;
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Preprocessing for spline/bezier
|
||||
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType);
|
||||
|
@ -84,5 +84,21 @@ enum : uint64_t {
|
||||
DIRTY_SPLINETYPEV = 1ULL << 36,
|
||||
|
||||
DIRTY_BONE_UNIFORMS = 0xFF000000ULL,
|
||||
|
||||
DIRTY_ALL_UNIFORMS = 0x1FFFFFFFFFULL,
|
||||
|
||||
// Now we can add further dirty flags that are not uniforms.
|
||||
|
||||
DIRTY_ALL = 0xFFFFFFFFFFFFFFFF
|
||||
};
|
||||
|
||||
class ShaderManagerCommon {
|
||||
public:
|
||||
ShaderManagerCommon() : globalDirty_(DIRTY_ALL) {}
|
||||
virtual ~ShaderManagerCommon() {}
|
||||
void DirtyUniform(u64 what) {
|
||||
globalDirty_ |= what;
|
||||
}
|
||||
protected:
|
||||
uint64_t globalDirty_;
|
||||
};
|
@ -332,7 +332,7 @@ static const CommandTableEntry commandTable[] = {
|
||||
|
||||
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
|
||||
{GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, &GPU_DX9::Execute_LoadClut},
|
||||
{GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC, &GPU_DX9::Execute_BlockTransferStart},
|
||||
{GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC, &GPUCommon::Execute_BlockTransferStart},
|
||||
|
||||
// We don't use the dither table.
|
||||
{GE_CMD_DITH0},
|
||||
@ -341,16 +341,16 @@ static const CommandTableEntry commandTable[] = {
|
||||
{GE_CMD_DITH3},
|
||||
|
||||
// These handle their own flushing.
|
||||
{GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_WorldMtxNum},
|
||||
{GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_WorldMtxData},
|
||||
{GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_ViewMtxNum},
|
||||
{GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_ViewMtxData},
|
||||
{GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_ProjMtxNum},
|
||||
{GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_ProjMtxData},
|
||||
{GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_TgenMtxNum},
|
||||
{GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_TgenMtxData},
|
||||
{GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_BoneMtxNum},
|
||||
{GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_BoneMtxData},
|
||||
{GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_WorldMtxNum},
|
||||
{GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_WorldMtxData},
|
||||
{GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_ViewMtxNum},
|
||||
{GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_ViewMtxData},
|
||||
{GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_ProjMtxNum},
|
||||
{GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_ProjMtxData},
|
||||
{GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_TgenMtxNum},
|
||||
{GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_TgenMtxData},
|
||||
{GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_BoneMtxNum},
|
||||
{GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_BoneMtxData},
|
||||
|
||||
// Vertex Screen/Texture/Color
|
||||
{ GE_CMD_VSCX, FLAG_EXECUTE },
|
||||
@ -398,23 +398,24 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx)
|
||||
lastVsync_ = g_Config.bVSync ? 1 : 0;
|
||||
dxstate.SetVSyncInterval(g_Config.bVSync);
|
||||
|
||||
shaderManagerDX9_ = new ShaderManagerDX9();
|
||||
framebufferManagerDX9_ = new FramebufferManagerDX9();
|
||||
framebufferManager_ = framebufferManagerDX9_;
|
||||
textureCacheDX9_ = new TextureCacheDX9();
|
||||
textureCache_ = textureCacheDX9_;
|
||||
drawEngineCommon_ = &drawEngine_;
|
||||
shaderManager_ = shaderManagerDX9_;
|
||||
|
||||
shaderManager_ = new ShaderManagerDX9();
|
||||
drawEngine_.SetShaderManager(shaderManager_);
|
||||
drawEngine_.SetShaderManager(shaderManagerDX9_);
|
||||
drawEngine_.SetTextureCache(textureCacheDX9_);
|
||||
drawEngine_.SetFramebufferManager(framebufferManagerDX9_);
|
||||
framebufferManagerDX9_->Init();
|
||||
framebufferManagerDX9_->SetTextureCache(textureCacheDX9_);
|
||||
framebufferManagerDX9_->SetShaderManager(shaderManager_);
|
||||
framebufferManagerDX9_->SetShaderManager(shaderManagerDX9_);
|
||||
framebufferManagerDX9_->SetTransformDrawEngine(&drawEngine_);
|
||||
textureCacheDX9_->SetFramebufferManager(framebufferManagerDX9_);
|
||||
textureCacheDX9_->SetDepalShaderCache(&depalShaderCache_);
|
||||
textureCacheDX9_->SetShaderManager(shaderManager_);
|
||||
textureCacheDX9_->SetShaderManager(shaderManagerDX9_);
|
||||
|
||||
// Sanity check gstate
|
||||
if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) {
|
||||
@ -507,8 +508,8 @@ void GPU_DX9::CheckGPUFeatures() {
|
||||
|
||||
GPU_DX9::~GPU_DX9() {
|
||||
framebufferManagerDX9_->DestroyAllFBOs(true);
|
||||
shaderManager_->ClearCache(true);
|
||||
delete shaderManager_;
|
||||
shaderManagerDX9_->ClearCache(true);
|
||||
delete shaderManagerDX9_;
|
||||
}
|
||||
|
||||
// Needs to be called on GPU thread, not reporting thread.
|
||||
@ -525,7 +526,7 @@ void GPU_DX9::BuildReportingInfo() {
|
||||
void GPU_DX9::DeviceLost() {
|
||||
// Simply drop all caches and textures.
|
||||
// FBOs appear to survive? Or no?
|
||||
shaderManager_->ClearCache(false);
|
||||
shaderManagerDX9_->ClearCache(false);
|
||||
textureCacheDX9_->Clear(false);
|
||||
framebufferManagerDX9_->DeviceLost();
|
||||
}
|
||||
@ -585,7 +586,7 @@ void GPU_DX9::BeginFrameInternal() {
|
||||
} else if (dumpThisFrame_) {
|
||||
dumpThisFrame_ = false;
|
||||
}
|
||||
shaderManager_->DirtyShader();
|
||||
shaderManagerDX9_->DirtyShader();
|
||||
|
||||
framebufferManagerDX9_->BeginFrame();
|
||||
}
|
||||
@ -639,7 +640,7 @@ void GPU_DX9::CopyDisplayToOutputInternal() {
|
||||
framebufferManagerDX9_->EndFrame();
|
||||
|
||||
// shaderManager_->EndFrame();
|
||||
shaderManager_->DirtyLastShader();
|
||||
shaderManagerDX9_->DirtyLastShader();
|
||||
|
||||
gstate_c.textureChanged = TEXCHANGE_UPDATED;
|
||||
}
|
||||
@ -755,8 +756,7 @@ void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
|
||||
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
|
||||
drawEngine_.SetupVertexDecoder(gstate.vertType);
|
||||
// Rough estimate, not sure what's correct.
|
||||
int vertexCost = drawEngine_.EstimatePerVertexCost();
|
||||
cyclesExecuted += vertexCost * count;
|
||||
cyclesExecuted += EstimatePerVertexCost() * count;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -787,7 +787,7 @@ void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
|
||||
int bytesRead = 0;
|
||||
drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
|
||||
|
||||
int vertexCost = drawEngine_.EstimatePerVertexCost() * count;
|
||||
int vertexCost = EstimatePerVertexCost() * count;
|
||||
gpuStats.vertexGPUCycles += vertexCost;
|
||||
cyclesExecuted += vertexCost;
|
||||
|
||||
@ -1066,230 +1066,6 @@ void GPU_DX9::Execute_ColorRef(u32 op, u32 diff) {
|
||||
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
|
||||
}
|
||||
|
||||
// Handles GE_CMD_WORLDMATRIXNUMBER and, as a fast path, consumes the run of
// GE_CMD_WORLDMATRIXDATA words that usually follows it directly in the display list.
//
// op:   raw command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Only words whose value actually changes trigger a Flush() and mark
// DIRTY_WORLDMATRIX. The list PC is advanced past every word consumed here.
void GPU_DX9::Execute_WorldMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;

	// This is almost always followed by GE_CMD_WORLDMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.worldMatrix + start);

	// Words remaining before index 12. Zero or negative if the command selected 12..15.
	const int end = 12 - start;

	// The bound is checked BEFORE the first store. Previously the first data word was
	// written unconditionally, so a start index of 12..15 stored outside the 12-word
	// range that Execute_WorldMtxData protects with its `num < 12` check.
	int i = 0;
	for (; i < end && (src[i] >> 24) == GE_CMD_WORLDMATRIXDATA; ++i) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
	}

	const int count = i;
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now. Words not consumed here are left in
	// the list and go through the per-word data handler instead.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Stores a single world matrix word at the index held in gstate.worldmtxnum, then
// advances that index (wrapping at 4 bits).
// Note: it's uncommon to get here now, Execute_WorldMtxNum normally consumes the data.
void GPU_DX9::Execute_WorldMtxData(u32 op, u32 diff) {
	const int index = gstate.worldmtxnum & 0xF;
	const u32 word = op << 8;

	// Indices 12..15 are skipped: nothing is written for them.
	if (index < 12) {
		u32 *matrix = (u32 *)gstate.worldMatrix;
		if (matrix[index] != word) {
			// Flush with the old value before replacing it.
			Flush();
			matrix[index] = word;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
	}

	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_VIEWMATRIXNUMBER and, as a fast path, consumes the run of
// GE_CMD_VIEWMATRIXDATA words that usually follows it directly in the display list.
//
// op:   raw command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Only words whose value actually changes trigger a Flush() and mark
// DIRTY_VIEWMATRIX. The list PC is advanced past every word consumed here.
void GPU_DX9::Execute_ViewMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;

	// This is almost always followed by GE_CMD_VIEWMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.viewMatrix + start);

	// Words remaining before index 12. Zero or negative if the command selected 12..15.
	const int end = 12 - start;

	// The bound is checked BEFORE the first store. Previously the first data word was
	// written unconditionally, so a start index of 12..15 stored outside the 12-word
	// range that Execute_ViewMtxData protects with its `num < 12` check.
	int i = 0;
	for (; i < end && (src[i] >> 24) == GE_CMD_VIEWMATRIXDATA; ++i) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
	}

	const int count = i;
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now. Words not consumed here are left in
	// the list and go through the per-word data handler instead.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Stores a single view matrix word at the index held in gstate.viewmtxnum, then
// advances that index (wrapping at 4 bits).
// Note: it's uncommon to get here now, Execute_ViewMtxNum normally consumes the data.
void GPU_DX9::Execute_ViewMtxData(u32 op, u32 diff) {
	const int index = gstate.viewmtxnum & 0xF;
	const u32 word = op << 8;

	// Indices 12..15 are skipped: nothing is written for them.
	if (index < 12) {
		u32 *matrix = (u32 *)gstate.viewMatrix;
		if (matrix[index] != word) {
			// Flush with the old value before replacing it.
			Flush();
			matrix[index] = word;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
	}

	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_PROJMATRIXNUMBER and eagerly consumes the GE_CMD_PROJMATRIXDATA
// words that almost always follow it, instead of dispatching them one at a time.
void GPU_DX9::Execute_ProjMtxNum(u32 op, u32 diff) {
	const int first = op & 0xF;
	const u32_le *words = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)(gstate.projMatrix + first);

	// Room left before index 16. Always at least 1, since `first` is at most 15.
	const int limit = 16 - first;

	int loaded = 0;
	while (loaded < limit && (words[loaded] >> 24) == GE_CMD_PROJMATRIXDATA) {
		const u32 value = words[loaded] << 8;
		if (matrix[loaded] != value) {
			// Only an actual change needs a flush and a dirty flag.
			Flush();
			matrix[loaded] = value;
			shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
		}
		++loaded;
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + loaded) & 0xF);

	// The consumed data words are done; step the list PC past them.
	const int skippedBytes = loaded * 4;
	UpdatePC(currentList->pc, currentList->pc + skippedBytes);
	currentList->pc += skippedBytes;
}
|
||||
|
||||
// Stores a single projection matrix word at the index held in gstate.projmtxnum,
// then advances that index (wrapping at 4 bits).
// Note: it's uncommon to get here now, Execute_ProjMtxNum normally consumes the data.
void GPU_DX9::Execute_ProjMtxData(u32 op, u32 diff) {
	const int index = gstate.projmtxnum & 0xF;
	const u32 word = op << 8;

	u32 *matrix = (u32 *)gstate.projMatrix;
	if (matrix[index] != word) {
		// Flush with the old value before replacing it.
		Flush();
		matrix[index] = word;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_TGENMATRIXNUMBER and, as a fast path, consumes the run of
// GE_CMD_TGENMATRIXDATA words that usually follows it directly in the display list.
//
// op:   raw command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Only words whose value actually changes trigger a Flush() and mark
// DIRTY_TEXMATRIX. The list PC is advanced past every word consumed here.
void GPU_DX9::Execute_TgenMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;

	// This is almost always followed by GE_CMD_TGENMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.tgenMatrix + start);

	// Words remaining before index 12. Zero or negative if the command selected 12..15.
	const int end = 12 - start;

	// The bound is checked BEFORE the first store. Previously the first data word was
	// written unconditionally, so a start index of 12..15 stored outside the 12-word
	// range that Execute_TgenMtxData protects with its `num < 12` check.
	int i = 0;
	for (; i < end && (src[i] >> 24) == GE_CMD_TGENMATRIXDATA; ++i) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
	}

	const int count = i;
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now. Words not consumed here are left in
	// the list and go through the per-word data handler instead.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Stores a single texgen matrix word at the index held in gstate.texmtxnum, then
// advances that index (wrapping at 4 bits).
// Note: it's uncommon to get here now, Execute_TgenMtxNum normally consumes the data.
void GPU_DX9::Execute_TgenMtxData(u32 op, u32 diff) {
	const int index = gstate.texmtxnum & 0xF;
	const u32 word = op << 8;

	// Indices 12..15 are skipped: nothing is written for them.
	if (index < 12) {
		u32 *matrix = (u32 *)gstate.tgenMatrix;
		if (matrix[index] != word) {
			// Flush with the old value before replacing it.
			Flush();
			matrix[index] = word;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
	}

	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_BONEMATRIXNUMBER and, as a fast path, consumes the run of
// GE_CMD_BONEMATRIXDATA words that usually follows it directly in the display list.
//
// op:   raw command word; its low 7 bits select the first bone matrix word to write.
// diff: not used by this handler.
//
// Bone words are addressed as 8 matrices of 12 words each (indices 0..95).
// Depending on the skinning mode, changed matrices are either flushed and dirtied on
// the shader manager immediately, or recorded in gstate_c.deferredVertTypeDirty.
void GPU_DX9::Execute_BoneMtxNum(u32 op, u32 diff) {
	const int start = op & 0x7F;

	// This is almost always followed by GE_CMD_BONEMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.boneMatrix + start);

	// Words remaining before index 96. Zero or negative if the command selected 96..127.
	const int end = 12 * 8 - start;
	int i = 0;

	// If we can't use software skinning, we have to flush and dirty.
	const bool mustFlush = !g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0;

	// In both loops the bound is checked BEFORE the first store. Previously the first
	// data word was written unconditionally, so a start index of 96..127 stored outside
	// the 96-word range that Execute_BoneMtxData protects with its `num < 96` check.
	if (mustFlush) {
		for (; i < end && (src[i] >> 24) == GE_CMD_BONEMATRIXDATA; ++i) {
			const u32 newVal = src[i] << 8;
			if (dst[i] != newVal) {
				Flush();
				dst[i] = newVal;
			}
		}
	} else {
		for (; i < end && (src[i] >> 24) == GE_CMD_BONEMATRIXDATA; ++i) {
			dst[i] = src[i] << 8;
		}
	}

	const int count = i;

	// Dirty every matrix that received at least one word. Walking matrix indices
	// (rather than stepping the word index by 12 from an unaligned start) makes sure
	// the last matrix of a load that straddles a matrix boundary is not missed.
	if (count > 0) {
		const int firstMtx = start / 12;
		const int lastMtx = (start + count - 1) / 12;
		for (int mtx = firstMtx; mtx <= lastMtx; ++mtx) {
			if (mustFlush) {
				shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << mtx);
			} else {
				gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << mtx;
			}
		}
	}

	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F);

	// Skip over the loaded data, it's done now. Words not consumed here are left in
	// the list and go through the per-word data handler instead.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Stores a single bone matrix word at the index held in gstate.boneMatrixNumber,
// then advances that index (wrapping at 7 bits).
// Note: it's uncommon to get here now, Execute_BoneMtxNum normally consumes the data.
void GPU_DX9::Execute_BoneMtxData(u32 op, u32 diff) {
	const int index = gstate.boneMatrixNumber & 0x7F;
	const u32 word = op << 8;

	// Indices 96..127 are skipped: nothing is written for them.
	if (index < 96) {
		u32 *bones = (u32 *)gstate.boneMatrix;
		if (bones[index] != word) {
			const auto boneFlag = DIRTY_BONEMATRIX0 << (index / 12);

			// Bone matrices should NOT flush when software skinning is enabled!
			const bool deferDirty = g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
			if (deferDirty) {
				gstate_c.deferredVertTypeDirty |= boneFlag;
			} else {
				Flush();
				shaderManager_->DirtyUniform(boneFlag);
			}

			bones[index] = word;
		}
	}

	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((index + 1) & 0x7F);
}
|
||||
|
||||
void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
|
||||
u32 cmd = op >> 24;
|
||||
u32 data = op & 0xFFFFFF;
|
||||
@ -1514,7 +1290,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
|
||||
case GE_CMD_LAC0:
|
||||
case GE_CMD_LDC0:
|
||||
case GE_CMD_LSC0:
|
||||
shaderManager_->DirtyUniform(DIRTY_LIGHT0);
|
||||
if (diff)
|
||||
shaderManager_->DirtyUniform(DIRTY_LIGHT0);
|
||||
break;
|
||||
|
||||
case GE_CMD_LX1:case GE_CMD_LY1:case GE_CMD_LZ1:
|
||||
@ -1525,7 +1302,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
|
||||
case GE_CMD_LAC1:
|
||||
case GE_CMD_LDC1:
|
||||
case GE_CMD_LSC1:
|
||||
shaderManager_->DirtyUniform(DIRTY_LIGHT1);
|
||||
if (diff)
|
||||
shaderManager_->DirtyUniform(DIRTY_LIGHT1);
|
||||
break;
|
||||
case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2:
|
||||
case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2:
|
||||
@ -1535,7 +1313,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
|
||||
case GE_CMD_LAC2:
|
||||
case GE_CMD_LDC2:
|
||||
case GE_CMD_LSC2:
|
||||
shaderManager_->DirtyUniform(DIRTY_LIGHT2);
|
||||
if (diff)
|
||||
shaderManager_->DirtyUniform(DIRTY_LIGHT2);
|
||||
break;
|
||||
case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3:
|
||||
case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3:
|
||||
@ -1545,7 +1324,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
|
||||
case GE_CMD_LAC3:
|
||||
case GE_CMD_LDC3:
|
||||
case GE_CMD_LSC3:
|
||||
shaderManager_->DirtyUniform(DIRTY_LIGHT3);
|
||||
if (diff)
|
||||
shaderManager_->DirtyUniform(DIRTY_LIGHT3);
|
||||
break;
|
||||
|
||||
case GE_CMD_VIEWPORTXSCALE:
|
||||
@ -1554,7 +1334,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
|
||||
case GE_CMD_VIEWPORTYCENTER:
|
||||
case GE_CMD_VIEWPORTZSCALE:
|
||||
case GE_CMD_VIEWPORTZCENTER:
|
||||
Execute_ViewportType(op, diff);
|
||||
if (diff)
|
||||
Execute_ViewportType(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_LIGHTENABLE0:
|
||||
@ -1761,23 +1542,6 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
|
||||
}
|
||||
}
|
||||
|
||||
// Marks the bone matrix (or matrices) at the current bone matrix index as dirty,
// then delegates the actual load to gstate.FastLoadBoneMatrix(target).
void GPU_DX9::FastLoadBoneMatrix(u32 target) {
	const int wordIndex = gstate.boneMatrixNumber & 0x7F;
	const int firstMtx = wordIndex / 12;

	uint32_t dirtyMask = DIRTY_BONEMATRIX0 << firstMtx;
	// An index that isn't on a 12-word boundary also reaches into the next matrix
	// (the matrix number wraps at 8).
	const bool aligned = (wordIndex % 12) == 0;
	if (!aligned) {
		dirtyMask |= DIRTY_BONEMATRIX0 << ((firstMtx + 1) & 7);
	}

	// With software skinning (and no morphing) the dirtying is deferred without a
	// flush; otherwise flush first and dirty the uniforms right away.
	const bool deferDirty = g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
	if (deferDirty) {
		gstate_c.deferredVertTypeDirty |= dirtyMask;
	} else {
		Flush();
		shaderManager_->DirtyUniform(dirtyMask);
	}

	gstate.FastLoadBoneMatrix(target);
}
|
||||
|
||||
void GPU_DX9::GetStats(char *buffer, size_t bufsize) {
|
||||
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
|
||||
snprintf(buffer, bufsize - 1,
|
||||
@ -1807,8 +1571,8 @@ void GPU_DX9::GetStats(char *buffer, size_t bufsize) {
|
||||
(int)textureCacheDX9_->NumLoadedTextures(),
|
||||
gpuStats.numTexturesDecoded,
|
||||
gpuStats.numTextureInvalidations,
|
||||
shaderManager_->NumVertexShaders(),
|
||||
shaderManager_->NumFragmentShaders()
|
||||
shaderManagerDX9_->NumVertexShaders(),
|
||||
shaderManagerDX9_->NumFragmentShaders()
|
||||
);
|
||||
}
|
||||
|
||||
@ -1817,7 +1581,7 @@ void GPU_DX9::ClearCacheNextFrame() {
|
||||
}
|
||||
|
||||
void GPU_DX9::ClearShaderCache() {
|
||||
shaderManager_->ClearCache(true);
|
||||
shaderManagerDX9_->ClearCache(true);
|
||||
}
|
||||
|
||||
std::vector<FramebufferInfo> GPU_DX9::GetFramebufferList() {
|
||||
@ -1835,7 +1599,7 @@ void GPU_DX9::DoState(PointerWrap &p) {
|
||||
|
||||
gstate_c.textureChanged = TEXCHANGE_UPDATED;
|
||||
framebufferManagerDX9_->DestroyAllFBOs(true);
|
||||
shaderManager_->ClearCache(true);
|
||||
shaderManagerDX9_->ClearCache(true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1956,7 +1720,7 @@ std::vector<std::string> GPU_DX9::DebugGetShaderIDs(DebugShaderType type) {
|
||||
if (type == SHADER_TYPE_VERTEXLOADER) {
|
||||
return drawEngine_.DebugGetVertexLoaderIDs();
|
||||
} else {
|
||||
return shaderManager_->DebugGetShaderIDs(type);
|
||||
return shaderManagerDX9_->DebugGetShaderIDs(type);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1964,7 +1728,7 @@ std::string GPU_DX9::DebugGetShaderString(std::string id, DebugShaderType type,
|
||||
if (type == SHADER_TYPE_VERTEXLOADER) {
|
||||
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
|
||||
} else {
|
||||
return shaderManager_->DebugGetShaderString(id, type, stringType);
|
||||
return shaderManagerDX9_->DebugGetShaderString(id, type, stringType);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -125,24 +125,12 @@ public:
|
||||
void Execute_StencilTest(u32 op, u32 diff);
|
||||
void Execute_ColorRef(u32 op, u32 diff);
|
||||
|
||||
void Execute_WorldMtxNum(u32 op, u32 diff);
|
||||
void Execute_WorldMtxData(u32 op, u32 diff);
|
||||
void Execute_ViewMtxNum(u32 op, u32 diff);
|
||||
void Execute_ViewMtxData(u32 op, u32 diff);
|
||||
void Execute_ProjMtxNum(u32 op, u32 diff);
|
||||
void Execute_ProjMtxData(u32 op, u32 diff);
|
||||
void Execute_TgenMtxNum(u32 op, u32 diff);
|
||||
void Execute_TgenMtxData(u32 op, u32 diff);
|
||||
void Execute_BoneMtxNum(u32 op, u32 diff);
|
||||
void Execute_BoneMtxData(u32 op, u32 diff);
|
||||
|
||||
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
|
||||
std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
|
||||
|
||||
protected:
|
||||
void FastRunLoop(DisplayList &list) override;
|
||||
void FastLoadBoneMatrix(u32 target) override;
|
||||
void FinishDeferred() override;
|
||||
|
||||
private:
|
||||
@ -163,7 +151,7 @@ private:
|
||||
TextureCacheDX9 *textureCacheDX9_;
|
||||
DepalShaderCacheDX9 depalShaderCache_;
|
||||
DrawEngineDX9 drawEngine_;
|
||||
ShaderManagerDX9 *shaderManager_;
|
||||
ShaderManagerDX9 *shaderManagerDX9_;
|
||||
|
||||
static CommandInfo cmdInfo_[256];
|
||||
|
||||
|
@ -505,7 +505,7 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
|
||||
}
|
||||
}
|
||||
|
||||
ShaderManagerDX9::ShaderManagerDX9() : lastVShader_(nullptr), lastPShader_(nullptr), globalDirty_(0xFFFFFFFF) {
|
||||
ShaderManagerDX9::ShaderManagerDX9() : lastVShader_(nullptr), lastPShader_(nullptr) {
|
||||
codeBuffer_ = new char[16384];
|
||||
}
|
||||
|
||||
|
@ -76,7 +76,7 @@ protected:
|
||||
ShaderID id_;
|
||||
};
|
||||
|
||||
class ShaderManagerDX9 {
|
||||
class ShaderManagerDX9 : public ShaderManagerCommon {
|
||||
public:
|
||||
ShaderManagerDX9();
|
||||
~ShaderManagerDX9();
|
||||
@ -84,9 +84,6 @@ public:
|
||||
void ClearCache(bool deleteThem); // TODO: deleteThem currently not respected
|
||||
VSShader *ApplyShader(int prim, u32 vertType);
|
||||
void DirtyShader();
|
||||
void DirtyUniform(u64 what) {
|
||||
globalDirty_ |= what;
|
||||
}
|
||||
void DirtyLastShader();
|
||||
|
||||
int NumVertexShaders() const { return (int)vsCache_.size(); }
|
||||
@ -119,7 +116,6 @@ private:
|
||||
ShaderID lastFSID_;
|
||||
ShaderID lastVSID_;
|
||||
|
||||
u64 globalDirty_;
|
||||
char *codeBuffer_;
|
||||
|
||||
VSShader *lastVShader_;
|
||||
|
@ -401,24 +401,26 @@ GPU_GLES::GPU_GLES(GraphicsContext *ctx)
|
||||
UpdateVsyncInterval(true);
|
||||
CheckGPUFeatures();
|
||||
|
||||
shaderManager_ = new ShaderManagerGLES();
|
||||
shaderManagerGL_ = new ShaderManagerGLES();
|
||||
framebufferManagerGL_ = new FramebufferManagerGLES();
|
||||
framebufferManager_ = framebufferManagerGL_;
|
||||
textureCacheGL_ = new TextureCacheGLES();
|
||||
textureCache_ = textureCacheGL_;
|
||||
drawEngineCommon_ = &drawEngine_;
|
||||
shaderManager_ = shaderManagerGL_;
|
||||
drawEngineCommon_ = &drawEngine_;
|
||||
|
||||
drawEngine_.SetShaderManager(shaderManager_);
|
||||
drawEngine_.SetShaderManager(shaderManagerGL_);
|
||||
drawEngine_.SetTextureCache(textureCacheGL_);
|
||||
drawEngine_.SetFramebufferManager(framebufferManagerGL_);
|
||||
drawEngine_.SetFragmentTestCache(&fragmentTestCache_);
|
||||
framebufferManagerGL_->Init();
|
||||
framebufferManagerGL_->SetTextureCache(textureCacheGL_);
|
||||
framebufferManagerGL_->SetShaderManager(shaderManager_);
|
||||
framebufferManagerGL_->SetShaderManager(shaderManagerGL_);
|
||||
framebufferManagerGL_->SetTransformDrawEngine(&drawEngine_);
|
||||
textureCacheGL_->SetFramebufferManager(framebufferManagerGL_);
|
||||
textureCacheGL_->SetDepalShaderCache(&depalShaderCache_);
|
||||
textureCacheGL_->SetShaderManager(shaderManager_);
|
||||
textureCacheGL_->SetShaderManager(shaderManagerGL_);
|
||||
textureCacheGL_->SetTransformDrawEngine(&drawEngine_);
|
||||
fragmentTestCache_.SetTextureCache(textureCacheGL_);
|
||||
|
||||
@ -470,20 +472,20 @@ GPU_GLES::GPU_GLES(GraphicsContext *ctx)
|
||||
if (discID.size()) {
|
||||
File::CreateFullPath(GetSysDirectory(DIRECTORY_APP_CACHE));
|
||||
shaderCachePath_ = GetSysDirectory(DIRECTORY_APP_CACHE) + "/" + g_paramSFO.GetValueString("DISC_ID") + ".glshadercache";
|
||||
shaderManager_->LoadAndPrecompile(shaderCachePath_);
|
||||
shaderManagerGL_->LoadAndPrecompile(shaderCachePath_);
|
||||
}
|
||||
}
|
||||
|
||||
GPU_GLES::~GPU_GLES() {
|
||||
framebufferManagerGL_->DestroyAllFBOs(true);
|
||||
shaderManager_->ClearCache(true);
|
||||
shaderManagerGL_->ClearCache(true);
|
||||
depalShaderCache_.Clear();
|
||||
fragmentTestCache_.Clear();
|
||||
if (!shaderCachePath_.empty()) {
|
||||
shaderManager_->Save(shaderCachePath_);
|
||||
shaderManagerGL_->Save(shaderCachePath_);
|
||||
}
|
||||
delete shaderManager_;
|
||||
shaderManager_ = nullptr;
|
||||
delete shaderManagerGL_;
|
||||
shaderManagerGL_ = nullptr;
|
||||
|
||||
#ifdef _WIN32
|
||||
gfxCtx_->SwapInterval(0);
|
||||
@ -652,7 +654,7 @@ void GPU_GLES::DeviceLost() {
|
||||
// Simply drop all caches and textures.
|
||||
// FBOs appear to survive? Or no?
|
||||
// TransformDraw has registered as a GfxResourceHolder.
|
||||
shaderManager_->ClearCache(false);
|
||||
shaderManagerGL_->ClearCache(false);
|
||||
textureCacheGL_->Clear(false);
|
||||
fragmentTestCache_.Clear(false);
|
||||
depalShaderCache_.Clear();
|
||||
@ -662,6 +664,7 @@ void GPU_GLES::DeviceLost() {
|
||||
// Restore hook for the GL backend (presumably the counterpart of DeviceLost() -
// confirm against the caller). Logs the event, refreshes the command info table and
// re-applies the vsync interval with the same argument the constructor passes.
void GPU_GLES::DeviceRestore() {
	ILOG("GPU_GLES: DeviceRestore");

	UpdateCmdInfo();
	UpdateVsyncInterval(true);
}
|
||||
|
||||
@ -760,10 +763,10 @@ void GPU_GLES::BeginFrameInternal() {
|
||||
|
||||
// Save the cache from time to time. TODO: How often?
|
||||
if (!shaderCachePath_.empty() && (gpuStats.numFlips & 1023) == 0) {
|
||||
shaderManager_->Save(shaderCachePath_);
|
||||
shaderManagerGL_->Save(shaderCachePath_);
|
||||
}
|
||||
|
||||
shaderManager_->DirtyShader();
|
||||
shaderManagerGL_->DirtyShader();
|
||||
|
||||
// Not sure if this is really needed.
|
||||
shaderManager_->DirtyUniform(DIRTY_ALL);
|
||||
@ -811,7 +814,7 @@ void GPU_GLES::CopyDisplayToOutputInternal() {
|
||||
framebufferManagerGL_->RebindFramebuffer();
|
||||
drawEngine_.Flush();
|
||||
|
||||
shaderManager_->DirtyLastShader();
|
||||
shaderManagerGL_->DirtyLastShader();
|
||||
|
||||
glstate.depthWrite.set(GL_TRUE);
|
||||
glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
|
||||
@ -923,8 +926,7 @@ void GPU_GLES::Execute_Prim(u32 op, u32 diff) {
|
||||
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
|
||||
drawEngine_.SetupVertexDecoder(gstate.vertType);
|
||||
// Rough estimate, not sure what's correct.
|
||||
int vertexCost = drawEngine_.EstimatePerVertexCost();
|
||||
cyclesExecuted += vertexCost * count;
|
||||
cyclesExecuted += EstimatePerVertexCost() * count;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -952,7 +954,7 @@ void GPU_GLES::Execute_Prim(u32 op, u32 diff) {
|
||||
int bytesRead = 0;
|
||||
drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
|
||||
|
||||
int vertexCost = drawEngine_.EstimatePerVertexCost();
|
||||
int vertexCost = EstimatePerVertexCost();
|
||||
gpuStats.vertexGPUCycles += vertexCost * count;
|
||||
cyclesExecuted += vertexCost * count;
|
||||
|
||||
@ -1289,230 +1291,6 @@ void GPU_GLES::Execute_ColorRef(u32 op, u32 diff) {
|
||||
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
|
||||
}
|
||||
|
||||
// Handles GE_CMD_WORLDMATRIXNUMBER and, as a fast path, consumes the run of
// GE_CMD_WORLDMATRIXDATA words that usually follows it directly in the display list.
//
// op:   raw command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Only words whose value actually changes trigger a Flush() and mark
// DIRTY_WORLDMATRIX. The list PC is advanced past every word consumed here.
void GPU_GLES::Execute_WorldMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;

	// This is almost always followed by GE_CMD_WORLDMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.worldMatrix + start);

	// Words remaining before index 12. Zero or negative if the command selected 12..15.
	const int end = 12 - start;

	// The bound is checked BEFORE the first store. Previously the first data word was
	// written unconditionally, so a start index of 12..15 stored outside the 12-word
	// range that Execute_WorldMtxData protects with its `num < 12` check.
	int i = 0;
	for (; i < end && (src[i] >> 24) == GE_CMD_WORLDMATRIXDATA; ++i) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
	}

	const int count = i;
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now. Words not consumed here are left in
	// the list and go through the per-word data handler instead.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Stores a single world matrix word at the index held in gstate.worldmtxnum, then
// advances that index (wrapping at 4 bits).
// Note: it's uncommon to get here now, Execute_WorldMtxNum normally consumes the data.
void GPU_GLES::Execute_WorldMtxData(u32 op, u32 diff) {
	const int index = gstate.worldmtxnum & 0xF;
	const u32 word = op << 8;

	// Indices 12..15 are skipped: nothing is written for them.
	if (index < 12) {
		u32 *matrix = (u32 *)gstate.worldMatrix;
		if (matrix[index] != word) {
			// Flush with the old value before replacing it.
			Flush();
			matrix[index] = word;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
	}

	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_VIEWMATRIXNUMBER and, as a fast path, consumes the run of
// GE_CMD_VIEWMATRIXDATA words that usually follows it directly in the display list.
//
// op:   raw command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Only words whose value actually changes trigger a Flush() and mark
// DIRTY_VIEWMATRIX. The list PC is advanced past every word consumed here.
void GPU_GLES::Execute_ViewMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;

	// This is almost always followed by GE_CMD_VIEWMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.viewMatrix + start);

	// Words remaining before index 12. Zero or negative if the command selected 12..15.
	const int end = 12 - start;

	// The bound is checked BEFORE the first store. Previously the first data word was
	// written unconditionally, so a start index of 12..15 stored outside the 12-word
	// range that Execute_ViewMtxData protects with its `num < 12` check.
	int i = 0;
	for (; i < end && (src[i] >> 24) == GE_CMD_VIEWMATRIXDATA; ++i) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
	}

	const int count = i;
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now. Words not consumed here are left in
	// the list and go through the per-word data handler instead.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Stores a single view matrix word at the index held in gstate.viewmtxnum, then
// advances that index (wrapping at 4 bits).
// Note: it's uncommon to get here now, Execute_ViewMtxNum normally consumes the data.
void GPU_GLES::Execute_ViewMtxData(u32 op, u32 diff) {
	const int index = gstate.viewmtxnum & 0xF;
	const u32 word = op << 8;

	// Indices 12..15 are skipped: nothing is written for them.
	if (index < 12) {
		u32 *matrix = (u32 *)gstate.viewMatrix;
		if (matrix[index] != word) {
			// Flush with the old value before replacing it.
			Flush();
			matrix[index] = word;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
	}

	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_PROJMATRIXNUMBER and eagerly consumes the run of
// GE_CMD_PROJMATRIXDATA words that usually follows it in the display list.
//
// op:   the command word; its low 4 bits are the starting word index in gstate.projMatrix.
// diff: unused here.
void GPU_GLES::Execute_ProjMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;
	// Words left before index 16; always at least 1 because start is a 4-bit value.
	const int remaining = 16 - start;
	const u32_le *words = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)(gstate.projMatrix + start);

	int loaded = 0;
	for (; loaded < remaining && (words[loaded] >> 24) == GE_CMD_PROJMATRIXDATA; ++loaded) {
		const u32 value = words[loaded] << 8;
		if (matrix[loaded] == value)
			continue;
		// Flush before the stored value changes, then mark the uniform dirty.
		Flush();
		matrix[loaded] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + loaded) & 0xF);

	// The consumed data words must not be executed again, so step pc past them.
	const int consumedBytes = loaded * 4;
	UpdatePC(currentList->pc, currentList->pc + consumedBytes);
	currentList->pc += consumedBytes;
}
|
||||
|
||||
// Handles one GE_CMD_PROJMATRIXDATA word that was not consumed by the
// batched path in Execute_ProjMtxNum.
//
// op:   the command word; shifting it left by 8 drops the command byte.
// diff: unused here.
void GPU_GLES::Execute_ProjMtxData(u32 op, u32 diff) {
	// The index is a 4-bit value (0..15), and is used without a further range check.
	const int index = gstate.projmtxnum & 0xF;
	const u32 shifted = op << 8;
	u32 *matrixWords = (u32 *)gstate.projMatrix;

	if (matrixWords[index] != shifted) {
		// Flush before the stored value changes, then mark the uniform dirty.
		Flush();
		matrixWords[index] = shifted;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	// Advance the write cursor, wrapping inside the 4-bit field.
	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_TGENMATRIXNUMBER and eagerly consumes the run of
// GE_CMD_TGENMATRIXDATA words that usually follows it in the display list.
//
// op:   the command word; its low 4 bits are the starting word index in gstate.tgenMatrix.
// diff: unused here.
//
// At most (12 - start) words are consumed, so this path never stores at index 12 or above.
// If the start index is already 12..15, nothing is consumed and pc is left alone; following
// data words are then left for the per-word handler (Execute_TgenMtxData), which skips
// out-of-range indices.
void GPU_GLES::Execute_TgenMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;
	const int end = 12 - start;
	int count = 0;

	if (end > 0) {
		const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
		u32 *dst = (u32 *)(gstate.tgenMatrix + start);

		while (count < end && (src[count] >> 24) == GE_CMD_TGENMATRIXDATA) {
			const u32 newVal = src[count] << 8;
			if (dst[count] != newVal) {
				// Flush before the stored value changes, then mark the uniform dirty.
				Flush();
				dst[count] = newVal;
				shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
			}
			++count;
		}
	}

	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Handles one GE_CMD_TGENMATRIXDATA word that was not consumed by the
// batched path in Execute_TgenMtxNum.
//
// op:   the command word; shifting it left by 8 drops the command byte.
// diff: unused here.
void GPU_GLES::Execute_TgenMtxData(u32 op, u32 diff) {
	const int index = gstate.texmtxnum & 0xF;
	const u32 shifted = op << 8;
	u32 *matrixWords = (u32 *)gstate.tgenMatrix;

	// Only indices below 12 are stored; higher ones just advance the cursor.
	if (index < 12) {
		if (matrixWords[index] != shifted) {
			// Flush before the stored value changes, then mark the uniform dirty.
			Flush();
			matrixWords[index] = shifted;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
	}

	// Advance the write cursor, wrapping inside the 4-bit field.
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_BONEMATRIXNUMBER and eagerly consumes the run of
// GE_CMD_BONEMATRIXDATA words that usually follows it in the display list.
//
// op:   the command word; its low 7 bits are the starting word index in gstate.boneMatrix
//       (12 words per matrix, 8 matrices).
// diff: unused here.
//
// At most (96 - start) words are consumed, so this path never stores at index 96 or above.
// If the start index is already 96..127, nothing is consumed and pc is left alone; following
// data words are then left for the per-word handler (Execute_BoneMtxData), which skips
// out-of-range indices.
//
// Every matrix that received at least one word is marked dirty, including the last one when
// the run starts at an index that is not a multiple of 12 and spills into the next matrix.
void GPU_GLES::Execute_BoneMtxNum(u32 op, u32 diff) {
	const int start = op & 0x7F;
	const int end = 12 * 8 - start;
	int count = 0;

	if (end > 0) {
		const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
		u32 *dst = (u32 *)(gstate.boneMatrix + start);

		// If we can't use software skinning, we have to flush and dirty.
		const bool mustFlush = !g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0;

		if (mustFlush) {
			while (count < end && (src[count] >> 24) == GE_CMD_BONEMATRIXDATA) {
				const u32 newVal = src[count] << 8;
				if (dst[count] != newVal) {
					Flush();
					dst[count] = newVal;
				}
				++count;
			}
		} else {
			while (count < end && (src[count] >> 24) == GE_CMD_BONEMATRIXDATA) {
				dst[count] = src[count] << 8;
				++count;
			}
		}

		if (count > 0) {
			// Walk matrix indices rather than word indices so a partially written
			// trailing matrix is not skipped.
			const int firstMtx = start / 12;
			const int lastMtx = (start + count - 1) / 12;
			for (int mtx = firstMtx; mtx <= lastMtx; ++mtx) {
				if (mustFlush) {
					shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << mtx);
				} else {
					gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << mtx;
				}
			}
		}
	}

	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Handles one GE_CMD_BONEMATRIXDATA word that was not consumed by the
// batched path in Execute_BoneMtxNum.
//
// op:   the command word; shifting it left by 8 drops the command byte.
// diff: unused here.
void GPU_GLES::Execute_BoneMtxData(u32 op, u32 diff) {
	const int index = gstate.boneMatrixNumber & 0x7F;
	const u32 shifted = op << 8;
	u32 *boneWords = (u32 *)gstate.boneMatrix;

	// Only indices below 96 are stored; higher ones just advance the cursor.
	if (index < 96 && boneWords[index] != shifted) {
		const u64 mtxDirtyBit = DIRTY_BONEMATRIX0 << (index / 12);
		const bool softwareSkinned = g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
		if (softwareSkinned) {
			// Bone matrices should NOT flush when software skinning is enabled!
			gstate_c.deferredVertTypeDirty |= mtxDirtyBit;
		} else {
			Flush();
			shaderManager_->DirtyUniform(mtxDirtyBit);
		}
		boneWords[index] = shifted;
	}

	// Advance the write cursor, wrapping inside the 7-bit field.
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((index + 1) & 0x7F);
}
|
||||
|
||||
void GPU_GLES::Execute_Generic(u32 op, u32 diff) {
|
||||
u32 cmd = op >> 24;
|
||||
u32 data = op & 0xFFFFFF;
|
||||
@ -2044,23 +1822,6 @@ void GPU_GLES::Execute_Generic(u32 op, u32 diff) {
|
||||
}
|
||||
}
|
||||
|
||||
// Marks the bone matrix (or matrices) at the current bone matrix cursor dirty,
// then delegates the actual load to gstate.FastLoadBoneMatrix(target).
//
// target: passed through unchanged to gstate.FastLoadBoneMatrix.
void GPU_GLES::FastLoadBoneMatrix(u32 target) {
	const int wordIndex = gstate.boneMatrixNumber & 0x7F;
	const int firstMtx = wordIndex / 12;

	uint32_t dirtyMask = DIRTY_BONEMATRIX0 << firstMtx;
	// A cursor that is not on a 12-word boundary also touches the next matrix (wrapping at 8).
	if (wordIndex % 12 != 0) {
		dirtyMask |= DIRTY_BONEMATRIX0 << ((firstMtx + 1) & 7);
	}

	const bool softwareSkinned = g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
	if (softwareSkinned) {
		// No flush on this path; the dirty bits are only recorded as deferred.
		gstate_c.deferredVertTypeDirty |= dirtyMask;
	} else {
		Flush();
		shaderManager_->DirtyUniform(dirtyMask);
	}

	gstate.FastLoadBoneMatrix(target);
}
|
||||
|
||||
void GPU_GLES::GetStats(char *buffer, size_t bufsize) {
|
||||
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
|
||||
snprintf(buffer, bufsize - 1,
|
||||
@ -2090,9 +1851,9 @@ void GPU_GLES::GetStats(char *buffer, size_t bufsize) {
|
||||
(int)textureCacheGL_->NumLoadedTextures(),
|
||||
gpuStats.numTexturesDecoded,
|
||||
gpuStats.numTextureInvalidations,
|
||||
shaderManager_->NumVertexShaders(),
|
||||
shaderManager_->NumFragmentShaders(),
|
||||
shaderManager_->NumPrograms());
|
||||
shaderManagerGL_->NumVertexShaders(),
|
||||
shaderManagerGL_->NumFragmentShaders(),
|
||||
shaderManagerGL_->NumPrograms());
|
||||
}
|
||||
|
||||
void GPU_GLES::ClearCacheNextFrame() {
|
||||
@ -2100,12 +1861,12 @@ void GPU_GLES::ClearCacheNextFrame() {
|
||||
}
|
||||
|
||||
void GPU_GLES::ClearShaderCache() {
|
||||
shaderManager_->ClearCache(true);
|
||||
shaderManagerGL_->ClearCache(true);
|
||||
}
|
||||
|
||||
void GPU_GLES::CleanupBeforeUI() {
|
||||
// Clear any enabled vertex arrays.
|
||||
shaderManager_->DirtyLastShader();
|
||||
shaderManagerGL_->DirtyLastShader();
|
||||
glstate.arrayBuffer.bind(0);
|
||||
glstate.elementArrayBuffer.bind(0);
|
||||
}
|
||||
@ -2127,7 +1888,7 @@ void GPU_GLES::DoState(PointerWrap &p) {
|
||||
|
||||
gstate_c.textureChanged = TEXCHANGE_UPDATED;
|
||||
framebufferManagerGL_->DestroyAllFBOs(true);
|
||||
shaderManager_->ClearCache(true);
|
||||
shaderManagerGL_->ClearCache(true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2216,7 +1977,7 @@ std::vector<std::string> GPU_GLES::DebugGetShaderIDs(DebugShaderType type) {
|
||||
if (type == SHADER_TYPE_VERTEXLOADER) {
|
||||
return drawEngine_.DebugGetVertexLoaderIDs();
|
||||
} else {
|
||||
return shaderManager_->DebugGetShaderIDs(type);
|
||||
return shaderManagerGL_->DebugGetShaderIDs(type);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2224,6 +1985,6 @@ std::string GPU_GLES::DebugGetShaderString(std::string id, DebugShaderType type,
|
||||
if (type == SHADER_TYPE_VERTEXLOADER) {
|
||||
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
|
||||
} else {
|
||||
return shaderManager_->DebugGetShaderString(id, type, stringType);
|
||||
return shaderManagerGL_->DebugGetShaderString(id, type, stringType);
|
||||
}
|
||||
}
|
||||
|
@ -130,16 +130,6 @@ public:
|
||||
void Execute_AlphaTest(u32 op, u32 diff);
|
||||
void Execute_StencilTest(u32 op, u32 diff);
|
||||
void Execute_ColorRef(u32 op, u32 diff);
|
||||
void Execute_WorldMtxNum(u32 op, u32 diff);
|
||||
void Execute_WorldMtxData(u32 op, u32 diff);
|
||||
void Execute_ViewMtxNum(u32 op, u32 diff);
|
||||
void Execute_ViewMtxData(u32 op, u32 diff);
|
||||
void Execute_ProjMtxNum(u32 op, u32 diff);
|
||||
void Execute_ProjMtxData(u32 op, u32 diff);
|
||||
void Execute_TgenMtxNum(u32 op, u32 diff);
|
||||
void Execute_TgenMtxData(u32 op, u32 diff);
|
||||
void Execute_BoneMtxNum(u32 op, u32 diff);
|
||||
void Execute_BoneMtxData(u32 op, u32 diff);
|
||||
|
||||
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
|
||||
@ -147,7 +137,6 @@ public:
|
||||
|
||||
protected:
|
||||
void FastRunLoop(DisplayList &list) override;
|
||||
void FastLoadBoneMatrix(u32 target) override;
|
||||
void FinishDeferred() override;
|
||||
|
||||
private:
|
||||
@ -172,7 +161,7 @@ private:
|
||||
DepalShaderCacheGLES depalShaderCache_;
|
||||
DrawEngineGLES drawEngine_;
|
||||
FragmentTestCacheGLES fragmentTestCache_;
|
||||
ShaderManagerGLES *shaderManager_;
|
||||
ShaderManagerGLES *shaderManagerGL_;
|
||||
|
||||
int lastVsync_;
|
||||
|
||||
|
@ -748,7 +748,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
|
||||
}
|
||||
|
||||
ShaderManagerGLES::ShaderManagerGLES()
|
||||
: lastShader_(nullptr), globalDirty_(DIRTY_ALL), shaderSwitchDirty_(0), diskCacheDirty_(false) {
|
||||
: lastShader_(nullptr), shaderSwitchDirty_(0), diskCacheDirty_(false) {
|
||||
codeBuffer_ = new char[16384];
|
||||
lastFSID_.set_invalid();
|
||||
lastVSID_.set_invalid();
|
||||
|
@ -140,7 +140,7 @@ private:
|
||||
bool isFragment_;
|
||||
};
|
||||
|
||||
class ShaderManagerGLES {
|
||||
class ShaderManagerGLES : public ShaderManagerCommon {
|
||||
public:
|
||||
ShaderManagerGLES();
|
||||
~ShaderManagerGLES();
|
||||
@ -153,9 +153,6 @@ public:
|
||||
LinkedShader *ApplyFragmentShader(ShaderID VSID, Shader *vs, u32 vertType, int prim);
|
||||
|
||||
void DirtyShader();
|
||||
void DirtyUniform(u64 what) {
|
||||
globalDirty_ |= what;
|
||||
}
|
||||
void DirtyLastShader(); // disables vertex arrays
|
||||
|
||||
int NumVertexShaders() const { return (int)vsCache_.size(); }
|
||||
@ -191,7 +188,6 @@ private:
|
||||
ShaderID lastVSID_;
|
||||
|
||||
LinkedShader *lastShader_;
|
||||
u64 globalDirty_;
|
||||
u64 shaderSwitchDirty_;
|
||||
char *codeBuffer_;
|
||||
|
||||
|
@ -25,6 +25,10 @@
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
#include "GPU/Common/DrawEngineCommon.h"
|
||||
|
||||
void GPUCommon::Flush() {
|
||||
drawEngineCommon_->DispatchFlush();
|
||||
}
|
||||
|
||||
GPUCommon::GPUCommon() :
|
||||
dumpNextFrame_(false),
|
||||
dumpThisFrame_(false),
|
||||
@ -84,6 +88,35 @@ void GPUCommon::Reinitialize() {
|
||||
ScheduleEvent(GPU_EVENT_REINITIALIZE);
|
||||
}
|
||||
|
||||
int GPUCommon::EstimatePerVertexCost() {
|
||||
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
|
||||
// runs in parallel with transform.
|
||||
|
||||
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
|
||||
|
||||
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
|
||||
// went too fast and starts doing all the work over again).
|
||||
|
||||
int cost = 20;
|
||||
if (gstate.isLightingEnabled()) {
|
||||
cost += 10;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (gstate.isLightChanEnabled(i))
|
||||
cost += 10;
|
||||
}
|
||||
}
|
||||
|
||||
if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
|
||||
cost += 20;
|
||||
}
|
||||
int morphCount = gstate.getNumMorphWeights();
|
||||
if (morphCount > 1) {
|
||||
cost += 5 * morphCount;
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
void GPUCommon::PopDLQueue() {
|
||||
easy_guard guard(listLock);
|
||||
if(!dlQueue.empty()) {
|
||||
@ -1163,6 +1196,230 @@ void GPUCommon::Execute_BlockTransferStart(u32 op, u32 diff) {
|
||||
gstate_c.textureChanged = TEXCHANGE_UPDATED;
|
||||
}
|
||||
|
||||
// Handles GE_CMD_WORLDMATRIXNUMBER and eagerly consumes the run of
// GE_CMD_WORLDMATRIXDATA words that usually follows it in the display list.
//
// op:   the command word; its low 4 bits are the starting word index in gstate.worldMatrix.
// diff: unused here.
//
// At most (12 - start) words are consumed, so this path never stores at index 12 or above.
// If the start index is already 12..15, nothing is consumed and pc is left alone; following
// data words are then left for the per-word handler (Execute_WorldMtxData), which skips
// out-of-range indices.
void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;
	const int end = 12 - start;
	int count = 0;

	if (end > 0) {
		const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
		u32 *dst = (u32 *)(gstate.worldMatrix + start);

		while (count < end && (src[count] >> 24) == GE_CMD_WORLDMATRIXDATA) {
			const u32 newVal = src[count] << 8;
			if (dst[count] != newVal) {
				// Flush before the stored value changes, then mark the uniform dirty.
				Flush();
				dst[count] = newVal;
				shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
			}
			++count;
		}
	}

	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Handles one GE_CMD_WORLDMATRIXDATA word that was not consumed by the
// batched path in Execute_WorldMtxNum.
//
// op:   the command word; shifting it left by 8 drops the command byte.
// diff: unused here.
void GPUCommon::Execute_WorldMtxData(u32 op, u32 diff) {
	const int index = gstate.worldmtxnum & 0xF;
	const u32 shifted = op << 8;
	u32 *matrixWords = (u32 *)gstate.worldMatrix;

	// Only indices below 12 are stored; higher ones just advance the cursor.
	if (index < 12) {
		if (matrixWords[index] != shifted) {
			// Flush before the stored value changes, then mark the uniform dirty.
			Flush();
			matrixWords[index] = shifted;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
	}

	// Advance the write cursor, wrapping inside the 4-bit field.
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_VIEWMATRIXNUMBER and eagerly consumes the run of
// GE_CMD_VIEWMATRIXDATA words that usually follows it in the display list.
//
// op:   the command word; its low 4 bits are the starting word index in gstate.viewMatrix.
// diff: unused here.
//
// At most (12 - start) words are consumed, so this path never stores at index 12 or above.
// If the start index is already 12..15, nothing is consumed and pc is left alone; following
// data words are then left for the per-word handler (Execute_ViewMtxData), which skips
// out-of-range indices.
void GPUCommon::Execute_ViewMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;
	const int end = 12 - start;
	int count = 0;

	if (end > 0) {
		const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
		u32 *dst = (u32 *)(gstate.viewMatrix + start);

		while (count < end && (src[count] >> 24) == GE_CMD_VIEWMATRIXDATA) {
			const u32 newVal = src[count] << 8;
			if (dst[count] != newVal) {
				// Flush before the stored value changes, then mark the uniform dirty.
				Flush();
				dst[count] = newVal;
				shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
			}
			++count;
		}
	}

	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Handles one GE_CMD_VIEWMATRIXDATA word that was not consumed by the
// batched path in Execute_ViewMtxNum.
//
// op:   the command word; shifting it left by 8 drops the command byte.
// diff: unused here.
void GPUCommon::Execute_ViewMtxData(u32 op, u32 diff) {
	const int index = gstate.viewmtxnum & 0xF;
	const u32 shifted = op << 8;
	u32 *matrixWords = (u32 *)gstate.viewMatrix;

	// Only indices below 12 are stored; higher ones just advance the cursor.
	if (index < 12) {
		if (matrixWords[index] != shifted) {
			// Flush before the stored value changes, then mark the uniform dirty.
			Flush();
			matrixWords[index] = shifted;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
	}

	// Advance the write cursor, wrapping inside the 4-bit field.
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_PROJMATRIXNUMBER and eagerly consumes the run of
// GE_CMD_PROJMATRIXDATA words that usually follows it in the display list.
//
// op:   the command word; its low 4 bits are the starting word index in gstate.projMatrix.
// diff: unused here.
void GPUCommon::Execute_ProjMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;
	// Words left before index 16; always at least 1 because start is a 4-bit value.
	const int remaining = 16 - start;
	const u32_le *words = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)(gstate.projMatrix + start);

	int loaded = 0;
	for (; loaded < remaining && (words[loaded] >> 24) == GE_CMD_PROJMATRIXDATA; ++loaded) {
		const u32 value = words[loaded] << 8;
		if (matrix[loaded] == value)
			continue;
		// Flush before the stored value changes, then mark the uniform dirty.
		Flush();
		matrix[loaded] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + loaded) & 0xF);

	// The consumed data words must not be executed again, so step pc past them.
	const int consumedBytes = loaded * 4;
	UpdatePC(currentList->pc, currentList->pc + consumedBytes);
	currentList->pc += consumedBytes;
}
|
||||
|
||||
// Handles one GE_CMD_PROJMATRIXDATA word that was not consumed by the
// batched path in Execute_ProjMtxNum.
//
// op:   the command word; shifting it left by 8 drops the command byte.
// diff: unused here.
void GPUCommon::Execute_ProjMtxData(u32 op, u32 diff) {
	// The index is a 4-bit value (0..15), and is used without a further range check.
	const int index = gstate.projmtxnum & 0xF;
	const u32 shifted = op << 8;
	u32 *matrixWords = (u32 *)gstate.projMatrix;

	if (matrixWords[index] != shifted) {
		// Flush before the stored value changes, then mark the uniform dirty.
		Flush();
		matrixWords[index] = shifted;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	// Advance the write cursor, wrapping inside the 4-bit field.
	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_TGENMATRIXNUMBER and eagerly consumes the run of
// GE_CMD_TGENMATRIXDATA words that usually follows it in the display list.
//
// op:   the command word; its low 4 bits are the starting word index in gstate.tgenMatrix.
// diff: unused here.
//
// At most (12 - start) words are consumed, so this path never stores at index 12 or above.
// If the start index is already 12..15, nothing is consumed and pc is left alone; following
// data words are then left for the per-word handler (Execute_TgenMtxData), which skips
// out-of-range indices.
void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;
	const int end = 12 - start;
	int count = 0;

	if (end > 0) {
		const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
		u32 *dst = (u32 *)(gstate.tgenMatrix + start);

		while (count < end && (src[count] >> 24) == GE_CMD_TGENMATRIXDATA) {
			const u32 newVal = src[count] << 8;
			if (dst[count] != newVal) {
				// Flush before the stored value changes, then mark the uniform dirty.
				Flush();
				dst[count] = newVal;
				shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
			}
			++count;
		}
	}

	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Handles one GE_CMD_TGENMATRIXDATA word that was not consumed by the
// batched path in Execute_TgenMtxNum.
//
// op:   the command word; shifting it left by 8 drops the command byte.
// diff: unused here.
void GPUCommon::Execute_TgenMtxData(u32 op, u32 diff) {
	const int index = gstate.texmtxnum & 0xF;
	const u32 shifted = op << 8;
	u32 *matrixWords = (u32 *)gstate.tgenMatrix;

	// Only indices below 12 are stored; higher ones just advance the cursor.
	if (index < 12) {
		if (matrixWords[index] != shifted) {
			// Flush before the stored value changes, then mark the uniform dirty.
			Flush();
			matrixWords[index] = shifted;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
	}

	// Advance the write cursor, wrapping inside the 4-bit field.
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_BONEMATRIXNUMBER and eagerly consumes the run of
// GE_CMD_BONEMATRIXDATA words that usually follows it in the display list.
//
// op:   the command word; its low 7 bits are the starting word index in gstate.boneMatrix
//       (12 words per matrix, 8 matrices).
// diff: unused here.
//
// At most (96 - start) words are consumed, so this path never stores at index 96 or above.
// If the start index is already 96..127, nothing is consumed and pc is left alone; following
// data words are then left for the per-word handler (Execute_BoneMtxData), which skips
// out-of-range indices.
//
// Every matrix that received at least one word is marked dirty, including the last one when
// the run starts at an index that is not a multiple of 12 and spills into the next matrix.
void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
	const int start = op & 0x7F;
	const int end = 12 * 8 - start;
	int count = 0;

	if (end > 0) {
		const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
		u32 *dst = (u32 *)(gstate.boneMatrix + start);

		// If we can't use software skinning, we have to flush and dirty.
		const bool mustFlush = !g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0;

		if (mustFlush) {
			while (count < end && (src[count] >> 24) == GE_CMD_BONEMATRIXDATA) {
				const u32 newVal = src[count] << 8;
				if (dst[count] != newVal) {
					Flush();
					dst[count] = newVal;
				}
				++count;
			}
		} else {
			while (count < end && (src[count] >> 24) == GE_CMD_BONEMATRIXDATA) {
				dst[count] = src[count] << 8;
				++count;
			}
		}

		if (count > 0) {
			// Walk matrix indices rather than word indices so a partially written
			// trailing matrix is not skipped.
			const int firstMtx = start / 12;
			const int lastMtx = (start + count - 1) / 12;
			for (int mtx = firstMtx; mtx <= lastMtx; ++mtx) {
				if (mustFlush) {
					shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << mtx);
				} else {
					gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << mtx;
				}
			}
		}
	}

	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Handles one GE_CMD_BONEMATRIXDATA word that was not consumed by the
// batched path in Execute_BoneMtxNum.
//
// op:   the command word; shifting it left by 8 drops the command byte.
// diff: unused here.
void GPUCommon::Execute_BoneMtxData(u32 op, u32 diff) {
	const int index = gstate.boneMatrixNumber & 0x7F;
	const u32 shifted = op << 8;
	u32 *boneWords = (u32 *)gstate.boneMatrix;

	// Only indices below 96 are stored; higher ones just advance the cursor.
	if (index < 96 && boneWords[index] != shifted) {
		const u64 mtxDirtyBit = DIRTY_BONEMATRIX0 << (index / 12);
		const bool softwareSkinned = g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
		if (softwareSkinned) {
			// Bone matrices should NOT flush when software skinning is enabled!
			gstate_c.deferredVertTypeDirty |= mtxDirtyBit;
		} else {
			Flush();
			shaderManager_->DirtyUniform(mtxDirtyBit);
		}
		boneWords[index] = shifted;
	}

	// Advance the write cursor, wrapping inside the 7-bit field.
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((index + 1) & 0x7F);
}
|
||||
|
||||
void GPUCommon::ExecuteOp(u32 op, u32 diff) {
|
||||
const u32 cmd = op >> 24;
|
||||
|
||||
@ -1211,6 +1468,19 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
|
||||
}
|
||||
|
||||
// Marks the bone matrix (or matrices) at the current bone matrix cursor dirty,
// then delegates the actual load to gstate.FastLoadBoneMatrix(target).
//
// target: passed through unchanged to gstate.FastLoadBoneMatrix.
void GPUCommon::FastLoadBoneMatrix(u32 target) {
	const int wordIndex = gstate.boneMatrixNumber & 0x7F;
	const int firstMtx = wordIndex / 12;

	uint32_t dirtyMask = DIRTY_BONEMATRIX0 << firstMtx;
	// A cursor that is not on a 12-word boundary also touches the next matrix (wrapping at 8).
	if (wordIndex % 12 != 0) {
		dirtyMask |= DIRTY_BONEMATRIX0 << ((firstMtx + 1) & 7);
	}

	const bool softwareSkinned = g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
	if (softwareSkinned) {
		// No flush on this path; the dirty bits are only recorded as deferred.
		gstate_c.deferredVertTypeDirty |= dirtyMask;
	} else {
		Flush();
		shaderManager_->DirtyUniform(dirtyMask);
	}

	gstate.FastLoadBoneMatrix(target);
}
|
||||
|
||||
|
@ -85,6 +85,22 @@ public:
|
||||
void Execute_BoundingBox(u32 op, u32 diff);
|
||||
void Execute_BlockTransferStart(u32 op, u32 diff);
|
||||
|
||||
void Execute_WorldMtxNum(u32 op, u32 diff);
|
||||
void Execute_WorldMtxData(u32 op, u32 diff);
|
||||
void Execute_ViewMtxNum(u32 op, u32 diff);
|
||||
void Execute_ViewMtxData(u32 op, u32 diff);
|
||||
void Execute_ProjMtxNum(u32 op, u32 diff);
|
||||
void Execute_ProjMtxData(u32 op, u32 diff);
|
||||
void Execute_TgenMtxNum(u32 op, u32 diff);
|
||||
void Execute_TgenMtxData(u32 op, u32 diff);
|
||||
void Execute_BoneMtxNum(u32 op, u32 diff);
|
||||
void Execute_BoneMtxData(u32 op, u32 diff);
|
||||
|
||||
int EstimatePerVertexCost();
|
||||
|
||||
// Note: Not virtual!
|
||||
inline void Flush();
|
||||
|
||||
u64 GetTickEstimate() override {
|
||||
#if defined(_M_X64) || defined(__ANDROID__)
|
||||
return curTickEst_;
|
||||
@ -207,6 +223,7 @@ protected:
|
||||
FramebufferManagerCommon *framebufferManager_;
|
||||
TextureCacheCommon *textureCache_;
|
||||
DrawEngineCommon *drawEngineCommon_;
|
||||
ShaderManagerCommon *shaderManager_;
|
||||
|
||||
typedef std::list<int> DisplayListQueue;
|
||||
|
||||
|
@ -283,40 +283,12 @@ void SoftGPU::FastRunLoop(DisplayList &list) {
|
||||
}
|
||||
}
|
||||
|
||||
int EstimatePerVertexCost() {
|
||||
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
|
||||
// runs in parallel with transform.
|
||||
|
||||
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
|
||||
|
||||
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
|
||||
// went too fast and starts doing all the work over again).
|
||||
|
||||
int cost = 20;
|
||||
if (gstate.isLightingEnabled()) {
|
||||
cost += 10;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (gstate.isLightChanEnabled(i))
|
||||
cost += 10;
|
||||
}
|
||||
if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
|
||||
cost += 20;
|
||||
}
|
||||
// TODO: morphcount
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
void SoftGPU::ExecuteOp(u32 op, u32 diff)
|
||||
{
|
||||
void SoftGPU::ExecuteOp(u32 op, u32 diff) {
|
||||
u32 cmd = op >> 24;
|
||||
u32 data = op & 0xFFFFFF;
|
||||
|
||||
// Handle control and drawing commands here directly. The others we delegate.
|
||||
switch (cmd)
|
||||
{
|
||||
switch (cmd) {
|
||||
case GE_CMD_BASE:
|
||||
break;
|
||||
|
||||
|
@ -319,7 +319,7 @@ static const CommandTableEntry commandTable[] = {
|
||||
{ GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE
|
||||
{ GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_BoundingBox }, // + FLUSHBEFORE when we implement... or not, do we need to?
|
||||
|
||||
// Changing the vertex type requires us to flush.
|
||||
// Changing the vertex type requires us to flush.
|
||||
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPU_Vulkan::Execute_VertexType },
|
||||
|
||||
{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_Bezier },
|
||||
@ -340,14 +340,14 @@ static const CommandTableEntry commandTable[] = {
|
||||
{ GE_CMD_DITH3 },
|
||||
|
||||
// These handle their own flushing.
|
||||
{ GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_WorldMtxNum },
|
||||
{ GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_WorldMtxData },
|
||||
{ GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_ViewMtxNum },
|
||||
{ GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_ViewMtxData },
|
||||
{ GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_ProjMtxNum },
|
||||
{ GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_ProjMtxData },
|
||||
{ GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_TgenMtxNum },
|
||||
{ GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_TgenMtxData },
|
||||
{ GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_WorldMtxNum },
|
||||
{ GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_WorldMtxData },
|
||||
{ GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_ViewMtxNum },
|
||||
{ GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_ViewMtxData },
|
||||
{ GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_ProjMtxNum },
|
||||
{ GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_ProjMtxData },
|
||||
{ GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_TgenMtxNum },
|
||||
{ GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_TgenMtxData },
|
||||
{ GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_BoneMtxNum },
|
||||
{ GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_BoneMtxData },
|
||||
|
||||
@ -397,24 +397,25 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *ctx)
|
||||
UpdateVsyncInterval(true);
|
||||
CheckGPUFeatures();
|
||||
|
||||
shaderManager_ = new ShaderManagerVulkan(vulkan_);
|
||||
shaderManagerVulkan_ = new ShaderManagerVulkan(vulkan_);
|
||||
pipelineManager_ = new PipelineManagerVulkan(vulkan_);
|
||||
framebufferManagerVulkan_ = new FramebufferManagerVulkan(vulkan_);
|
||||
framebufferManager_ = framebufferManagerVulkan_;
|
||||
textureCacheVulkan_ = new TextureCacheVulkan(vulkan_);
|
||||
textureCache_ = textureCacheVulkan_;
|
||||
drawEngineCommon_ = &drawEngine_;
|
||||
shaderManager_ = shaderManagerVulkan_;
|
||||
|
||||
drawEngine_.SetTextureCache(textureCacheVulkan_);
|
||||
drawEngine_.SetFramebufferManager(framebufferManagerVulkan_);
|
||||
drawEngine_.SetShaderManager(shaderManager_);
|
||||
drawEngine_.SetShaderManager(shaderManagerVulkan_);
|
||||
drawEngine_.SetPipelineManager(pipelineManager_);
|
||||
framebufferManagerVulkan_->Init();
|
||||
framebufferManagerVulkan_->SetTextureCache(textureCacheVulkan_);
|
||||
framebufferManagerVulkan_->SetDrawEngine(&drawEngine_);
|
||||
textureCacheVulkan_->SetFramebufferManager(framebufferManagerVulkan_);
|
||||
textureCacheVulkan_->SetDepalShaderCache(&depalShaderCache_);
|
||||
textureCacheVulkan_->SetShaderManager(shaderManager_);
|
||||
textureCacheVulkan_->SetShaderManager(shaderManagerVulkan_);
|
||||
textureCacheVulkan_->SetTransformDrawEngine(&drawEngine_);
|
||||
|
||||
// Sanity check gstate
|
||||
@ -458,7 +459,7 @@ GPU_Vulkan::~GPU_Vulkan() {
|
||||
framebufferManagerVulkan_->DestroyAllFBOs(true);
|
||||
depalShaderCache_.Clear();
|
||||
delete pipelineManager_;
|
||||
delete shaderManager_;
|
||||
delete shaderManagerVulkan_;
|
||||
}
|
||||
|
||||
void GPU_Vulkan::CheckGPUFeatures() {
|
||||
@ -506,7 +507,7 @@ void GPU_Vulkan::BeginHostFrame() {
|
||||
|
||||
framebufferManagerVulkan_->BeginFrameVulkan();
|
||||
|
||||
shaderManager_->DirtyShader();
|
||||
shaderManagerVulkan_->DirtyShader();
|
||||
shaderManager_->DirtyUniform(DIRTY_ALL);
|
||||
|
||||
if (dumpNextFrame_) {
|
||||
@ -686,7 +687,7 @@ void GPU_Vulkan::CopyDisplayToOutputInternal() {
|
||||
// Flush anything left over.
|
||||
drawEngine_.Flush(curCmd_);
|
||||
|
||||
shaderManager_->DirtyLastShader();
|
||||
shaderManagerVulkan_->DirtyLastShader();
|
||||
|
||||
framebufferManagerVulkan_->CopyDisplayToOutput();
|
||||
|
||||
@ -783,8 +784,7 @@ void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) {
|
||||
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
|
||||
drawEngine_.SetupVertexDecoder(gstate.vertType);
|
||||
// Rough estimate, not sure what's correct.
|
||||
int vertexCost = drawEngine_.EstimatePerVertexCost();
|
||||
cyclesExecuted += vertexCost * count;
|
||||
cyclesExecuted += EstimatePerVertexCost() * count;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -812,7 +812,7 @@ void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) {
|
||||
int bytesRead = 0;
|
||||
drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
|
||||
|
||||
int vertexCost = drawEngine_.EstimatePerVertexCost();
|
||||
int vertexCost = EstimatePerVertexCost();
|
||||
gpuStats.vertexGPUCycles += vertexCost * count;
|
||||
cyclesExecuted += vertexCost * count;
|
||||
|
||||
@ -1097,166 +1097,6 @@ void GPU_Vulkan::Execute_ColorRef(u32 op, u32 diff) {
|
||||
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
|
||||
}
|
||||
|
||||
// Handles GE_CMD_WORLDMATRIXNUMBER and eagerly consumes the run of
// GE_CMD_WORLDMATRIXDATA words that usually follows it in the display list.
//
// op:   the command word; its low 4 bits are the starting word index in gstate.worldMatrix.
// diff: unused here.
//
// At most (12 - start) words are consumed, so this path never stores at index 12 or above.
// If the start index is already 12..15, nothing is consumed and pc is left alone; following
// data words are then left for the per-word handler (Execute_WorldMtxData), which skips
// out-of-range indices.
void GPU_Vulkan::Execute_WorldMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;
	const int end = 12 - start;
	int count = 0;

	if (end > 0) {
		const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
		u32 *dst = (u32 *)(gstate.worldMatrix + start);

		while (count < end && (src[count] >> 24) == GE_CMD_WORLDMATRIXDATA) {
			const u32 newVal = src[count] << 8;
			if (dst[count] != newVal) {
				// Flush before the stored value changes, then mark the uniform dirty.
				Flush();
				dst[count] = newVal;
				shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
			}
			++count;
		}
	}

	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Handles one GE_CMD_WORLDMATRIXDATA word that was not consumed by the
// batched path in Execute_WorldMtxNum.
//
// op:   the command word; shifting it left by 8 drops the command byte.
// diff: unused here.
void GPU_Vulkan::Execute_WorldMtxData(u32 op, u32 diff) {
	const int index = gstate.worldmtxnum & 0xF;
	const u32 shifted = op << 8;
	u32 *matrixWords = (u32 *)gstate.worldMatrix;

	// Only indices below 12 are stored; higher ones just advance the cursor.
	if (index < 12) {
		if (matrixWords[index] != shifted) {
			// Flush before the stored value changes, then mark the uniform dirty.
			Flush();
			matrixWords[index] = shifted;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
	}

	// Advance the write cursor, wrapping inside the 4-bit field.
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_VIEWMATRIXNUMBER and eagerly consumes the run of
// GE_CMD_VIEWMATRIXDATA words that usually follows it in the display list.
//
// op:   the command word; its low 4 bits are the starting word index in gstate.viewMatrix.
// diff: unused here.
//
// At most (12 - start) words are consumed, so this path never stores at index 12 or above.
// If the start index is already 12..15, nothing is consumed and pc is left alone; following
// data words are then left for the per-word handler (Execute_ViewMtxData), which skips
// out-of-range indices.
void GPU_Vulkan::Execute_ViewMtxNum(u32 op, u32 diff) {
	const int start = op & 0xF;
	const int end = 12 - start;
	int count = 0;

	if (end > 0) {
		const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
		u32 *dst = (u32 *)(gstate.viewMatrix + start);

		while (count < end && (src[count] >> 24) == GE_CMD_VIEWMATRIXDATA) {
			const u32 newVal = src[count] << 8;
			if (dst[count] != newVal) {
				// Flush before the stored value changes, then mark the uniform dirty.
				Flush();
				dst[count] = newVal;
				shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
			}
			++count;
		}
	}

	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Handles a single GE_CMD_VIEWMATRIXDATA word: stores it at the index held in
// gstate.viewmtxnum (only for indices below 12) and then advances that index.
void GPU_Vulkan::Execute_ViewMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_ViewMtxNum normally consumes the data words itself.
	const int index = gstate.viewmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.viewMatrix;

	if (index < 12 && matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
	}

	// The index always advances, wrapping within 4 bits.
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_PROJMATRIXNUMBER. Latches the write index into gstate.projmtxnum
// and, as a fast path, consumes the GE_CMD_PROJMATRIXDATA words that directly
// follow in the display list, advancing the list PC past them.
void GPU_Vulkan::Execute_ProjMtxNum(u32 op, u32 diff) {
	// Data words nearly always come right after this command.
	const u32 start = op & 0xF;
	const u32_le *words = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *target = (u32 *)(gstate.projMatrix + start);
	// Entries left up to index 16; always at least 1 because start is masked to 0-15.
	const int remaining = 16 - start;

	int consumed = 0;
	while (consumed < remaining && (words[consumed] >> 24) == GE_CMD_PROJMATRIXDATA) {
		const u32 value = words[consumed] << 8;
		if (target[consumed] != value) {
			Flush();
			target[consumed] = value;
			shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
		}
		++consumed;
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + consumed) & 0xF);

	// The consumed data words are done; step the list PC over them.
	const u32 skipBytes = consumed * 4;
	UpdatePC(currentList->pc, currentList->pc + skipBytes);
	currentList->pc += skipBytes;
}
|
||||
|
||||
// Handles a single GE_CMD_PROJMATRIXDATA word: stores it at the index held in
// gstate.projmtxnum and then advances that index.
void GPU_Vulkan::Execute_ProjMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_ProjMtxNum normally consumes the data words itself.
	const int index = gstate.projmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.projMatrix;

	// No range check here: the index is masked to 0-15.
	if (matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	// The index always advances, wrapping within 4 bits.
	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
// Handles GE_CMD_TGENMATRIXNUMBER. Besides latching the write index into
// gstate.texmtxnum, this fast path consumes any GE_CMD_TGENMATRIXDATA words
// that immediately follow in the display list and advances the list PC past them.
//
// op:   the raw command word; the low 4 bits are the starting matrix index.
// diff: unused here.
void GPU_Vulkan::Execute_TgenMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_TGENMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF));
	// Entries remaining before index 12. Zero or negative when the start index is 12-15.
	const int end = 12 - (op & 0xF);
	int i = 0;

	// Bounding on `i < end` up front means a start index of 12-15 consumes nothing
	// here instead of storing past index 11. Those data words are then executed
	// individually by Execute_TgenMtxData, which skips the store for num >= 12.
	while (i < end && (src[i] >> 24) == GE_CMD_TGENMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			// Flush before changing the value, then mark the uniform dirty.
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
|
||||
|
||||
// Handles a single GE_CMD_TGENMATRIXDATA word: stores it at the index held in
// gstate.texmtxnum (only for indices below 12) and then advances that index.
void GPU_Vulkan::Execute_TgenMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_TgenMtxNum normally consumes the data words itself.
	const int index = gstate.texmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.tgenMatrix;

	if (index < 12 && matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
	}

	// The index always advances, wrapping within 4 bits.
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
|
||||
|
||||
void GPU_Vulkan::Execute_BoneMtxNum(u32 op, u32 diff) {
|
||||
// This is almost always followed by GE_CMD_BONEMATRIXDATA.
|
||||
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
|
||||
@ -1264,6 +1104,7 @@ void GPU_Vulkan::Execute_BoneMtxNum(u32 op, u32 diff) {
|
||||
const int end = 12 * 8 - (op & 0x7F);
|
||||
int i = 0;
|
||||
|
||||
// If we can't use software skinning, we have to flush and dirty.
|
||||
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
|
||||
const u32 newVal = src[i] << 8;
|
||||
if (dst[i] != newVal) {
|
||||
@ -1293,6 +1134,7 @@ void GPU_Vulkan::Execute_BoneMtxData(u32 op, u32 diff) {
|
||||
int num = gstate.boneMatrixNumber & 0x7F;
|
||||
u32 newVal = op << 8;
|
||||
if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) {
|
||||
// Bone matrices should NOT flush when software skinning is enabled!
|
||||
Flush();
|
||||
shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12));
|
||||
((u32 *)gstate.boneMatrix)[num] = newVal;
|
||||
@ -1850,7 +1692,7 @@ void GPU_Vulkan::DeviceLost() {
|
||||
pipelineManager_->DeviceLost();
|
||||
textureCacheVulkan_->DeviceLost();
|
||||
depalShaderCache_.Clear();
|
||||
shaderManager_->ClearShaders();
|
||||
shaderManagerVulkan_->ClearShaders();
|
||||
}
|
||||
|
||||
void GPU_Vulkan::DeviceRestore() {
|
||||
@ -1863,7 +1705,7 @@ void GPU_Vulkan::DeviceRestore() {
|
||||
drawEngine_.DeviceRestore(vulkan_);
|
||||
pipelineManager_->DeviceRestore(vulkan_);
|
||||
textureCacheVulkan_->DeviceRestore(vulkan_);
|
||||
shaderManager_->DeviceRestore(vulkan_);
|
||||
shaderManagerVulkan_->DeviceRestore(vulkan_);
|
||||
}
|
||||
|
||||
void GPU_Vulkan::GetStats(char *buffer, size_t bufsize) {
|
||||
@ -1897,8 +1739,8 @@ void GPU_Vulkan::GetStats(char *buffer, size_t bufsize) {
|
||||
(int)textureCacheVulkan_->NumLoadedTextures(),
|
||||
gpuStats.numTexturesDecoded,
|
||||
gpuStats.numTextureInvalidations,
|
||||
shaderManager_->GetNumVertexShaders(),
|
||||
shaderManager_->GetNumFragmentShaders(),
|
||||
shaderManagerVulkan_->GetNumVertexShaders(),
|
||||
shaderManagerVulkan_->GetNumFragmentShaders(),
|
||||
pipelineManager_->GetNumPipelines(),
|
||||
drawStats.pushUBOSpaceUsed,
|
||||
drawStats.pushVertexSpaceUsed,
|
||||
@ -1930,7 +1772,7 @@ void GPU_Vulkan::DoState(PointerWrap &p) {
|
||||
|
||||
gstate_c.textureChanged = TEXCHANGE_UPDATED;
|
||||
framebufferManagerVulkan_->DestroyAllFBOs(true);
|
||||
shaderManager_->ClearShaders();
|
||||
shaderManagerVulkan_->ClearShaders();
|
||||
pipelineManager_->Clear();
|
||||
}
|
||||
}
|
||||
@ -1953,7 +1795,7 @@ std::vector<std::string> GPU_Vulkan::DebugGetShaderIDs(DebugShaderType type) {
|
||||
} else if (type == SHADER_TYPE_PIPELINE) {
|
||||
return pipelineManager_->DebugGetObjectIDs(type);
|
||||
} else {
|
||||
return shaderManager_->DebugGetShaderIDs(type);
|
||||
return shaderManagerVulkan_->DebugGetShaderIDs(type);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1963,6 +1805,6 @@ std::string GPU_Vulkan::DebugGetShaderString(std::string id, DebugShaderType typ
|
||||
} else if (type == SHADER_TYPE_PIPELINE) {
|
||||
return pipelineManager_->DebugGetObjectString(id, type, stringType);
|
||||
} else {
|
||||
return shaderManager_->DebugGetShaderString(id, type, stringType);
|
||||
return shaderManagerVulkan_->DebugGetShaderString(id, type, stringType);
|
||||
}
|
||||
}
|
||||
|
@ -117,14 +117,6 @@ public:
|
||||
void Execute_AlphaTest(u32 op, u32 diff);
|
||||
void Execute_StencilTest(u32 op, u32 diff);
|
||||
void Execute_ColorRef(u32 op, u32 diff);
|
||||
void Execute_WorldMtxNum(u32 op, u32 diff);
|
||||
void Execute_WorldMtxData(u32 op, u32 diff);
|
||||
void Execute_ViewMtxNum(u32 op, u32 diff);
|
||||
void Execute_ViewMtxData(u32 op, u32 diff);
|
||||
void Execute_ProjMtxNum(u32 op, u32 diff);
|
||||
void Execute_ProjMtxData(u32 op, u32 diff);
|
||||
void Execute_TgenMtxNum(u32 op, u32 diff);
|
||||
void Execute_TgenMtxData(u32 op, u32 diff);
|
||||
void Execute_BoneMtxNum(u32 op, u32 diff);
|
||||
void Execute_BoneMtxData(u32 op, u32 diff);
|
||||
|
||||
@ -162,7 +154,7 @@ private:
|
||||
DrawEngineVulkan drawEngine_;
|
||||
|
||||
// Manages shaders and UBO data
|
||||
ShaderManagerVulkan *shaderManager_;
|
||||
ShaderManagerVulkan *shaderManagerVulkan_;
|
||||
|
||||
// Manages state and pipeline objects
|
||||
PipelineManagerVulkan *pipelineManager_;
|
||||
|
@ -157,7 +157,7 @@ static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invert
|
||||
}
|
||||
|
||||
ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan)
|
||||
: vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr), globalDirty_(0xFFFFFFFF) {
|
||||
: vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr) {
|
||||
codeBuffer_ = new char[16384];
|
||||
uboAlignment_ = vulkan_->GetPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment;
|
||||
memset(&ub_base, 0, sizeof(ub_base));
|
||||
|
@ -184,7 +184,7 @@ protected:
|
||||
|
||||
class VulkanPushBuffer;
|
||||
|
||||
class ShaderManagerVulkan {
|
||||
class ShaderManagerVulkan : public ShaderManagerCommon {
|
||||
public:
|
||||
ShaderManagerVulkan(VulkanContext *vulkan);
|
||||
~ShaderManagerVulkan();
|
||||
@ -204,10 +204,6 @@ public:
|
||||
|
||||
uint32_t UpdateUniforms();
|
||||
|
||||
void DirtyUniform(uint64_t what) {
|
||||
globalDirty_ |= what;
|
||||
}
|
||||
|
||||
// TODO: Avoid copying these buffers if same as last draw, can still point to it assuming we're still in the same pushbuffer.
|
||||
// Applies dirty changes and copies the buffer.
|
||||
bool IsBaseDirty() { return true; }
|
||||
@ -235,7 +231,6 @@ private:
|
||||
|
||||
char *codeBuffer_;
|
||||
|
||||
uint64_t globalDirty_;
|
||||
uint64_t uboAlignment_;
|
||||
// Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time.
|
||||
UB_VS_FS_Base ub_base;
|
||||
|
Loading…
Reference in New Issue
Block a user