Merge pull request #9228 from hrydgard/centralize-matrix-loads

Centralize matrix loads
This commit is contained in:
Henrik Rydgård 2017-01-23 22:47:08 +01:00 committed by GitHub
commit 58f731d734
17 changed files with 406 additions and 837 deletions

View File

@ -58,35 +58,6 @@ public:
std::vector<std::string> DebugGetVertexLoaderIDs();
std::string DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType);
// Returns a rough per-vertex cycle estimate derived from the current GE state.
int EstimatePerVertexCost() {
	// TODO: This only covers transform cost; rasterization cost should be accounted for
	// somehow too, although it probably runs in parallel with transform.
	// All of these numbers are pure guesswork. Real measurements would be great.
	// GTA wants a low value to run smoothly, GoW wants a high value (otherwise it thinks
	// things went too fast and starts doing all the work over again).
	int total = 20;

	if (gstate.isLightingEnabled()) {
		// Fixed overhead for lighting, plus a fee per enabled light channel.
		int lighting = 10;
		for (int chan = 0; chan < 4; ++chan) {
			lighting += gstate.isLightChanEnabled(chan) ? 10 : 0;
		}
		total += lighting;
	}

	// Any UV generation mode other than plain texture coordinates costs extra.
	if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS)
		total += 20;

	// Morphing only adds cost when more than one weight is in use.
	const int weights = gstate.getNumMorphWeights();
	total += (weights > 1) ? 5 * weights : 0;

	return total;
}
protected:
// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType);

View File

@ -84,5 +84,21 @@ enum : uint64_t {
DIRTY_SPLINETYPEV = 1ULL << 36,
DIRTY_BONE_UNIFORMS = 0xFF000000ULL,
DIRTY_ALL_UNIFORMS = 0x1FFFFFFFFFULL,
// Now we can add further dirty flags that are not uniforms.
DIRTY_ALL = 0xFFFFFFFFFFFFFFFF
};
// Backend-independent base for the shader managers. It only tracks which
// DIRTY_* flags have been raised since they were last consumed by a subclass.
class ShaderManagerCommon {
public:
	ShaderManagerCommon() {}
	virtual ~ShaderManagerCommon() {}

	// Accumulates the given DIRTY_* bits into the pending set.
	void DirtyUniform(u64 what) {
		globalDirty_ = globalDirty_ | what;
	}

protected:
	// Starts out with everything dirty.
	uint64_t globalDirty_ = DIRTY_ALL;
};

View File

@ -332,7 +332,7 @@ static const CommandTableEntry commandTable[] = {
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
{GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, &GPU_DX9::Execute_LoadClut},
{GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC, &GPU_DX9::Execute_BlockTransferStart},
{GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC, &GPUCommon::Execute_BlockTransferStart},
// We don't use the dither table.
{GE_CMD_DITH0},
@ -341,16 +341,16 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_DITH3},
// These handle their own flushing.
{GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_WorldMtxNum},
{GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_WorldMtxData},
{GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_ViewMtxNum},
{GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_ViewMtxData},
{GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_ProjMtxNum},
{GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_ProjMtxData},
{GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_TgenMtxNum},
{GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_TgenMtxData},
{GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_BoneMtxNum},
{GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_BoneMtxData},
{GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_WorldMtxNum},
{GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_WorldMtxData},
{GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_ViewMtxNum},
{GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_ViewMtxData},
{GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_ProjMtxNum},
{GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_ProjMtxData},
{GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_TgenMtxNum},
{GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_TgenMtxData},
{GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_BoneMtxNum},
{GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_BoneMtxData},
// Vertex Screen/Texture/Color
{ GE_CMD_VSCX, FLAG_EXECUTE },
@ -398,23 +398,24 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx)
lastVsync_ = g_Config.bVSync ? 1 : 0;
dxstate.SetVSyncInterval(g_Config.bVSync);
shaderManagerDX9_ = new ShaderManagerDX9();
framebufferManagerDX9_ = new FramebufferManagerDX9();
framebufferManager_ = framebufferManagerDX9_;
textureCacheDX9_ = new TextureCacheDX9();
textureCache_ = textureCacheDX9_;
drawEngineCommon_ = &drawEngine_;
shaderManager_ = shaderManagerDX9_;
shaderManager_ = new ShaderManagerDX9();
drawEngine_.SetShaderManager(shaderManager_);
drawEngine_.SetShaderManager(shaderManagerDX9_);
drawEngine_.SetTextureCache(textureCacheDX9_);
drawEngine_.SetFramebufferManager(framebufferManagerDX9_);
framebufferManagerDX9_->Init();
framebufferManagerDX9_->SetTextureCache(textureCacheDX9_);
framebufferManagerDX9_->SetShaderManager(shaderManager_);
framebufferManagerDX9_->SetShaderManager(shaderManagerDX9_);
framebufferManagerDX9_->SetTransformDrawEngine(&drawEngine_);
textureCacheDX9_->SetFramebufferManager(framebufferManagerDX9_);
textureCacheDX9_->SetDepalShaderCache(&depalShaderCache_);
textureCacheDX9_->SetShaderManager(shaderManager_);
textureCacheDX9_->SetShaderManager(shaderManagerDX9_);
// Sanity check gstate
if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) {
@ -507,8 +508,8 @@ void GPU_DX9::CheckGPUFeatures() {
GPU_DX9::~GPU_DX9() {
framebufferManagerDX9_->DestroyAllFBOs(true);
shaderManager_->ClearCache(true);
delete shaderManager_;
shaderManagerDX9_->ClearCache(true);
delete shaderManagerDX9_;
}
// Needs to be called on GPU thread, not reporting thread.
@ -525,7 +526,7 @@ void GPU_DX9::BuildReportingInfo() {
void GPU_DX9::DeviceLost() {
// Simply drop all caches and textures.
// FBOs appear to survive? Or no?
shaderManager_->ClearCache(false);
shaderManagerDX9_->ClearCache(false);
textureCacheDX9_->Clear(false);
framebufferManagerDX9_->DeviceLost();
}
@ -585,7 +586,7 @@ void GPU_DX9::BeginFrameInternal() {
} else if (dumpThisFrame_) {
dumpThisFrame_ = false;
}
shaderManager_->DirtyShader();
shaderManagerDX9_->DirtyShader();
framebufferManagerDX9_->BeginFrame();
}
@ -639,7 +640,7 @@ void GPU_DX9::CopyDisplayToOutputInternal() {
framebufferManagerDX9_->EndFrame();
// shaderManager_->EndFrame();
shaderManager_->DirtyLastShader();
shaderManagerDX9_->DirtyLastShader();
gstate_c.textureChanged = TEXCHANGE_UPDATED;
}
@ -755,8 +756,7 @@ void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
drawEngine_.SetupVertexDecoder(gstate.vertType);
// Rough estimate, not sure what's correct.
int vertexCost = drawEngine_.EstimatePerVertexCost();
cyclesExecuted += vertexCost * count;
cyclesExecuted += EstimatePerVertexCost() * count;
return;
}
@ -787,7 +787,7 @@ void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
int bytesRead = 0;
drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
int vertexCost = drawEngine_.EstimatePerVertexCost() * count;
int vertexCost = EstimatePerVertexCost() * count;
gpuStats.vertexGPUCycles += vertexCost;
cyclesExecuted += vertexCost;
@ -1066,230 +1066,6 @@ void GPU_DX9::Execute_ColorRef(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
}
// Handles GE_CMD_WORLDMATRIXNUMBER and, as a fast path, directly consumes the run of
// GE_CMD_WORLDMATRIXDATA words that follows it in the display list.
//
// op:   the command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Each data word is stored shifted left by 8 (dropping the command byte). A flush and
// DIRTY_WORLDMATRIX are only issued for words whose value actually changes.
void GPU_DX9::Execute_WorldMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_WORLDMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)gstate.worldMatrix;
	const int start = op & 0xF;
	// The fast path covers 12 words, but the 4-bit start index can be 12..15. Then
	// `end` is <= 0 and nothing may be written here; any data commands are left to
	// Execute_WorldMtxData, which skips out-of-range indices.
	const int end = 12 - start;
	int i = 0;

	while (i < end && (src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (matrix[start + i] != newVal) {
			Flush();
			matrix[start + i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single GE_CMD_WORLDMATRIXDATA word: stores it at the index held in
// gstate.worldmtxnum and advances that index (wrapping within 4 bits).
void GPU_DX9::Execute_WorldMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_WorldMtxNum normally consumes the data words itself.
	const int slot = gstate.worldmtxnum & 0xF;
	const u32 value = op << 8;

	// Indices 12..15 are ignored, but the counter still advances below.
	if (slot < 12) {
		u32 *words = (u32 *)gstate.worldMatrix;
		if (words[slot] != value) {
			Flush();
			words[slot] = value;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
	}

	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((slot + 1) & 0xF);
}
// Handles GE_CMD_VIEWMATRIXNUMBER and, as a fast path, directly consumes the run of
// GE_CMD_VIEWMATRIXDATA words that follows it in the display list.
//
// op:   the command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Each data word is stored shifted left by 8 (dropping the command byte). A flush and
// DIRTY_VIEWMATRIX are only issued for words whose value actually changes.
void GPU_DX9::Execute_ViewMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_VIEWMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)gstate.viewMatrix;
	const int start = op & 0xF;
	// The fast path covers 12 words, but the 4-bit start index can be 12..15. Then
	// `end` is <= 0 and nothing may be written here; any data commands are left to
	// Execute_ViewMtxData, which skips out-of-range indices.
	const int end = 12 - start;
	int i = 0;

	while (i < end && (src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (matrix[start + i] != newVal) {
			Flush();
			matrix[start + i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single GE_CMD_VIEWMATRIXDATA word: stores it at the index held in
// gstate.viewmtxnum and advances that index (wrapping within 4 bits).
void GPU_DX9::Execute_ViewMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_ViewMtxNum normally consumes the data words itself.
	const int slot = gstate.viewmtxnum & 0xF;
	const u32 value = op << 8;

	// Indices 12..15 are ignored, but the counter still advances below.
	if (slot < 12) {
		u32 *words = (u32 *)gstate.viewMatrix;
		if (words[slot] != value) {
			Flush();
			words[slot] = value;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
	}

	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((slot + 1) & 0xF);
}
// Handles GE_CMD_PROJMATRIXNUMBER and directly consumes the GE_CMD_PROJMATRIXDATA
// words that follow it, up to the end of the 16-word range starting at (op & 0xF).
void GPU_DX9::Execute_ProjMtxNum(u32 op, u32 diff) {
	// The data words normally come right after this command in the list.
	const u32_le *words = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)gstate.projMatrix;
	const int first = op & 0xF;
	const int limit = 16 - first;

	int consumed = 0;
	for (; consumed < limit && (words[consumed] >> 24) == GE_CMD_PROJMATRIXDATA; ++consumed) {
		const u32 value = words[consumed] << 8;
		if (matrix[first + consumed] == value)
			continue;
		// Only flush and dirty when a word really changes.
		Flush();
		matrix[first + consumed] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + consumed) & 0xF);

	// The consumed data words are done; step the list past them.
	UpdatePC(currentList->pc, currentList->pc + consumed * 4);
	currentList->pc += consumed * 4;
}
// Slow path for a single GE_CMD_PROJMATRIXDATA word: stores it at the index held in
// gstate.projmtxnum and advances that index (wrapping within 4 bits).
void GPU_DX9::Execute_ProjMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_ProjMtxNum normally consumes the data words itself.
	const int slot = gstate.projmtxnum & 0xF;
	const u32 value = op << 8;

	u32 *words = (u32 *)gstate.projMatrix;
	if (words[slot] != value) {
		Flush();
		words[slot] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((slot + 1) & 0xF);
}
// Handles GE_CMD_TGENMATRIXNUMBER and, as a fast path, directly consumes the run of
// GE_CMD_TGENMATRIXDATA words that follows it in the display list.
//
// op:   the command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Each data word is stored shifted left by 8 (dropping the command byte). A flush and
// DIRTY_TEXMATRIX are only issued for words whose value actually changes.
void GPU_DX9::Execute_TgenMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_TGENMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)gstate.tgenMatrix;
	const int start = op & 0xF;
	// The fast path covers 12 words, but the 4-bit start index can be 12..15. Then
	// `end` is <= 0 and nothing may be written here; any data commands are left to
	// Execute_TgenMtxData, which skips out-of-range indices.
	const int end = 12 - start;
	int i = 0;

	while (i < end && (src[i] >> 24) == GE_CMD_TGENMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (matrix[start + i] != newVal) {
			Flush();
			matrix[start + i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single GE_CMD_TGENMATRIXDATA word: stores it at the index held in
// gstate.texmtxnum and advances that index (wrapping within 4 bits).
void GPU_DX9::Execute_TgenMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_TgenMtxNum normally consumes the data words itself.
	const int slot = gstate.texmtxnum & 0xF;
	const u32 value = op << 8;

	// Indices 12..15 are ignored, but the counter still advances below.
	if (slot < 12) {
		u32 *words = (u32 *)gstate.tgenMatrix;
		if (words[slot] != value) {
			Flush();
			words[slot] = value;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
	}

	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((slot + 1) & 0xF);
}
// Handles GE_CMD_BONEMATRIXNUMBER and, as a fast path, directly consumes the run of
// GE_CMD_BONEMATRIXDATA words that follows it in the display list.
//
// op:   the command word; its low 7 bits select the first bone-matrix word to write.
// diff: not used by this handler.
//
// The bone data is addressed as 8 groups of 12 words. Every group touched by the run
// gets its DIRTY_BONEMATRIX0 << n bit raised, either on the shader manager right away
// or in gstate_c.deferredVertTypeDirty when the software skinning path applies.
void GPU_DX9::Execute_BoneMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_BONEMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *bones = (u32 *)gstate.boneMatrix;
	const int start = op & 0x7F;
	// The 7-bit start index can be 96..127, past the 12 * 8 words handled here. Then
	// `end` is <= 0 and nothing may be written; any data commands are left to
	// Execute_BoneMtxData, which skips out-of-range indices.
	const int end = 12 * 8 - start;
	int i = 0;

	// If we can't use software skinning, we have to flush and dirty.
	const bool flushAndDirty = !g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0;
	if (flushAndDirty) {
		while (i < end && (src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
			const u32 newVal = src[i] << 8;
			if (bones[start + i] != newVal) {
				Flush();
				bones[start + i] = newVal;
			}
			++i;
		}
	} else {
		while (i < end && (src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
			bones[start + i] = src[i] << 8;
			++i;
		}
	}

	if (i > 0) {
		// Dirty every 12-word group the run overlapped. Walking group indices (rather
		// than stepping the word index by 12) also catches the last group when the run
		// starts in the middle of one and spills into the next.
		const int firstMtx = start / 12;
		const int lastMtx = (start + i - 1) / 12;
		for (int mtx = firstMtx; mtx <= lastMtx; ++mtx) {
			if (flushAndDirty) {
				shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << mtx);
			} else {
				gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << mtx;
			}
		}
	}

	const int count = i;
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single GE_CMD_BONEMATRIXDATA word: stores it at the index held in
// gstate.boneMatrixNumber and advances that index (wrapping within 7 bits).
void GPU_DX9::Execute_BoneMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_BoneMtxNum normally consumes the data words itself.
	const int slot = gstate.boneMatrixNumber & 0x7F;
	const u32 value = op << 8;
	u32 *words = (u32 *)gstate.boneMatrix;

	// Indices 96..127 are ignored, but the counter still advances below.
	if (slot < 96 && words[slot] != value) {
		const bool softwareSkinned = g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
		if (softwareSkinned) {
			// Bone matrices should NOT flush when software skinning is enabled!
			gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (slot / 12);
		} else {
			Flush();
			shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (slot / 12));
		}
		words[slot] = value;
	}

	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((slot + 1) & 0x7F);
}
void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
u32 cmd = op >> 24;
u32 data = op & 0xFFFFFF;
@ -1514,7 +1290,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
case GE_CMD_LAC0:
case GE_CMD_LDC0:
case GE_CMD_LSC0:
shaderManager_->DirtyUniform(DIRTY_LIGHT0);
if (diff)
shaderManager_->DirtyUniform(DIRTY_LIGHT0);
break;
case GE_CMD_LX1:case GE_CMD_LY1:case GE_CMD_LZ1:
@ -1525,7 +1302,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
case GE_CMD_LAC1:
case GE_CMD_LDC1:
case GE_CMD_LSC1:
shaderManager_->DirtyUniform(DIRTY_LIGHT1);
if (diff)
shaderManager_->DirtyUniform(DIRTY_LIGHT1);
break;
case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2:
case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2:
@ -1535,7 +1313,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
case GE_CMD_LAC2:
case GE_CMD_LDC2:
case GE_CMD_LSC2:
shaderManager_->DirtyUniform(DIRTY_LIGHT2);
if (diff)
shaderManager_->DirtyUniform(DIRTY_LIGHT2);
break;
case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3:
case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3:
@ -1545,7 +1324,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
case GE_CMD_LAC3:
case GE_CMD_LDC3:
case GE_CMD_LSC3:
shaderManager_->DirtyUniform(DIRTY_LIGHT3);
if (diff)
shaderManager_->DirtyUniform(DIRTY_LIGHT3);
break;
case GE_CMD_VIEWPORTXSCALE:
@ -1554,7 +1334,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
case GE_CMD_VIEWPORTYCENTER:
case GE_CMD_VIEWPORTZSCALE:
case GE_CMD_VIEWPORTZCENTER:
Execute_ViewportType(op, diff);
if (diff)
Execute_ViewportType(op, diff);
break;
case GE_CMD_LIGHTENABLE0:
@ -1761,23 +1542,6 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
}
}
// Marks the bone matrix (or matrices) about to be overwritten as dirty, then hands the
// actual load from `target` to gstate.FastLoadBoneMatrix.
void GPU_DX9::FastLoadBoneMatrix(u32 target) {
	const int firstWord = gstate.boneMatrixNumber & 0x7F;
	const int firstMtx = firstWord / 12;

	uint32_t dirtyMask = DIRTY_BONEMATRIX0 << firstMtx;
	// A load that starts partway into a matrix also touches the following one
	// (index wrapped to 0..7).
	if (firstWord % 12 != 0) {
		dirtyMask |= DIRTY_BONEMATRIX0 << ((firstMtx + 1) & 7);
	}

	const bool softwareSkinned = g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
	if (softwareSkinned) {
		// No flush here; the dirty bits are only recorded for later.
		gstate_c.deferredVertTypeDirty |= dirtyMask;
	} else {
		Flush();
		shaderManager_->DirtyUniform(dirtyMask);
	}

	gstate.FastLoadBoneMatrix(target);
}
void GPU_DX9::GetStats(char *buffer, size_t bufsize) {
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
snprintf(buffer, bufsize - 1,
@ -1807,8 +1571,8 @@ void GPU_DX9::GetStats(char *buffer, size_t bufsize) {
(int)textureCacheDX9_->NumLoadedTextures(),
gpuStats.numTexturesDecoded,
gpuStats.numTextureInvalidations,
shaderManager_->NumVertexShaders(),
shaderManager_->NumFragmentShaders()
shaderManagerDX9_->NumVertexShaders(),
shaderManagerDX9_->NumFragmentShaders()
);
}
@ -1817,7 +1581,7 @@ void GPU_DX9::ClearCacheNextFrame() {
}
void GPU_DX9::ClearShaderCache() {
shaderManager_->ClearCache(true);
shaderManagerDX9_->ClearCache(true);
}
std::vector<FramebufferInfo> GPU_DX9::GetFramebufferList() {
@ -1835,7 +1599,7 @@ void GPU_DX9::DoState(PointerWrap &p) {
gstate_c.textureChanged = TEXCHANGE_UPDATED;
framebufferManagerDX9_->DestroyAllFBOs(true);
shaderManager_->ClearCache(true);
shaderManagerDX9_->ClearCache(true);
}
}
@ -1956,7 +1720,7 @@ std::vector<std::string> GPU_DX9::DebugGetShaderIDs(DebugShaderType type) {
if (type == SHADER_TYPE_VERTEXLOADER) {
return drawEngine_.DebugGetVertexLoaderIDs();
} else {
return shaderManager_->DebugGetShaderIDs(type);
return shaderManagerDX9_->DebugGetShaderIDs(type);
}
}
@ -1964,7 +1728,7 @@ std::string GPU_DX9::DebugGetShaderString(std::string id, DebugShaderType type,
if (type == SHADER_TYPE_VERTEXLOADER) {
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
} else {
return shaderManager_->DebugGetShaderString(id, type, stringType);
return shaderManagerDX9_->DebugGetShaderString(id, type, stringType);
}
}

View File

@ -125,24 +125,12 @@ public:
void Execute_StencilTest(u32 op, u32 diff);
void Execute_ColorRef(u32 op, u32 diff);
void Execute_WorldMtxNum(u32 op, u32 diff);
void Execute_WorldMtxData(u32 op, u32 diff);
void Execute_ViewMtxNum(u32 op, u32 diff);
void Execute_ViewMtxData(u32 op, u32 diff);
void Execute_ProjMtxNum(u32 op, u32 diff);
void Execute_ProjMtxData(u32 op, u32 diff);
void Execute_TgenMtxNum(u32 op, u32 diff);
void Execute_TgenMtxData(u32 op, u32 diff);
void Execute_BoneMtxNum(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
protected:
void FastRunLoop(DisplayList &list) override;
void FastLoadBoneMatrix(u32 target) override;
void FinishDeferred() override;
private:
@ -163,7 +151,7 @@ private:
TextureCacheDX9 *textureCacheDX9_;
DepalShaderCacheDX9 depalShaderCache_;
DrawEngineDX9 drawEngine_;
ShaderManagerDX9 *shaderManager_;
ShaderManagerDX9 *shaderManagerDX9_;
static CommandInfo cmdInfo_[256];

View File

@ -505,7 +505,7 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
}
}
ShaderManagerDX9::ShaderManagerDX9() : lastVShader_(nullptr), lastPShader_(nullptr), globalDirty_(0xFFFFFFFF) {
ShaderManagerDX9::ShaderManagerDX9() : lastVShader_(nullptr), lastPShader_(nullptr) {
codeBuffer_ = new char[16384];
}

View File

@ -76,7 +76,7 @@ protected:
ShaderID id_;
};
class ShaderManagerDX9 {
class ShaderManagerDX9 : public ShaderManagerCommon {
public:
ShaderManagerDX9();
~ShaderManagerDX9();
@ -84,9 +84,6 @@ public:
void ClearCache(bool deleteThem); // TODO: deleteThem currently not respected
VSShader *ApplyShader(int prim, u32 vertType);
void DirtyShader();
void DirtyUniform(u64 what) {
globalDirty_ |= what;
}
void DirtyLastShader();
int NumVertexShaders() const { return (int)vsCache_.size(); }
@ -119,7 +116,6 @@ private:
ShaderID lastFSID_;
ShaderID lastVSID_;
u64 globalDirty_;
char *codeBuffer_;
VSShader *lastVShader_;

View File

@ -401,24 +401,26 @@ GPU_GLES::GPU_GLES(GraphicsContext *ctx)
UpdateVsyncInterval(true);
CheckGPUFeatures();
shaderManager_ = new ShaderManagerGLES();
shaderManagerGL_ = new ShaderManagerGLES();
framebufferManagerGL_ = new FramebufferManagerGLES();
framebufferManager_ = framebufferManagerGL_;
textureCacheGL_ = new TextureCacheGLES();
textureCache_ = textureCacheGL_;
drawEngineCommon_ = &drawEngine_;
shaderManager_ = shaderManagerGL_;
drawEngineCommon_ = &drawEngine_;
drawEngine_.SetShaderManager(shaderManager_);
drawEngine_.SetShaderManager(shaderManagerGL_);
drawEngine_.SetTextureCache(textureCacheGL_);
drawEngine_.SetFramebufferManager(framebufferManagerGL_);
drawEngine_.SetFragmentTestCache(&fragmentTestCache_);
framebufferManagerGL_->Init();
framebufferManagerGL_->SetTextureCache(textureCacheGL_);
framebufferManagerGL_->SetShaderManager(shaderManager_);
framebufferManagerGL_->SetShaderManager(shaderManagerGL_);
framebufferManagerGL_->SetTransformDrawEngine(&drawEngine_);
textureCacheGL_->SetFramebufferManager(framebufferManagerGL_);
textureCacheGL_->SetDepalShaderCache(&depalShaderCache_);
textureCacheGL_->SetShaderManager(shaderManager_);
textureCacheGL_->SetShaderManager(shaderManagerGL_);
textureCacheGL_->SetTransformDrawEngine(&drawEngine_);
fragmentTestCache_.SetTextureCache(textureCacheGL_);
@ -470,20 +472,20 @@ GPU_GLES::GPU_GLES(GraphicsContext *ctx)
if (discID.size()) {
File::CreateFullPath(GetSysDirectory(DIRECTORY_APP_CACHE));
shaderCachePath_ = GetSysDirectory(DIRECTORY_APP_CACHE) + "/" + g_paramSFO.GetValueString("DISC_ID") + ".glshadercache";
shaderManager_->LoadAndPrecompile(shaderCachePath_);
shaderManagerGL_->LoadAndPrecompile(shaderCachePath_);
}
}
GPU_GLES::~GPU_GLES() {
framebufferManagerGL_->DestroyAllFBOs(true);
shaderManager_->ClearCache(true);
shaderManagerGL_->ClearCache(true);
depalShaderCache_.Clear();
fragmentTestCache_.Clear();
if (!shaderCachePath_.empty()) {
shaderManager_->Save(shaderCachePath_);
shaderManagerGL_->Save(shaderCachePath_);
}
delete shaderManager_;
shaderManager_ = nullptr;
delete shaderManagerGL_;
shaderManagerGL_ = nullptr;
#ifdef _WIN32
gfxCtx_->SwapInterval(0);
@ -652,7 +654,7 @@ void GPU_GLES::DeviceLost() {
// Simply drop all caches and textures.
// FBOs appear to survive? Or no?
// TransformDraw has registered as a GfxResourceHolder.
shaderManager_->ClearCache(false);
shaderManagerGL_->ClearCache(false);
textureCacheGL_->Clear(false);
fragmentTestCache_.Clear(false);
depalShaderCache_.Clear();
@ -662,6 +664,7 @@ void GPU_GLES::DeviceLost() {
void GPU_GLES::DeviceRestore() {
ILOG("GPU_GLES: DeviceRestore");
UpdateCmdInfo();
UpdateVsyncInterval(true);
}
@ -760,10 +763,10 @@ void GPU_GLES::BeginFrameInternal() {
// Save the cache from time to time. TODO: How often?
if (!shaderCachePath_.empty() && (gpuStats.numFlips & 1023) == 0) {
shaderManager_->Save(shaderCachePath_);
shaderManagerGL_->Save(shaderCachePath_);
}
shaderManager_->DirtyShader();
shaderManagerGL_->DirtyShader();
// Not sure if this is really needed.
shaderManager_->DirtyUniform(DIRTY_ALL);
@ -811,7 +814,7 @@ void GPU_GLES::CopyDisplayToOutputInternal() {
framebufferManagerGL_->RebindFramebuffer();
drawEngine_.Flush();
shaderManager_->DirtyLastShader();
shaderManagerGL_->DirtyLastShader();
glstate.depthWrite.set(GL_TRUE);
glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
@ -923,8 +926,7 @@ void GPU_GLES::Execute_Prim(u32 op, u32 diff) {
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
drawEngine_.SetupVertexDecoder(gstate.vertType);
// Rough estimate, not sure what's correct.
int vertexCost = drawEngine_.EstimatePerVertexCost();
cyclesExecuted += vertexCost * count;
cyclesExecuted += EstimatePerVertexCost() * count;
return;
}
@ -952,7 +954,7 @@ void GPU_GLES::Execute_Prim(u32 op, u32 diff) {
int bytesRead = 0;
drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
int vertexCost = drawEngine_.EstimatePerVertexCost();
int vertexCost = EstimatePerVertexCost();
gpuStats.vertexGPUCycles += vertexCost * count;
cyclesExecuted += vertexCost * count;
@ -1289,230 +1291,6 @@ void GPU_GLES::Execute_ColorRef(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
}
// Handles GE_CMD_WORLDMATRIXNUMBER and, as a fast path, directly consumes the run of
// GE_CMD_WORLDMATRIXDATA words that follows it in the display list.
//
// op:   the command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Each data word is stored shifted left by 8 (dropping the command byte). A flush and
// DIRTY_WORLDMATRIX are only issued for words whose value actually changes.
void GPU_GLES::Execute_WorldMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_WORLDMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)gstate.worldMatrix;
	const int start = op & 0xF;
	// The fast path covers 12 words, but the 4-bit start index can be 12..15. Then
	// `end` is <= 0 and nothing may be written here; any data commands are left to
	// Execute_WorldMtxData, which skips out-of-range indices.
	const int end = 12 - start;
	int i = 0;

	while (i < end && (src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (matrix[start + i] != newVal) {
			Flush();
			matrix[start + i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single GE_CMD_WORLDMATRIXDATA word: stores it at the index held in
// gstate.worldmtxnum and advances that index (wrapping within 4 bits).
void GPU_GLES::Execute_WorldMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_WorldMtxNum normally consumes the data words itself.
	const int slot = gstate.worldmtxnum & 0xF;
	const u32 value = op << 8;

	// Indices 12..15 are ignored, but the counter still advances below.
	if (slot < 12) {
		u32 *words = (u32 *)gstate.worldMatrix;
		if (words[slot] != value) {
			Flush();
			words[slot] = value;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
	}

	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((slot + 1) & 0xF);
}
// Handles GE_CMD_VIEWMATRIXNUMBER and, as a fast path, directly consumes the run of
// GE_CMD_VIEWMATRIXDATA words that follows it in the display list.
//
// op:   the command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Each data word is stored shifted left by 8 (dropping the command byte). A flush and
// DIRTY_VIEWMATRIX are only issued for words whose value actually changes.
void GPU_GLES::Execute_ViewMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_VIEWMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)gstate.viewMatrix;
	const int start = op & 0xF;
	// The fast path covers 12 words, but the 4-bit start index can be 12..15. Then
	// `end` is <= 0 and nothing may be written here; any data commands are left to
	// Execute_ViewMtxData, which skips out-of-range indices.
	const int end = 12 - start;
	int i = 0;

	while (i < end && (src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (matrix[start + i] != newVal) {
			Flush();
			matrix[start + i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single GE_CMD_VIEWMATRIXDATA word: stores it at the index held in
// gstate.viewmtxnum and advances that index (wrapping within 4 bits).
void GPU_GLES::Execute_ViewMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_ViewMtxNum normally consumes the data words itself.
	const int slot = gstate.viewmtxnum & 0xF;
	const u32 value = op << 8;

	// Indices 12..15 are ignored, but the counter still advances below.
	if (slot < 12) {
		u32 *words = (u32 *)gstate.viewMatrix;
		if (words[slot] != value) {
			Flush();
			words[slot] = value;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
	}

	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((slot + 1) & 0xF);
}
// Handles GE_CMD_PROJMATRIXNUMBER and directly consumes the GE_CMD_PROJMATRIXDATA
// words that follow it, up to the end of the 16-word range starting at (op & 0xF).
void GPU_GLES::Execute_ProjMtxNum(u32 op, u32 diff) {
	// The data words normally come right after this command in the list.
	const u32_le *words = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)gstate.projMatrix;
	const int first = op & 0xF;
	const int limit = 16 - first;

	int consumed = 0;
	for (; consumed < limit && (words[consumed] >> 24) == GE_CMD_PROJMATRIXDATA; ++consumed) {
		const u32 value = words[consumed] << 8;
		if (matrix[first + consumed] == value)
			continue;
		// Only flush and dirty when a word really changes.
		Flush();
		matrix[first + consumed] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + consumed) & 0xF);

	// The consumed data words are done; step the list past them.
	UpdatePC(currentList->pc, currentList->pc + consumed * 4);
	currentList->pc += consumed * 4;
}
// Slow path for a single GE_CMD_PROJMATRIXDATA word: stores it at the index held in
// gstate.projmtxnum and advances that index (wrapping within 4 bits).
void GPU_GLES::Execute_ProjMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_ProjMtxNum normally consumes the data words itself.
	const int slot = gstate.projmtxnum & 0xF;
	const u32 value = op << 8;

	u32 *words = (u32 *)gstate.projMatrix;
	if (words[slot] != value) {
		Flush();
		words[slot] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((slot + 1) & 0xF);
}
// Handles GE_CMD_TGENMATRIXNUMBER and, as a fast path, directly consumes the run of
// GE_CMD_TGENMATRIXDATA words that follows it in the display list.
//
// op:   the command word; its low 4 bits select the first matrix word to write.
// diff: not used by this handler.
//
// Each data word is stored shifted left by 8 (dropping the command byte). A flush and
// DIRTY_TEXMATRIX are only issued for words whose value actually changes.
void GPU_GLES::Execute_TgenMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_TGENMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)gstate.tgenMatrix;
	const int start = op & 0xF;
	// The fast path covers 12 words, but the 4-bit start index can be 12..15. Then
	// `end` is <= 0 and nothing may be written here; any data commands are left to
	// Execute_TgenMtxData, which skips out-of-range indices.
	const int end = 12 - start;
	int i = 0;

	while (i < end && (src[i] >> 24) == GE_CMD_TGENMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (matrix[start + i] != newVal) {
			Flush();
			matrix[start + i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single GE_CMD_TGENMATRIXDATA word: stores it at the index held in
// gstate.texmtxnum and advances that index (wrapping within 4 bits).
void GPU_GLES::Execute_TgenMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_TgenMtxNum normally consumes the data words itself.
	const int slot = gstate.texmtxnum & 0xF;
	const u32 value = op << 8;

	// Indices 12..15 are ignored, but the counter still advances below.
	if (slot < 12) {
		u32 *words = (u32 *)gstate.tgenMatrix;
		if (words[slot] != value) {
			Flush();
			words[slot] = value;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
	}

	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((slot + 1) & 0xF);
}
// Handles GE_CMD_BONEMATRIXNUMBER and, as a fast path, directly consumes the run of
// GE_CMD_BONEMATRIXDATA words that follows it in the display list.
//
// op:   the command word; its low 7 bits select the first bone-matrix word to write.
// diff: not used by this handler.
//
// The bone data is addressed as 8 groups of 12 words. Every group touched by the run
// gets its DIRTY_BONEMATRIX0 << n bit raised, either on the shader manager right away
// or in gstate_c.deferredVertTypeDirty when the software skinning path applies.
void GPU_GLES::Execute_BoneMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_BONEMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *bones = (u32 *)gstate.boneMatrix;
	const int start = op & 0x7F;
	// The 7-bit start index can be 96..127, past the 12 * 8 words handled here. Then
	// `end` is <= 0 and nothing may be written; any data commands are left to
	// Execute_BoneMtxData, which skips out-of-range indices.
	const int end = 12 * 8 - start;
	int i = 0;

	// If we can't use software skinning, we have to flush and dirty.
	const bool flushAndDirty = !g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0;
	if (flushAndDirty) {
		while (i < end && (src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
			const u32 newVal = src[i] << 8;
			if (bones[start + i] != newVal) {
				Flush();
				bones[start + i] = newVal;
			}
			++i;
		}
	} else {
		while (i < end && (src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
			bones[start + i] = src[i] << 8;
			++i;
		}
	}

	if (i > 0) {
		// Dirty every 12-word group the run overlapped. Walking group indices (rather
		// than stepping the word index by 12) also catches the last group when the run
		// starts in the middle of one and spills into the next.
		const int firstMtx = start / 12;
		const int lastMtx = (start + i - 1) / 12;
		for (int mtx = firstMtx; mtx <= lastMtx; ++mtx) {
			if (flushAndDirty) {
				shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << mtx);
			} else {
				gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << mtx;
			}
		}
	}

	const int count = i;
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single GE_CMD_BONEMATRIXDATA word: stores it at the index held in
// gstate.boneMatrixNumber and advances that index (wrapping within 7 bits).
void GPU_GLES::Execute_BoneMtxData(u32 op, u32 diff) {
	// Rarely reached: Execute_BoneMtxNum normally consumes the data words itself.
	const int slot = gstate.boneMatrixNumber & 0x7F;
	const u32 value = op << 8;
	u32 *words = (u32 *)gstate.boneMatrix;

	// Indices 96..127 are ignored, but the counter still advances below.
	if (slot < 96 && words[slot] != value) {
		const bool softwareSkinned = g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
		if (softwareSkinned) {
			// Bone matrices should NOT flush when software skinning is enabled!
			gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (slot / 12);
		} else {
			Flush();
			shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (slot / 12));
		}
		words[slot] = value;
	}

	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((slot + 1) & 0x7F);
}
void GPU_GLES::Execute_Generic(u32 op, u32 diff) {
u32 cmd = op >> 24;
u32 data = op & 0xFFFFFF;
@ -2044,23 +1822,6 @@ void GPU_GLES::Execute_Generic(u32 op, u32 diff) {
}
}
// Marks the bone matrix (or matrices) about to be overwritten as dirty, then hands the
// actual load from `target` to gstate.FastLoadBoneMatrix.
void GPU_GLES::FastLoadBoneMatrix(u32 target) {
	const int firstWord = gstate.boneMatrixNumber & 0x7F;
	const int firstMtx = firstWord / 12;

	uint32_t dirtyMask = DIRTY_BONEMATRIX0 << firstMtx;
	// A load that starts partway into a matrix also touches the following one
	// (index wrapped to 0..7).
	if (firstWord % 12 != 0) {
		dirtyMask |= DIRTY_BONEMATRIX0 << ((firstMtx + 1) & 7);
	}

	const bool softwareSkinned = g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
	if (softwareSkinned) {
		// No flush here; the dirty bits are only recorded for later.
		gstate_c.deferredVertTypeDirty |= dirtyMask;
	} else {
		Flush();
		shaderManager_->DirtyUniform(dirtyMask);
	}

	gstate.FastLoadBoneMatrix(target);
}
void GPU_GLES::GetStats(char *buffer, size_t bufsize) {
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
snprintf(buffer, bufsize - 1,
@ -2090,9 +1851,9 @@ void GPU_GLES::GetStats(char *buffer, size_t bufsize) {
(int)textureCacheGL_->NumLoadedTextures(),
gpuStats.numTexturesDecoded,
gpuStats.numTextureInvalidations,
shaderManager_->NumVertexShaders(),
shaderManager_->NumFragmentShaders(),
shaderManager_->NumPrograms());
shaderManagerGL_->NumVertexShaders(),
shaderManagerGL_->NumFragmentShaders(),
shaderManagerGL_->NumPrograms());
}
void GPU_GLES::ClearCacheNextFrame() {
@ -2100,12 +1861,12 @@ void GPU_GLES::ClearCacheNextFrame() {
}
void GPU_GLES::ClearShaderCache() {
shaderManager_->ClearCache(true);
shaderManagerGL_->ClearCache(true);
}
void GPU_GLES::CleanupBeforeUI() {
// Clear any enabled vertex arrays.
shaderManager_->DirtyLastShader();
shaderManagerGL_->DirtyLastShader();
glstate.arrayBuffer.bind(0);
glstate.elementArrayBuffer.bind(0);
}
@ -2127,7 +1888,7 @@ void GPU_GLES::DoState(PointerWrap &p) {
gstate_c.textureChanged = TEXCHANGE_UPDATED;
framebufferManagerGL_->DestroyAllFBOs(true);
shaderManager_->ClearCache(true);
shaderManagerGL_->ClearCache(true);
}
}
@ -2216,7 +1977,7 @@ std::vector<std::string> GPU_GLES::DebugGetShaderIDs(DebugShaderType type) {
if (type == SHADER_TYPE_VERTEXLOADER) {
return drawEngine_.DebugGetVertexLoaderIDs();
} else {
return shaderManager_->DebugGetShaderIDs(type);
return shaderManagerGL_->DebugGetShaderIDs(type);
}
}
@ -2224,6 +1985,6 @@ std::string GPU_GLES::DebugGetShaderString(std::string id, DebugShaderType type,
if (type == SHADER_TYPE_VERTEXLOADER) {
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
} else {
return shaderManager_->DebugGetShaderString(id, type, stringType);
return shaderManagerGL_->DebugGetShaderString(id, type, stringType);
}
}

View File

@ -130,16 +130,6 @@ public:
void Execute_AlphaTest(u32 op, u32 diff);
void Execute_StencilTest(u32 op, u32 diff);
void Execute_ColorRef(u32 op, u32 diff);
void Execute_WorldMtxNum(u32 op, u32 diff);
void Execute_WorldMtxData(u32 op, u32 diff);
void Execute_ViewMtxNum(u32 op, u32 diff);
void Execute_ViewMtxData(u32 op, u32 diff);
void Execute_ProjMtxNum(u32 op, u32 diff);
void Execute_ProjMtxData(u32 op, u32 diff);
void Execute_TgenMtxNum(u32 op, u32 diff);
void Execute_TgenMtxData(u32 op, u32 diff);
void Execute_BoneMtxNum(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
@ -147,7 +137,6 @@ public:
protected:
void FastRunLoop(DisplayList &list) override;
void FastLoadBoneMatrix(u32 target) override;
void FinishDeferred() override;
private:
@ -172,7 +161,7 @@ private:
DepalShaderCacheGLES depalShaderCache_;
DrawEngineGLES drawEngine_;
FragmentTestCacheGLES fragmentTestCache_;
ShaderManagerGLES *shaderManager_;
ShaderManagerGLES *shaderManagerGL_;
int lastVsync_;

View File

@ -748,7 +748,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
}
ShaderManagerGLES::ShaderManagerGLES()
: lastShader_(nullptr), globalDirty_(DIRTY_ALL), shaderSwitchDirty_(0), diskCacheDirty_(false) {
: lastShader_(nullptr), shaderSwitchDirty_(0), diskCacheDirty_(false) {
codeBuffer_ = new char[16384];
lastFSID_.set_invalid();
lastVSID_.set_invalid();

View File

@ -140,7 +140,7 @@ private:
bool isFragment_;
};
class ShaderManagerGLES {
class ShaderManagerGLES : public ShaderManagerCommon {
public:
ShaderManagerGLES();
~ShaderManagerGLES();
@ -153,9 +153,6 @@ public:
LinkedShader *ApplyFragmentShader(ShaderID VSID, Shader *vs, u32 vertType, int prim);
void DirtyShader();
void DirtyUniform(u64 what) {
globalDirty_ |= what;
}
void DirtyLastShader(); // disables vertex arrays
int NumVertexShaders() const { return (int)vsCache_.size(); }
@ -191,7 +188,6 @@ private:
ShaderID lastVSID_;
LinkedShader *lastShader_;
u64 globalDirty_;
u64 shaderSwitchDirty_;
char *codeBuffer_;

View File

@ -25,6 +25,10 @@
#include "GPU/Common/TextureCacheCommon.h"
#include "GPU/Common/DrawEngineCommon.h"
void GPUCommon::Flush() {
drawEngineCommon_->DispatchFlush();
}
GPUCommon::GPUCommon() :
dumpNextFrame_(false),
dumpThisFrame_(false),
@ -84,6 +88,35 @@ void GPUCommon::Reinitialize() {
ScheduleEvent(GPU_EVENT_REINITIALIZE);
}
// Returns a rough per-vertex cycle estimate derived from the current gstate:
// a base of 20, plus extras for lighting, non-default UV generation and
// morphing.
int GPUCommon::EstimatePerVertexCost() {
	// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
	// runs in parallel with transform.

	// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.

	// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
	// went too fast and starts doing all the work over again).

	int total = 20;

	if (gstate.isLightingEnabled()) {
		// 10 for lighting itself, plus 10 for each enabled light channel.
		int enabledLights = 0;
		for (int light = 0; light < 4; ++light) {
			if (gstate.isLightChanEnabled(light))
				++enabledLights;
		}
		total += 10 + 10 * enabledLights;
	}

	if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS)
		total += 20;

	// Morphing only adds cost when more than one weight is in use.
	const int morphWeights = gstate.getNumMorphWeights();
	if (morphWeights > 1)
		total += 5 * morphWeights;

	return total;
}
void GPUCommon::PopDLQueue() {
easy_guard guard(listLock);
if(!dlQueue.empty()) {
@ -1163,6 +1196,230 @@ void GPUCommon::Execute_BlockTransferStart(u32 op, u32 diff) {
gstate_c.textureChanged = TEXCHANGE_UPDATED;
}
// Sets the world matrix write index (low 4 bits of op) and, as a fast path,
// consumes the run of GE_CMD_WORLDMATRIXDATA words that directly follows this
// command in the display list. `diff` is unused.
void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_WORLDMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.worldMatrix + (op & 0xF));
	// Entries left before index 12; zero or negative for a start index of 12..15.
	const int end = 12 - (op & 0xF);
	int i = 0;

	// Testing i < end before each store (not only after it) keeps a start index
	// of 12..15 from writing one word past the 12 entries. In that case nothing
	// is consumed here and the data words stay in the list as ordinary commands.
	while (i < end && (src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			// Only flush and dirty the uniform when a stored value actually changes.
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single world matrix data word: stores (op << 8) at the
// current write index and advances the index (wrapping within 4 bits).
// `diff` is unused.
void GPUCommon::Execute_WorldMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now, see above.
	const int index = gstate.worldmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.worldMatrix;

	// Indices 12..15 are skipped; only the write index advances for them.
	if (index < 12 && matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
	}

	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
// Sets the view matrix write index (low 4 bits of op) and, as a fast path,
// consumes the run of GE_CMD_VIEWMATRIXDATA words that directly follows this
// command in the display list. `diff` is unused.
void GPUCommon::Execute_ViewMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_VIEWMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.viewMatrix + (op & 0xF));
	// Entries left before index 12; zero or negative for a start index of 12..15.
	const int end = 12 - (op & 0xF);
	int i = 0;

	// Testing i < end before each store (not only after it) keeps a start index
	// of 12..15 from writing one word past the 12 entries. In that case nothing
	// is consumed here and the data words stay in the list as ordinary commands.
	while (i < end && (src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			// Only flush and dirty the uniform when a stored value actually changes.
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single view matrix data word: stores (op << 8) at the
// current write index and advances the index (wrapping within 4 bits).
// `diff` is unused.
void GPUCommon::Execute_ViewMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now, see above.
	const int index = gstate.viewmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.viewMatrix;

	// Indices 12..15 are skipped; only the write index advances for them.
	if (index < 12 && matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
	}

	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
// Sets the projection matrix write index (low 4 bits of op) and, as a fast
// path, consumes the run of GE_CMD_PROJMATRIXDATA words that directly follows
// this command in the display list. `diff` is unused.
void GPUCommon::Execute_ProjMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_PROJMATRIXDATA.
	const int start = op & 0xF;
	const u32_le *words = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)(gstate.projMatrix + start);
	// With a 4-bit start index there is always room for at least one entry.
	const int room = 16 - start;

	int loaded = 0;
	for (; loaded < room && (words[loaded] >> 24) == GE_CMD_PROJMATRIXDATA; ++loaded) {
		const u32 value = words[loaded] << 8;
		if (matrix[loaded] == value)
			continue;
		// Only flush and dirty the uniform when a stored value actually changes.
		Flush();
		matrix[loaded] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + loaded) & 0xF);

	// Skip over the loaded data, it's done now.
	const int skipBytes = loaded * 4;
	UpdatePC(currentList->pc, currentList->pc + skipBytes);
	currentList->pc += skipBytes;
}
// Slow path for a single projection matrix data word: stores (op << 8) at
// the current write index and advances the index (wrapping within 4 bits).
// `diff` is unused.
void GPUCommon::Execute_ProjMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now, see above.
	const int index = gstate.projmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.projMatrix;

	// No range check here: all 16 values of the 4-bit index are written.
	if (matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
// Sets the texgen matrix write index (low 4 bits of op) and, as a fast path,
// consumes the run of GE_CMD_TGENMATRIXDATA words that directly follows this
// command in the display list. `diff` is unused.
void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_TGENMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF));
	// Entries left before index 12; zero or negative for a start index of 12..15.
	const int end = 12 - (op & 0xF);
	int i = 0;

	// Testing i < end before each store (not only after it) keeps a start index
	// of 12..15 from writing one word past the 12 entries. In that case nothing
	// is consumed here and the data words stay in the list as ordinary commands.
	while (i < end && (src[i] >> 24) == GE_CMD_TGENMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			// Only flush and dirty the uniform when a stored value actually changes.
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single texgen matrix data word: stores (op << 8) at the
// current write index and advances the index (wrapping within 4 bits).
// `diff` is unused.
void GPUCommon::Execute_TgenMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now, see above.
	const int index = gstate.texmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.tgenMatrix;

	// Indices 12..15 are skipped; only the write index advances for them.
	if (index < 12 && matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
	}

	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
// Sets the bone matrix write index (low 7 bits of op) and, as a fast path,
// consumes the run of GE_CMD_BONEMATRIXDATA words that directly follows this
// command in the display list. Every 12 consecutive words share one
// DIRTY_BONEMATRIXn flag. `diff` is unused.
void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_BONEMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	const int first = op & 0x7F;
	u32 *dst = (u32 *)(gstate.boneMatrix + first);
	// Entries left before index 96; zero or negative for a start index of 96..127.
	const int end = 12 * 8 - first;
	int i = 0;

	// If we can't use software skinning, we have to flush and dirty.
	const bool mustFlush = !g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0;

	// In both loops, i < end is tested before each store so that a start index
	// of 96..127 no longer writes one word past the 96 entries; such data words
	// are left in the list to be executed as ordinary commands.
	if (mustFlush) {
		while (i < end && (src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
			const u32 newVal = src[i] << 8;
			if (dst[i] != newVal) {
				Flush();
				dst[i] = newVal;
			}
			++i;
		}
	} else {
		while (i < end && (src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
			dst[i] = src[i] << 8;
			++i;
		}
	}

	const int count = i;
	if (count > 0) {
		// Dirty every matrix the written range [first, first + count) overlaps.
		// Stepping the word index by 12 from an unaligned start would miss the
		// last matrix whenever the run straddles a matrix boundary.
		const int lastMtx = (first + count - 1) / 12;
		for (int mtx = first / 12; mtx <= lastMtx; ++mtx) {
			if (mustFlush) {
				shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << mtx);
			} else {
				gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << mtx;
			}
		}
	}

	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single bone matrix data word: stores (op << 8) at the
// current write index (low 7 bits of gstate.boneMatrixNumber) and then
// advances that index by one, wrapping within 7 bits. `diff` is unused.
void GPUCommon::Execute_BoneMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now, see above.
	const int index = gstate.boneMatrixNumber & 0x7F;
	const u32 value = op << 8;
	u32 *bones = (u32 *)gstate.boneMatrix;

	// Indices 96..127 are skipped; only the write index advances for them.
	if (index < 96 && bones[index] != value) {
		// Every 12 consecutive words share one DIRTY_BONEMATRIXn flag.
		const bool softwareSkinning =
			g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
		if (softwareSkinning) {
			// Bone matrices should NOT flush when software skinning is enabled!
			gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (index / 12);
		} else {
			Flush();
			shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (index / 12));
		}
		bones[index] = value;
	}

	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((index + 1) & 0x7F);
}
void GPUCommon::ExecuteOp(u32 op, u32 diff) {
const u32 cmd = op >> 24;
@ -1211,6 +1468,19 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
}
// Marks the bone matrix (or matrices) starting at the current bone write
// index as dirty, then hands the actual load to gstate.FastLoadBoneMatrix.
void GPUCommon::FastLoadBoneMatrix(u32 target) {
	const int startIndex = gstate.boneMatrixNumber & 0x7F;
	const int firstMtx = startIndex / 12;
	const bool aligned = (startIndex % 12) == 0;

	// The mask is deliberately kept 32 bits wide, exactly as before.
	uint32_t dirtyMask = DIRTY_BONEMATRIX0 << firstMtx;
	if (!aligned) {
		// An unaligned start also touches the following matrix (index wraps at 8).
		dirtyMask |= DIRTY_BONEMATRIX0 << ((firstMtx + 1) & 7);
	}

	const bool softwareSkinning =
		g_Config.bSoftwareSkinning && (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) == 0;
	if (softwareSkinning) {
		// No flush here; the dirty bits are recorded in gstate_c instead.
		gstate_c.deferredVertTypeDirty |= dirtyMask;
	} else {
		Flush();
		shaderManager_->DirtyUniform(dirtyMask);
	}

	gstate.FastLoadBoneMatrix(target);
}

View File

@ -85,6 +85,22 @@ public:
void Execute_BoundingBox(u32 op, u32 diff);
void Execute_BlockTransferStart(u32 op, u32 diff);
void Execute_WorldMtxNum(u32 op, u32 diff);
void Execute_WorldMtxData(u32 op, u32 diff);
void Execute_ViewMtxNum(u32 op, u32 diff);
void Execute_ViewMtxData(u32 op, u32 diff);
void Execute_ProjMtxNum(u32 op, u32 diff);
void Execute_ProjMtxData(u32 op, u32 diff);
void Execute_TgenMtxNum(u32 op, u32 diff);
void Execute_TgenMtxData(u32 op, u32 diff);
void Execute_BoneMtxNum(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);
int EstimatePerVertexCost();
// Note: Not virtual!
inline void Flush();
u64 GetTickEstimate() override {
#if defined(_M_X64) || defined(__ANDROID__)
return curTickEst_;
@ -207,6 +223,7 @@ protected:
FramebufferManagerCommon *framebufferManager_;
TextureCacheCommon *textureCache_;
DrawEngineCommon *drawEngineCommon_;
ShaderManagerCommon *shaderManager_;
typedef std::list<int> DisplayListQueue;

View File

@ -283,40 +283,12 @@ void SoftGPU::FastRunLoop(DisplayList &list) {
}
}
// Returns a rough per-vertex cycle estimate derived from the current gstate:
// a base of 20, plus extras for lighting, each enabled light channel and
// non-default UV generation.
int EstimatePerVertexCost() {
	// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
	// runs in parallel with transform.

	// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.

	// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
	// went too fast and starts doing all the work over again).

	int total = 20;
	if (gstate.isLightingEnabled())
		total += 10;

	// Light channels are counted whether or not lighting itself is enabled.
	int enabledLights = 0;
	for (int light = 0; light < 4; ++light) {
		if (gstate.isLightChanEnabled(light))
			++enabledLights;
	}
	total += 10 * enabledLights;

	if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS)
		total += 20;

	// TODO: morphcount

	return total;
}
void SoftGPU::ExecuteOp(u32 op, u32 diff)
{
void SoftGPU::ExecuteOp(u32 op, u32 diff) {
u32 cmd = op >> 24;
u32 data = op & 0xFFFFFF;
// Handle control and drawing commands here directly. The others we delegate.
switch (cmd)
{
switch (cmd) {
case GE_CMD_BASE:
break;

View File

@ -319,7 +319,7 @@ static const CommandTableEntry commandTable[] = {
{ GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE
{ GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_BoundingBox }, // + FLUSHBEFORE when we implement... or not, do we need to?
// Changing the vertex type requires us to flush.
// Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPU_Vulkan::Execute_VertexType },
{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_Bezier },
@ -340,14 +340,14 @@ static const CommandTableEntry commandTable[] = {
{ GE_CMD_DITH3 },
// These handle their own flushing.
{ GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_WorldMtxNum },
{ GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_WorldMtxData },
{ GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_ViewMtxNum },
{ GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_ViewMtxData },
{ GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_ProjMtxNum },
{ GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_ProjMtxData },
{ GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_TgenMtxNum },
{ GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_TgenMtxData },
{ GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_WorldMtxNum },
{ GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_WorldMtxData },
{ GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_ViewMtxNum },
{ GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_ViewMtxData },
{ GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_ProjMtxNum },
{ GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_ProjMtxData },
{ GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_TgenMtxNum },
{ GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_TgenMtxData },
{ GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_BoneMtxNum },
{ GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_BoneMtxData },
@ -397,24 +397,25 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *ctx)
UpdateVsyncInterval(true);
CheckGPUFeatures();
shaderManager_ = new ShaderManagerVulkan(vulkan_);
shaderManagerVulkan_ = new ShaderManagerVulkan(vulkan_);
pipelineManager_ = new PipelineManagerVulkan(vulkan_);
framebufferManagerVulkan_ = new FramebufferManagerVulkan(vulkan_);
framebufferManager_ = framebufferManagerVulkan_;
textureCacheVulkan_ = new TextureCacheVulkan(vulkan_);
textureCache_ = textureCacheVulkan_;
drawEngineCommon_ = &drawEngine_;
shaderManager_ = shaderManagerVulkan_;
drawEngine_.SetTextureCache(textureCacheVulkan_);
drawEngine_.SetFramebufferManager(framebufferManagerVulkan_);
drawEngine_.SetShaderManager(shaderManager_);
drawEngine_.SetShaderManager(shaderManagerVulkan_);
drawEngine_.SetPipelineManager(pipelineManager_);
framebufferManagerVulkan_->Init();
framebufferManagerVulkan_->SetTextureCache(textureCacheVulkan_);
framebufferManagerVulkan_->SetDrawEngine(&drawEngine_);
textureCacheVulkan_->SetFramebufferManager(framebufferManagerVulkan_);
textureCacheVulkan_->SetDepalShaderCache(&depalShaderCache_);
textureCacheVulkan_->SetShaderManager(shaderManager_);
textureCacheVulkan_->SetShaderManager(shaderManagerVulkan_);
textureCacheVulkan_->SetTransformDrawEngine(&drawEngine_);
// Sanity check gstate
@ -458,7 +459,7 @@ GPU_Vulkan::~GPU_Vulkan() {
framebufferManagerVulkan_->DestroyAllFBOs(true);
depalShaderCache_.Clear();
delete pipelineManager_;
delete shaderManager_;
delete shaderManagerVulkan_;
}
void GPU_Vulkan::CheckGPUFeatures() {
@ -506,7 +507,7 @@ void GPU_Vulkan::BeginHostFrame() {
framebufferManagerVulkan_->BeginFrameVulkan();
shaderManager_->DirtyShader();
shaderManagerVulkan_->DirtyShader();
shaderManager_->DirtyUniform(DIRTY_ALL);
if (dumpNextFrame_) {
@ -686,7 +687,7 @@ void GPU_Vulkan::CopyDisplayToOutputInternal() {
// Flush anything left over.
drawEngine_.Flush(curCmd_);
shaderManager_->DirtyLastShader();
shaderManagerVulkan_->DirtyLastShader();
framebufferManagerVulkan_->CopyDisplayToOutput();
@ -783,8 +784,7 @@ void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) {
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
drawEngine_.SetupVertexDecoder(gstate.vertType);
// Rough estimate, not sure what's correct.
int vertexCost = drawEngine_.EstimatePerVertexCost();
cyclesExecuted += vertexCost * count;
cyclesExecuted += EstimatePerVertexCost() * count;
return;
}
@ -812,7 +812,7 @@ void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) {
int bytesRead = 0;
drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
int vertexCost = drawEngine_.EstimatePerVertexCost();
int vertexCost = EstimatePerVertexCost();
gpuStats.vertexGPUCycles += vertexCost * count;
cyclesExecuted += vertexCost * count;
@ -1097,166 +1097,6 @@ void GPU_Vulkan::Execute_ColorRef(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
}
// Sets the world matrix write index (low 4 bits of op) and, as a fast path,
// consumes the run of GE_CMD_WORLDMATRIXDATA words that directly follows this
// command in the display list. `diff` is unused.
void GPU_Vulkan::Execute_WorldMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_WORLDMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.worldMatrix + (op & 0xF));
	// Entries left before index 12; zero or negative for a start index of 12..15.
	const int end = 12 - (op & 0xF);
	int i = 0;

	// Testing i < end before each store (not only after it) keeps a start index
	// of 12..15 from writing one word past the 12 entries. In that case nothing
	// is consumed here and the data words stay in the list as ordinary commands.
	while (i < end && (src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			// Only flush and dirty the uniform when a stored value actually changes.
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single world matrix data word: stores (op << 8) at the
// current write index and advances the index (wrapping within 4 bits).
// `diff` is unused.
void GPU_Vulkan::Execute_WorldMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now, see above.
	const int index = gstate.worldmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.worldMatrix;

	// Indices 12..15 are skipped; only the write index advances for them.
	if (index < 12 && matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
	}

	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
// Sets the view matrix write index (low 4 bits of op) and, as a fast path,
// consumes the run of GE_CMD_VIEWMATRIXDATA words that directly follows this
// command in the display list. `diff` is unused.
void GPU_Vulkan::Execute_ViewMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_VIEWMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.viewMatrix + (op & 0xF));
	// Entries left before index 12; zero or negative for a start index of 12..15.
	const int end = 12 - (op & 0xF);
	int i = 0;

	// Testing i < end before each store (not only after it) keeps a start index
	// of 12..15 from writing one word past the 12 entries. In that case nothing
	// is consumed here and the data words stay in the list as ordinary commands.
	while (i < end && (src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			// Only flush and dirty the uniform when a stored value actually changes.
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single view matrix data word: stores (op << 8) at the
// current write index and advances the index (wrapping within 4 bits).
// `diff` is unused.
void GPU_Vulkan::Execute_ViewMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now, see above.
	const int index = gstate.viewmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.viewMatrix;

	// Indices 12..15 are skipped; only the write index advances for them.
	if (index < 12 && matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
	}

	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
// Sets the projection matrix write index (low 4 bits of op) and, as a fast
// path, consumes the run of GE_CMD_PROJMATRIXDATA words that directly follows
// this command in the display list. `diff` is unused.
void GPU_Vulkan::Execute_ProjMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_PROJMATRIXDATA.
	const int start = op & 0xF;
	const u32_le *words = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *matrix = (u32 *)(gstate.projMatrix + start);
	// With a 4-bit start index there is always room for at least one entry.
	const int room = 16 - start;

	int loaded = 0;
	for (; loaded < room && (words[loaded] >> 24) == GE_CMD_PROJMATRIXDATA; ++loaded) {
		const u32 value = words[loaded] << 8;
		if (matrix[loaded] == value)
			continue;
		// Only flush and dirty the uniform when a stored value actually changes.
		Flush();
		matrix[loaded] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + loaded) & 0xF);

	// Skip over the loaded data, it's done now.
	const int skipBytes = loaded * 4;
	UpdatePC(currentList->pc, currentList->pc + skipBytes);
	currentList->pc += skipBytes;
}
// Slow path for a single projection matrix data word: stores (op << 8) at
// the current write index and advances the index (wrapping within 4 bits).
// `diff` is unused.
void GPU_Vulkan::Execute_ProjMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now, see above.
	const int index = gstate.projmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.projMatrix;

	// No range check here: all 16 values of the 4-bit index are written.
	if (matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
	}

	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
// Sets the texgen matrix write index (low 4 bits of op) and, as a fast path,
// consumes the run of GE_CMD_TGENMATRIXDATA words that directly follows this
// command in the display list. `diff` is unused.
void GPU_Vulkan::Execute_TgenMtxNum(u32 op, u32 diff) {
	// This is almost always followed by GE_CMD_TGENMATRIXDATA.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF));
	// Entries left before index 12; zero or negative for a start index of 12..15.
	const int end = 12 - (op & 0xF);
	int i = 0;

	// Testing i < end before each store (not only after it) keeps a start index
	// of 12..15 from writing one word past the 12 entries. In that case nothing
	// is consumed here and the data words stay in the list as ordinary commands.
	while (i < end && (src[i] >> 24) == GE_CMD_TGENMATRIXDATA) {
		const u32 newVal = src[i] << 8;
		if (dst[i] != newVal) {
			// Only flush and dirty the uniform when a stored value actually changes.
			Flush();
			dst[i] = newVal;
			shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
		}
		++i;
	}

	const int count = i;
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
// Slow path for a single texgen matrix data word: stores (op << 8) at the
// current write index and advances the index (wrapping within 4 bits).
// `diff` is unused.
void GPU_Vulkan::Execute_TgenMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now, see above.
	const int index = gstate.texmtxnum & 0xF;
	const u32 value = op << 8;
	u32 *matrix = (u32 *)gstate.tgenMatrix;

	// Indices 12..15 are skipped; only the write index advances for them.
	if (index < 12 && matrix[index] != value) {
		Flush();
		matrix[index] = value;
		shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
	}

	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((index + 1) & 0xF);
}
void GPU_Vulkan::Execute_BoneMtxNum(u32 op, u32 diff) {
// This is almost always followed by GE_CMD_BONEMATRIXDATA.
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
@ -1264,6 +1104,7 @@ void GPU_Vulkan::Execute_BoneMtxNum(u32 op, u32 diff) {
const int end = 12 * 8 - (op & 0x7F);
int i = 0;
// If we can't use software skinning, we have to flush and dirty.
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
const u32 newVal = src[i] << 8;
if (dst[i] != newVal) {
@ -1293,6 +1134,7 @@ void GPU_Vulkan::Execute_BoneMtxData(u32 op, u32 diff) {
int num = gstate.boneMatrixNumber & 0x7F;
u32 newVal = op << 8;
if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) {
// Bone matrices should NOT flush when software skinning is enabled!
Flush();
shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12));
((u32 *)gstate.boneMatrix)[num] = newVal;
@ -1850,7 +1692,7 @@ void GPU_Vulkan::DeviceLost() {
pipelineManager_->DeviceLost();
textureCacheVulkan_->DeviceLost();
depalShaderCache_.Clear();
shaderManager_->ClearShaders();
shaderManagerVulkan_->ClearShaders();
}
void GPU_Vulkan::DeviceRestore() {
@ -1863,7 +1705,7 @@ void GPU_Vulkan::DeviceRestore() {
drawEngine_.DeviceRestore(vulkan_);
pipelineManager_->DeviceRestore(vulkan_);
textureCacheVulkan_->DeviceRestore(vulkan_);
shaderManager_->DeviceRestore(vulkan_);
shaderManagerVulkan_->DeviceRestore(vulkan_);
}
void GPU_Vulkan::GetStats(char *buffer, size_t bufsize) {
@ -1897,8 +1739,8 @@ void GPU_Vulkan::GetStats(char *buffer, size_t bufsize) {
(int)textureCacheVulkan_->NumLoadedTextures(),
gpuStats.numTexturesDecoded,
gpuStats.numTextureInvalidations,
shaderManager_->GetNumVertexShaders(),
shaderManager_->GetNumFragmentShaders(),
shaderManagerVulkan_->GetNumVertexShaders(),
shaderManagerVulkan_->GetNumFragmentShaders(),
pipelineManager_->GetNumPipelines(),
drawStats.pushUBOSpaceUsed,
drawStats.pushVertexSpaceUsed,
@ -1930,7 +1772,7 @@ void GPU_Vulkan::DoState(PointerWrap &p) {
gstate_c.textureChanged = TEXCHANGE_UPDATED;
framebufferManagerVulkan_->DestroyAllFBOs(true);
shaderManager_->ClearShaders();
shaderManagerVulkan_->ClearShaders();
pipelineManager_->Clear();
}
}
@ -1953,7 +1795,7 @@ std::vector<std::string> GPU_Vulkan::DebugGetShaderIDs(DebugShaderType type) {
} else if (type == SHADER_TYPE_PIPELINE) {
return pipelineManager_->DebugGetObjectIDs(type);
} else {
return shaderManager_->DebugGetShaderIDs(type);
return shaderManagerVulkan_->DebugGetShaderIDs(type);
}
}
@ -1963,6 +1805,6 @@ std::string GPU_Vulkan::DebugGetShaderString(std::string id, DebugShaderType typ
} else if (type == SHADER_TYPE_PIPELINE) {
return pipelineManager_->DebugGetObjectString(id, type, stringType);
} else {
return shaderManager_->DebugGetShaderString(id, type, stringType);
return shaderManagerVulkan_->DebugGetShaderString(id, type, stringType);
}
}

View File

@ -117,14 +117,6 @@ public:
void Execute_AlphaTest(u32 op, u32 diff);
void Execute_StencilTest(u32 op, u32 diff);
void Execute_ColorRef(u32 op, u32 diff);
void Execute_WorldMtxNum(u32 op, u32 diff);
void Execute_WorldMtxData(u32 op, u32 diff);
void Execute_ViewMtxNum(u32 op, u32 diff);
void Execute_ViewMtxData(u32 op, u32 diff);
void Execute_ProjMtxNum(u32 op, u32 diff);
void Execute_ProjMtxData(u32 op, u32 diff);
void Execute_TgenMtxNum(u32 op, u32 diff);
void Execute_TgenMtxData(u32 op, u32 diff);
void Execute_BoneMtxNum(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);
@ -162,7 +154,7 @@ private:
DrawEngineVulkan drawEngine_;
// Manages shaders and UBO data
ShaderManagerVulkan *shaderManager_;
ShaderManagerVulkan *shaderManagerVulkan_;
// Manages state and pipeline objects
PipelineManagerVulkan *pipelineManager_;

View File

@ -157,7 +157,7 @@ static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invert
}
ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan)
: vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr), globalDirty_(0xFFFFFFFF) {
: vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr) {
codeBuffer_ = new char[16384];
uboAlignment_ = vulkan_->GetPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment;
memset(&ub_base, 0, sizeof(ub_base));

View File

@ -184,7 +184,7 @@ protected:
class VulkanPushBuffer;
class ShaderManagerVulkan {
class ShaderManagerVulkan : public ShaderManagerCommon {
public:
ShaderManagerVulkan(VulkanContext *vulkan);
~ShaderManagerVulkan();
@ -204,10 +204,6 @@ public:
uint32_t UpdateUniforms();
void DirtyUniform(uint64_t what) {
globalDirty_ |= what;
}
// TODO: Avoid copying these buffers if same as last draw, can still point to it assuming we're still in the same pushbuffer.
// Applies dirty changes and copies the buffer.
bool IsBaseDirty() { return true; }
@ -235,7 +231,6 @@ private:
char *codeBuffer_;
uint64_t globalDirty_;
uint64_t uboAlignment_;
// Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time.
UB_VS_FS_Base ub_base;