diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index 0d78a89a2b..e0cc74f3e8 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -58,35 +58,6 @@ public: std::vector DebugGetVertexLoaderIDs(); std::string DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType); - int EstimatePerVertexCost() { - // TODO: This is transform cost, also account for rasterization cost somehow... although it probably - // runs in parallel with transform. - - // Also, this is all pure guesswork. If we can find a way to do measurements, that would be great. - - // GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things - // went too fast and starts doing all the work over again). - - int cost = 20; - if (gstate.isLightingEnabled()) { - cost += 10; - - for (int i = 0; i < 4; i++) { - if (gstate.isLightChanEnabled(i)) - cost += 10; - } - } - - if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) { - cost += 20; - } - int morphCount = gstate.getNumMorphWeights(); - if (morphCount > 1) { - cost += 5 * morphCount; - } - return cost; - } - protected: // Preprocessing for spline/bezier u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType); diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index 6005396611..2a5f831439 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -84,5 +84,21 @@ enum : uint64_t { DIRTY_SPLINETYPEV = 1ULL << 36, DIRTY_BONE_UNIFORMS = 0xFF000000ULL, + + DIRTY_ALL_UNIFORMS = 0x1FFFFFFFFFULL, + + // Now we can add further dirty flags that are not uniforms. 
+ DIRTY_ALL = 0xFFFFFFFFFFFFFFFFULL }; + +class ShaderManagerCommon { /* Common base for backend shader managers (e.g. ShaderManagerDX9); owns the accumulated DIRTY_* flag mask. */ +public: + ShaderManagerCommon() : globalDirty_(DIRTY_ALL) {} + virtual ~ShaderManagerCommon() {} + void DirtyUniform(uint64_t what) { /* ORs the given DIRTY_* bits into globalDirty_; nothing is cleared here. */ + globalDirty_ |= what; + } +protected: + uint64_t globalDirty_; /* Initialised to DIRTY_ALL by the constructor. */ +}; diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 779bb21275..078510a03e 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -332,7 +332,7 @@ static const CommandTableEntry commandTable[] = { // Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack... {GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, &GPU_DX9::Execute_LoadClut}, - {GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC, &GPU_DX9::Execute_BlockTransferStart}, + {GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC, &GPUCommon::Execute_BlockTransferStart}, // We don't use the dither table. {GE_CMD_DITH0}, @@ -341,16 +341,16 @@ static const CommandTableEntry commandTable[] = { {GE_CMD_DITH3}, // These handle their own flushing. 
- {GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_WorldMtxNum}, - {GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_WorldMtxData}, - {GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_ViewMtxNum}, - {GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_ViewMtxData}, - {GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_ProjMtxNum}, - {GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_ProjMtxData}, - {GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_TgenMtxNum}, - {GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_TgenMtxData}, - {GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_BoneMtxNum}, - {GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, &GPU_DX9::Execute_BoneMtxData}, + {GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_WorldMtxNum}, + {GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_WorldMtxData}, + {GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_ViewMtxNum}, + {GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_ViewMtxData}, + {GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_ProjMtxNum}, + {GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_ProjMtxData}, + {GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_TgenMtxNum}, + {GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_TgenMtxData}, + {GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPUCommon::Execute_BoneMtxNum}, + {GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, &GPUCommon::Execute_BoneMtxData}, // Vertex Screen/Texture/Color { GE_CMD_VSCX, FLAG_EXECUTE }, @@ -398,23 +398,24 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx) lastVsync_ = g_Config.bVSync ? 
1 : 0; dxstate.SetVSyncInterval(g_Config.bVSync); + shaderManagerDX9_ = new ShaderManagerDX9(); framebufferManagerDX9_ = new FramebufferManagerDX9(); framebufferManager_ = framebufferManagerDX9_; textureCacheDX9_ = new TextureCacheDX9(); textureCache_ = textureCacheDX9_; drawEngineCommon_ = &drawEngine_; + shaderManager_ = shaderManagerDX9_; - shaderManager_ = new ShaderManagerDX9(); - drawEngine_.SetShaderManager(shaderManager_); + drawEngine_.SetShaderManager(shaderManagerDX9_); drawEngine_.SetTextureCache(textureCacheDX9_); drawEngine_.SetFramebufferManager(framebufferManagerDX9_); framebufferManagerDX9_->Init(); framebufferManagerDX9_->SetTextureCache(textureCacheDX9_); - framebufferManagerDX9_->SetShaderManager(shaderManager_); + framebufferManagerDX9_->SetShaderManager(shaderManagerDX9_); framebufferManagerDX9_->SetTransformDrawEngine(&drawEngine_); textureCacheDX9_->SetFramebufferManager(framebufferManagerDX9_); textureCacheDX9_->SetDepalShaderCache(&depalShaderCache_); - textureCacheDX9_->SetShaderManager(shaderManager_); + textureCacheDX9_->SetShaderManager(shaderManagerDX9_); // Sanity check gstate if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { @@ -507,8 +508,8 @@ void GPU_DX9::CheckGPUFeatures() { GPU_DX9::~GPU_DX9() { framebufferManagerDX9_->DestroyAllFBOs(true); - shaderManager_->ClearCache(true); - delete shaderManager_; + shaderManagerDX9_->ClearCache(true); + delete shaderManagerDX9_; } // Needs to be called on GPU thread, not reporting thread. @@ -525,7 +526,7 @@ void GPU_DX9::BuildReportingInfo() { void GPU_DX9::DeviceLost() { // Simply drop all caches and textures. // FBOs appear to survive? Or no? 
- shaderManager_->ClearCache(false); + shaderManagerDX9_->ClearCache(false); textureCacheDX9_->Clear(false); framebufferManagerDX9_->DeviceLost(); } @@ -585,7 +586,7 @@ void GPU_DX9::BeginFrameInternal() { } else if (dumpThisFrame_) { dumpThisFrame_ = false; } - shaderManager_->DirtyShader(); + shaderManagerDX9_->DirtyShader(); framebufferManagerDX9_->BeginFrame(); } @@ -639,7 +640,7 @@ void GPU_DX9::CopyDisplayToOutputInternal() { framebufferManagerDX9_->EndFrame(); // shaderManager_->EndFrame(); - shaderManager_->DirtyLastShader(); + shaderManagerDX9_->DirtyLastShader(); gstate_c.textureChanged = TEXCHANGE_UPDATED; } @@ -755,8 +756,7 @@ void GPU_DX9::Execute_Prim(u32 op, u32 diff) { if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { drawEngine_.SetupVertexDecoder(gstate.vertType); // Rough estimate, not sure what's correct. - int vertexCost = drawEngine_.EstimatePerVertexCost(); - cyclesExecuted += vertexCost * count; + cyclesExecuted += EstimatePerVertexCost() * count; return; } @@ -787,7 +787,7 @@ void GPU_DX9::Execute_Prim(u32 op, u32 diff) { int bytesRead = 0; drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead); - int vertexCost = drawEngine_.EstimatePerVertexCost() * count; + int vertexCost = EstimatePerVertexCost() * count; gpuStats.vertexGPUCycles += vertexCost; cyclesExecuted += vertexCost; @@ -1066,230 +1066,6 @@ void GPU_DX9::Execute_ColorRef(u32 op, u32 diff) { shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF); } -void GPU_DX9::Execute_WorldMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_WORLDMATRIXDATA. 
- const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.worldMatrix + (op & 0xF)); - const int end = 12 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_DX9::Execute_WorldMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.worldmtxnum & 0xF; - u32 newVal = op << 8; - if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) { - Flush(); - ((u32 *)gstate.worldMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX); - } - num++; - gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_DX9::Execute_ViewMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_VIEWMATRIXDATA. - const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.viewMatrix + (op & 0xF)); - const int end = 12 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. 
- UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_DX9::Execute_ViewMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.viewmtxnum & 0xF; - u32 newVal = op << 8; - if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) { - Flush(); - ((u32 *)gstate.viewMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX); - } - num++; - gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_DX9::Execute_ProjMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_PROJMATRIXDATA. - const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.projMatrix + (op & 0xF)); - const int end = 16 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_PROJMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_DX9::Execute_ProjMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.projmtxnum & 0xF; - u32 newVal = op << 8; - if (newVal != ((const u32 *)gstate.projMatrix)[num]) { - Flush(); - ((u32 *)gstate.projMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); - } - num++; - gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_DX9::Execute_TgenMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_TGENMATRIXDATA. 
- const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF)); - const int end = 12 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_TGENMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_TEXMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_DX9::Execute_TgenMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.texmtxnum & 0xF; - u32 newVal = op << 8; - if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) { - Flush(); - ((u32 *)gstate.tgenMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_TEXMATRIX); - } - num++; - gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_DX9::Execute_BoneMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_BONEMATRIXDATA. - const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.boneMatrix + (op & 0x7F)); - const int end = 12 * 8 - (op & 0x7F); - int i = 0; - - // If we can't use software skinning, we have to flush and dirty. 
- if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) { - while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - } - if (++i >= end) { - break; - } - } - - const int numPlusCount = (op & 0x7F) + i; - for (int num = op & 0x7F; num < numPlusCount; num += 12) { - shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12)); - } - } else { - while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) { - dst[i] = src[i] << 8; - if (++i >= end) { - break; - } - } - - const int numPlusCount = (op & 0x7F) + i; - for (int num = op & 0x7F; num < numPlusCount; num += 12) { - gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12); - } - } - - const int count = i; - gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_DX9::Execute_BoneMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.boneMatrixNumber & 0x7F; - u32 newVal = op << 8; - if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) { - // Bone matrices should NOT flush when software skinning is enabled! 
- if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) { - Flush(); - shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12)); - } else { - gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12); - } - ((u32 *)gstate.boneMatrix)[num] = newVal; - } - num++; - gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F); -} - void GPU_DX9::Execute_Generic(u32 op, u32 diff) { u32 cmd = op >> 24; u32 data = op & 0xFFFFFF; @@ -1514,7 +1290,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) { case GE_CMD_LAC0: case GE_CMD_LDC0: case GE_CMD_LSC0: - shaderManager_->DirtyUniform(DIRTY_LIGHT0); + if (diff) + shaderManager_->DirtyUniform(DIRTY_LIGHT0); break; case GE_CMD_LX1:case GE_CMD_LY1:case GE_CMD_LZ1: @@ -1525,7 +1302,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) { case GE_CMD_LAC1: case GE_CMD_LDC1: case GE_CMD_LSC1: - shaderManager_->DirtyUniform(DIRTY_LIGHT1); + if (diff) + shaderManager_->DirtyUniform(DIRTY_LIGHT1); break; case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2: case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2: @@ -1535,7 +1313,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) { case GE_CMD_LAC2: case GE_CMD_LDC2: case GE_CMD_LSC2: - shaderManager_->DirtyUniform(DIRTY_LIGHT2); + if (diff) + shaderManager_->DirtyUniform(DIRTY_LIGHT2); break; case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3: case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3: @@ -1545,7 +1324,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) { case GE_CMD_LAC3: case GE_CMD_LDC3: case GE_CMD_LSC3: - shaderManager_->DirtyUniform(DIRTY_LIGHT3); + if (diff) + shaderManager_->DirtyUniform(DIRTY_LIGHT3); break; case GE_CMD_VIEWPORTXSCALE: @@ -1554,7 +1334,8 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) { case GE_CMD_VIEWPORTYCENTER: case GE_CMD_VIEWPORTZSCALE: case GE_CMD_VIEWPORTZCENTER: - Execute_ViewportType(op, diff); + if (diff) + Execute_ViewportType(op, diff); break; case GE_CMD_LIGHTENABLE0: @@ -1761,23 
+1542,6 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) { } } -void GPU_DX9::FastLoadBoneMatrix(u32 target) { - const int num = gstate.boneMatrixNumber & 0x7F; - const int mtxNum = num / 12; - uint32_t uniformsToDirty = DIRTY_BONEMATRIX0 << mtxNum; - if ((num - 12 * mtxNum) != 0) { - uniformsToDirty |= DIRTY_BONEMATRIX0 << ((mtxNum + 1) & 7); - } - - if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) { - Flush(); - shaderManager_->DirtyUniform(uniformsToDirty); - } else { - gstate_c.deferredVertTypeDirty |= uniformsToDirty; - } - gstate.FastLoadBoneMatrix(target); -} - void GPU_DX9::GetStats(char *buffer, size_t bufsize) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; snprintf(buffer, bufsize - 1, @@ -1807,8 +1571,8 @@ void GPU_DX9::GetStats(char *buffer, size_t bufsize) { (int)textureCacheDX9_->NumLoadedTextures(), gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, - shaderManager_->NumVertexShaders(), - shaderManager_->NumFragmentShaders() + shaderManagerDX9_->NumVertexShaders(), + shaderManagerDX9_->NumFragmentShaders() ); } @@ -1817,7 +1581,7 @@ void GPU_DX9::ClearCacheNextFrame() { } void GPU_DX9::ClearShaderCache() { - shaderManager_->ClearCache(true); + shaderManagerDX9_->ClearCache(true); } std::vector GPU_DX9::GetFramebufferList() { @@ -1835,7 +1599,7 @@ void GPU_DX9::DoState(PointerWrap &p) { gstate_c.textureChanged = TEXCHANGE_UPDATED; framebufferManagerDX9_->DestroyAllFBOs(true); - shaderManager_->ClearCache(true); + shaderManagerDX9_->ClearCache(true); } } @@ -1956,7 +1720,7 @@ std::vector GPU_DX9::DebugGetShaderIDs(DebugShaderType type) { if (type == SHADER_TYPE_VERTEXLOADER) { return drawEngine_.DebugGetVertexLoaderIDs(); } else { - return shaderManager_->DebugGetShaderIDs(type); + return shaderManagerDX9_->DebugGetShaderIDs(type); } } @@ -1964,7 +1728,7 @@ std::string GPU_DX9::DebugGetShaderString(std::string 
id, DebugShaderType type, if (type == SHADER_TYPE_VERTEXLOADER) { return drawEngine_.DebugGetVertexLoaderString(id, stringType); } else { - return shaderManager_->DebugGetShaderString(id, type, stringType); + return shaderManagerDX9_->DebugGetShaderString(id, type, stringType); } } diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h index fb91c0e775..836550ed30 100644 --- a/GPU/Directx9/GPU_DX9.h +++ b/GPU/Directx9/GPU_DX9.h @@ -125,24 +125,12 @@ public: void Execute_StencilTest(u32 op, u32 diff); void Execute_ColorRef(u32 op, u32 diff); - void Execute_WorldMtxNum(u32 op, u32 diff); - void Execute_WorldMtxData(u32 op, u32 diff); - void Execute_ViewMtxNum(u32 op, u32 diff); - void Execute_ViewMtxData(u32 op, u32 diff); - void Execute_ProjMtxNum(u32 op, u32 diff); - void Execute_ProjMtxData(u32 op, u32 diff); - void Execute_TgenMtxNum(u32 op, u32 diff); - void Execute_TgenMtxData(u32 op, u32 diff); - void Execute_BoneMtxNum(u32 op, u32 diff); - void Execute_BoneMtxData(u32 op, u32 diff); - // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. 
std::vector DebugGetShaderIDs(DebugShaderType shader) override; std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override; protected: void FastRunLoop(DisplayList &list) override; - void FastLoadBoneMatrix(u32 target) override; void FinishDeferred() override; private: @@ -163,7 +151,7 @@ private: TextureCacheDX9 *textureCacheDX9_; DepalShaderCacheDX9 depalShaderCache_; DrawEngineDX9 drawEngine_; - ShaderManagerDX9 *shaderManager_; + ShaderManagerDX9 *shaderManagerDX9_; static CommandInfo cmdInfo_[256]; diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 9077c358e6..808d26e8b0 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -505,7 +505,7 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { } } -ShaderManagerDX9::ShaderManagerDX9() : lastVShader_(nullptr), lastPShader_(nullptr), globalDirty_(0xFFFFFFFF) { +ShaderManagerDX9::ShaderManagerDX9() : lastVShader_(nullptr), lastPShader_(nullptr) { codeBuffer_ = new char[16384]; } diff --git a/GPU/Directx9/ShaderManagerDX9.h b/GPU/Directx9/ShaderManagerDX9.h index 051381e9d7..60e7283d04 100644 --- a/GPU/Directx9/ShaderManagerDX9.h +++ b/GPU/Directx9/ShaderManagerDX9.h @@ -76,7 +76,7 @@ protected: ShaderID id_; }; -class ShaderManagerDX9 { +class ShaderManagerDX9 : public ShaderManagerCommon { public: ShaderManagerDX9(); ~ShaderManagerDX9(); @@ -84,9 +84,6 @@ public: void ClearCache(bool deleteThem); // TODO: deleteThem currently not respected VSShader *ApplyShader(int prim, u32 vertType); void DirtyShader(); - void DirtyUniform(u64 what) { - globalDirty_ |= what; - } void DirtyLastShader(); int NumVertexShaders() const { return (int)vsCache_.size(); } @@ -119,7 +116,6 @@ private: ShaderID lastFSID_; ShaderID lastVSID_; - u64 globalDirty_; char *codeBuffer_; VSShader *lastVShader_; diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index e936385282..979b92af70 100644 --- 
a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -401,24 +401,26 @@ GPU_GLES::GPU_GLES(GraphicsContext *ctx) UpdateVsyncInterval(true); CheckGPUFeatures(); - shaderManager_ = new ShaderManagerGLES(); + shaderManagerGL_ = new ShaderManagerGLES(); framebufferManagerGL_ = new FramebufferManagerGLES(); framebufferManager_ = framebufferManagerGL_; textureCacheGL_ = new TextureCacheGLES(); textureCache_ = textureCacheGL_; drawEngineCommon_ = &drawEngine_; + shaderManager_ = shaderManagerGL_; + drawEngineCommon_ = &drawEngine_; - drawEngine_.SetShaderManager(shaderManager_); + drawEngine_.SetShaderManager(shaderManagerGL_); drawEngine_.SetTextureCache(textureCacheGL_); drawEngine_.SetFramebufferManager(framebufferManagerGL_); drawEngine_.SetFragmentTestCache(&fragmentTestCache_); framebufferManagerGL_->Init(); framebufferManagerGL_->SetTextureCache(textureCacheGL_); - framebufferManagerGL_->SetShaderManager(shaderManager_); + framebufferManagerGL_->SetShaderManager(shaderManagerGL_); framebufferManagerGL_->SetTransformDrawEngine(&drawEngine_); textureCacheGL_->SetFramebufferManager(framebufferManagerGL_); textureCacheGL_->SetDepalShaderCache(&depalShaderCache_); - textureCacheGL_->SetShaderManager(shaderManager_); + textureCacheGL_->SetShaderManager(shaderManagerGL_); textureCacheGL_->SetTransformDrawEngine(&drawEngine_); fragmentTestCache_.SetTextureCache(textureCacheGL_); @@ -470,20 +472,20 @@ GPU_GLES::GPU_GLES(GraphicsContext *ctx) if (discID.size()) { File::CreateFullPath(GetSysDirectory(DIRECTORY_APP_CACHE)); shaderCachePath_ = GetSysDirectory(DIRECTORY_APP_CACHE) + "/" + g_paramSFO.GetValueString("DISC_ID") + ".glshadercache"; - shaderManager_->LoadAndPrecompile(shaderCachePath_); + shaderManagerGL_->LoadAndPrecompile(shaderCachePath_); } } GPU_GLES::~GPU_GLES() { framebufferManagerGL_->DestroyAllFBOs(true); - shaderManager_->ClearCache(true); + shaderManagerGL_->ClearCache(true); depalShaderCache_.Clear(); fragmentTestCache_.Clear(); if 
(!shaderCachePath_.empty()) { - shaderManager_->Save(shaderCachePath_); + shaderManagerGL_->Save(shaderCachePath_); } - delete shaderManager_; - shaderManager_ = nullptr; + delete shaderManagerGL_; + shaderManagerGL_ = nullptr; #ifdef _WIN32 gfxCtx_->SwapInterval(0); @@ -652,7 +654,7 @@ void GPU_GLES::DeviceLost() { // Simply drop all caches and textures. // FBOs appear to survive? Or no? // TransformDraw has registered as a GfxResourceHolder. - shaderManager_->ClearCache(false); + shaderManagerGL_->ClearCache(false); textureCacheGL_->Clear(false); fragmentTestCache_.Clear(false); depalShaderCache_.Clear(); @@ -662,6 +664,7 @@ void GPU_GLES::DeviceLost() { void GPU_GLES::DeviceRestore() { ILOG("GPU_GLES: DeviceRestore"); + UpdateCmdInfo(); UpdateVsyncInterval(true); } @@ -760,10 +763,10 @@ void GPU_GLES::BeginFrameInternal() { // Save the cache from time to time. TODO: How often? if (!shaderCachePath_.empty() && (gpuStats.numFlips & 1023) == 0) { - shaderManager_->Save(shaderCachePath_); + shaderManagerGL_->Save(shaderCachePath_); } - shaderManager_->DirtyShader(); + shaderManagerGL_->DirtyShader(); // Not sure if this is really needed. shaderManager_->DirtyUniform(DIRTY_ALL); @@ -811,7 +814,7 @@ void GPU_GLES::CopyDisplayToOutputInternal() { framebufferManagerGL_->RebindFramebuffer(); drawEngine_.Flush(); - shaderManager_->DirtyLastShader(); + shaderManagerGL_->DirtyLastShader(); glstate.depthWrite.set(GL_TRUE); glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); @@ -923,8 +926,7 @@ void GPU_GLES::Execute_Prim(u32 op, u32 diff) { if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { drawEngine_.SetupVertexDecoder(gstate.vertType); // Rough estimate, not sure what's correct. 
- int vertexCost = drawEngine_.EstimatePerVertexCost(); - cyclesExecuted += vertexCost * count; + cyclesExecuted += EstimatePerVertexCost() * count; return; } @@ -952,7 +954,7 @@ void GPU_GLES::Execute_Prim(u32 op, u32 diff) { int bytesRead = 0; drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead); - int vertexCost = drawEngine_.EstimatePerVertexCost(); + int vertexCost = EstimatePerVertexCost(); gpuStats.vertexGPUCycles += vertexCost * count; cyclesExecuted += vertexCost * count; @@ -1289,230 +1291,6 @@ void GPU_GLES::Execute_ColorRef(u32 op, u32 diff) { shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF); } -void GPU_GLES::Execute_WorldMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_WORLDMATRIXDATA. - const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.worldMatrix + (op & 0xF)); - const int end = 12 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_GLES::Execute_WorldMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.worldmtxnum & 0xF; - u32 newVal = op << 8; - if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) { - Flush(); - ((u32 *)gstate.worldMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX); - } - num++; - gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_GLES::Execute_ViewMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_VIEWMATRIXDATA. 
- const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.viewMatrix + (op & 0xF)); - const int end = 12 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_GLES::Execute_ViewMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.viewmtxnum & 0xF; - u32 newVal = op << 8; - if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) { - Flush(); - ((u32 *)gstate.viewMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX); - } - num++; - gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_GLES::Execute_ProjMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_PROJMATRIXDATA. - const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.projMatrix + (op & 0xF)); - const int end = 16 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_PROJMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. 
- UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_GLES::Execute_ProjMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.projmtxnum & 0xF; - u32 newVal = op << 8; - if (newVal != ((const u32 *)gstate.projMatrix)[num]) { - Flush(); - ((u32 *)gstate.projMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); - } - num++; - gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_GLES::Execute_TgenMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_TGENMATRIXDATA. - const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF)); - const int end = 12 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_TGENMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_TEXMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_GLES::Execute_TgenMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.texmtxnum & 0xF; - u32 newVal = op << 8; - if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) { - Flush(); - ((u32 *)gstate.tgenMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_TEXMATRIX); - } - num++; - gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_GLES::Execute_BoneMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_BONEMATRIXDATA. 
- const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.boneMatrix + (op & 0x7F)); - const int end = 12 * 8 - (op & 0x7F); - int i = 0; - - // If we can't use software skinning, we have to flush and dirty. - if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) { - while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - } - if (++i >= end) { - break; - } - } - - const int numPlusCount = (op & 0x7F) + i; - for (int num = op & 0x7F; num < numPlusCount; num += 12) { - shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12)); - } - } else { - while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) { - dst[i] = src[i] << 8; - if (++i >= end) { - break; - } - } - - const int numPlusCount = (op & 0x7F) + i; - for (int num = op & 0x7F; num < numPlusCount; num += 12) { - gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12); - } - } - - const int count = i; - gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_GLES::Execute_BoneMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.boneMatrixNumber & 0x7F; - u32 newVal = op << 8; - if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) { - // Bone matrices should NOT flush when software skinning is enabled! 
- if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) { - Flush(); - shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12)); - } else { - gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12); - } - ((u32 *)gstate.boneMatrix)[num] = newVal; - } - num++; - gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F); -} - void GPU_GLES::Execute_Generic(u32 op, u32 diff) { u32 cmd = op >> 24; u32 data = op & 0xFFFFFF; @@ -2044,23 +1822,6 @@ void GPU_GLES::Execute_Generic(u32 op, u32 diff) { } } -void GPU_GLES::FastLoadBoneMatrix(u32 target) { - const int num = gstate.boneMatrixNumber & 0x7F; - const int mtxNum = num / 12; - uint32_t uniformsToDirty = DIRTY_BONEMATRIX0 << mtxNum; - if ((num - 12 * mtxNum) != 0) { - uniformsToDirty |= DIRTY_BONEMATRIX0 << ((mtxNum + 1) & 7); - } - - if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) { - Flush(); - shaderManager_->DirtyUniform(uniformsToDirty); - } else { - gstate_c.deferredVertTypeDirty |= uniformsToDirty; - } - gstate.FastLoadBoneMatrix(target); -} - void GPU_GLES::GetStats(char *buffer, size_t bufsize) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? 
(float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; snprintf(buffer, bufsize - 1, @@ -2090,9 +1851,9 @@ void GPU_GLES::GetStats(char *buffer, size_t bufsize) { (int)textureCacheGL_->NumLoadedTextures(), gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, - shaderManager_->NumVertexShaders(), - shaderManager_->NumFragmentShaders(), - shaderManager_->NumPrograms()); + shaderManagerGL_->NumVertexShaders(), + shaderManagerGL_->NumFragmentShaders(), + shaderManagerGL_->NumPrograms()); } void GPU_GLES::ClearCacheNextFrame() { @@ -2100,12 +1861,12 @@ void GPU_GLES::ClearCacheNextFrame() { } void GPU_GLES::ClearShaderCache() { - shaderManager_->ClearCache(true); + shaderManagerGL_->ClearCache(true); } void GPU_GLES::CleanupBeforeUI() { // Clear any enabled vertex arrays. - shaderManager_->DirtyLastShader(); + shaderManagerGL_->DirtyLastShader(); glstate.arrayBuffer.bind(0); glstate.elementArrayBuffer.bind(0); } @@ -2127,7 +1888,7 @@ void GPU_GLES::DoState(PointerWrap &p) { gstate_c.textureChanged = TEXCHANGE_UPDATED; framebufferManagerGL_->DestroyAllFBOs(true); - shaderManager_->ClearCache(true); + shaderManagerGL_->ClearCache(true); } } @@ -2216,7 +1977,7 @@ std::vector GPU_GLES::DebugGetShaderIDs(DebugShaderType type) { if (type == SHADER_TYPE_VERTEXLOADER) { return drawEngine_.DebugGetVertexLoaderIDs(); } else { - return shaderManager_->DebugGetShaderIDs(type); + return shaderManagerGL_->DebugGetShaderIDs(type); } } @@ -2224,6 +1985,6 @@ std::string GPU_GLES::DebugGetShaderString(std::string id, DebugShaderType type, if (type == SHADER_TYPE_VERTEXLOADER) { return drawEngine_.DebugGetVertexLoaderString(id, stringType); } else { - return shaderManager_->DebugGetShaderString(id, type, stringType); + return shaderManagerGL_->DebugGetShaderString(id, type, stringType); } } diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h index ee6796a3c5..b4b5647e05 100644 --- a/GPU/GLES/GPU_GLES.h +++ b/GPU/GLES/GPU_GLES.h @@ -130,16 +130,6 @@ 
public: void Execute_AlphaTest(u32 op, u32 diff); void Execute_StencilTest(u32 op, u32 diff); void Execute_ColorRef(u32 op, u32 diff); - void Execute_WorldMtxNum(u32 op, u32 diff); - void Execute_WorldMtxData(u32 op, u32 diff); - void Execute_ViewMtxNum(u32 op, u32 diff); - void Execute_ViewMtxData(u32 op, u32 diff); - void Execute_ProjMtxNum(u32 op, u32 diff); - void Execute_ProjMtxData(u32 op, u32 diff); - void Execute_TgenMtxNum(u32 op, u32 diff); - void Execute_TgenMtxData(u32 op, u32 diff); - void Execute_BoneMtxNum(u32 op, u32 diff); - void Execute_BoneMtxData(u32 op, u32 diff); // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. std::vector DebugGetShaderIDs(DebugShaderType shader) override; @@ -147,7 +137,6 @@ public: protected: void FastRunLoop(DisplayList &list) override; - void FastLoadBoneMatrix(u32 target) override; void FinishDeferred() override; private: @@ -172,7 +161,7 @@ private: DepalShaderCacheGLES depalShaderCache_; DrawEngineGLES drawEngine_; FragmentTestCacheGLES fragmentTestCache_; - ShaderManagerGLES *shaderManager_; + ShaderManagerGLES *shaderManagerGL_; int lastVsync_; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index f12f71fca7..7b3f386b3f 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -748,7 +748,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) { } ShaderManagerGLES::ShaderManagerGLES() - : lastShader_(nullptr), globalDirty_(DIRTY_ALL), shaderSwitchDirty_(0), diskCacheDirty_(false) { + : lastShader_(nullptr), shaderSwitchDirty_(0), diskCacheDirty_(false) { codeBuffer_ = new char[16384]; lastFSID_.set_invalid(); lastVSID_.set_invalid(); diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index a19ca3c29c..af735bcc86 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -140,7 +140,7 @@ private: bool isFragment_; }; -class ShaderManagerGLES { 
+class ShaderManagerGLES : public ShaderManagerCommon { public: ShaderManagerGLES(); ~ShaderManagerGLES(); @@ -153,9 +153,6 @@ public: LinkedShader *ApplyFragmentShader(ShaderID VSID, Shader *vs, u32 vertType, int prim); void DirtyShader(); - void DirtyUniform(u64 what) { - globalDirty_ |= what; - } void DirtyLastShader(); // disables vertex arrays int NumVertexShaders() const { return (int)vsCache_.size(); } @@ -191,7 +188,6 @@ private: ShaderID lastVSID_; LinkedShader *lastShader_; - u64 globalDirty_; u64 shaderSwitchDirty_; char *codeBuffer_; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index a03d8ecf4e..b62f888b2a 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -25,6 +25,10 @@ #include "GPU/Common/TextureCacheCommon.h" #include "GPU/Common/DrawEngineCommon.h" +void GPUCommon::Flush() { + drawEngineCommon_->DispatchFlush(); +} + GPUCommon::GPUCommon() : dumpNextFrame_(false), dumpThisFrame_(false), @@ -84,6 +88,35 @@ void GPUCommon::Reinitialize() { ScheduleEvent(GPU_EVENT_REINITIALIZE); } +int GPUCommon::EstimatePerVertexCost() { + // TODO: This is transform cost, also account for rasterization cost somehow... although it probably + // runs in parallel with transform. + + // Also, this is all pure guesswork. If we can find a way to do measurements, that would be great. + + // GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things + // went too fast and starts doing all the work over again). 
+ + int cost = 20; + if (gstate.isLightingEnabled()) { + cost += 10; + + for (int i = 0; i < 4; i++) { + if (gstate.isLightChanEnabled(i)) + cost += 10; + } + } + + if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) { + cost += 20; + } + int morphCount = gstate.getNumMorphWeights(); + if (morphCount > 1) { + cost += 5 * morphCount; + } + return cost; +} + void GPUCommon::PopDLQueue() { easy_guard guard(listLock); if(!dlQueue.empty()) { @@ -1163,6 +1196,230 @@ void GPUCommon::Execute_BlockTransferStart(u32 op, u32 diff) { gstate_c.textureChanged = TEXCHANGE_UPDATED; } +void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) { + // This is almost always followed by GE_CMD_WORLDMATRIXDATA. + const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); + u32 *dst = (u32 *)(gstate.worldMatrix + (op & 0xF)); + const int end = 12 - (op & 0xF); + int i = 0; + + while ((src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) { + const u32 newVal = src[i] << 8; + if (dst[i] != newVal) { + Flush(); + dst[i] = newVal; + shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX); + } + if (++i >= end) { + break; + } + } + + const int count = i; + gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF); + + // Skip over the loaded data, it's done now. + UpdatePC(currentList->pc, currentList->pc + count * 4); + currentList->pc += count * 4; +} + +void GPUCommon::Execute_WorldMtxData(u32 op, u32 diff) { + // Note: it's uncommon to get here now, see above. + int num = gstate.worldmtxnum & 0xF; + u32 newVal = op << 8; + if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) { + Flush(); + ((u32 *)gstate.worldMatrix)[num] = newVal; + shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX); + } + num++; + gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0xF); +} + +void GPUCommon::Execute_ViewMtxNum(u32 op, u32 diff) { + // This is almost always followed by GE_CMD_VIEWMATRIXDATA. 
+ const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); + u32 *dst = (u32 *)(gstate.viewMatrix + (op & 0xF)); + const int end = 12 - (op & 0xF); + int i = 0; + + while ((src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) { + const u32 newVal = src[i] << 8; + if (dst[i] != newVal) { + Flush(); + dst[i] = newVal; + shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX); + } + if (++i >= end) { + break; + } + } + + const int count = i; + gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF); + + // Skip over the loaded data, it's done now. + UpdatePC(currentList->pc, currentList->pc + count * 4); + currentList->pc += count * 4; +} + +void GPUCommon::Execute_ViewMtxData(u32 op, u32 diff) { + // Note: it's uncommon to get here now, see above. + int num = gstate.viewmtxnum & 0xF; + u32 newVal = op << 8; + if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) { + Flush(); + ((u32 *)gstate.viewMatrix)[num] = newVal; + shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX); + } + num++; + gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0xF); +} + +void GPUCommon::Execute_ProjMtxNum(u32 op, u32 diff) { + // This is almost always followed by GE_CMD_PROJMATRIXDATA. + const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); + u32 *dst = (u32 *)(gstate.projMatrix + (op & 0xF)); + const int end = 16 - (op & 0xF); + int i = 0; + + while ((src[i] >> 24) == GE_CMD_PROJMATRIXDATA) { + const u32 newVal = src[i] << 8; + if (dst[i] != newVal) { + Flush(); + dst[i] = newVal; + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + } + if (++i >= end) { + break; + } + } + + const int count = i; + gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + count) & 0xF); + + // Skip over the loaded data, it's done now. 
+ UpdatePC(currentList->pc, currentList->pc + count * 4); + currentList->pc += count * 4; +} + +void GPUCommon::Execute_ProjMtxData(u32 op, u32 diff) { + // Note: it's uncommon to get here now, see above. + int num = gstate.projmtxnum & 0xF; + u32 newVal = op << 8; + if (newVal != ((const u32 *)gstate.projMatrix)[num]) { + Flush(); + ((u32 *)gstate.projMatrix)[num] = newVal; + shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + } + num++; + gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0xF); +} + +void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) { + // This is almost always followed by GE_CMD_TGENMATRIXDATA. + const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); + u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF)); + const int end = 12 - (op & 0xF); + int i = 0; + + while ((src[i] >> 24) == GE_CMD_TGENMATRIXDATA) { + const u32 newVal = src[i] << 8; + if (dst[i] != newVal) { + Flush(); + dst[i] = newVal; + shaderManager_->DirtyUniform(DIRTY_TEXMATRIX); + } + if (++i >= end) { + break; + } + } + + const int count = i; + gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF); + + // Skip over the loaded data, it's done now. + UpdatePC(currentList->pc, currentList->pc + count * 4); + currentList->pc += count * 4; +} + +void GPUCommon::Execute_TgenMtxData(u32 op, u32 diff) { + // Note: it's uncommon to get here now, see above. + int num = gstate.texmtxnum & 0xF; + u32 newVal = op << 8; + if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) { + Flush(); + ((u32 *)gstate.tgenMatrix)[num] = newVal; + shaderManager_->DirtyUniform(DIRTY_TEXMATRIX); + } + num++; + gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0xF); +} + +void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) { + // This is almost always followed by GE_CMD_BONEMATRIXDATA. 
+ const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); + u32 *dst = (u32 *)(gstate.boneMatrix + (op & 0x7F)); + const int end = 12 * 8 - (op & 0x7F); + int i = 0; + + // If we can't use software skinning, we have to flush and dirty. + if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) { + while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) { + const u32 newVal = src[i] << 8; + if (dst[i] != newVal) { + Flush(); + dst[i] = newVal; + } + if (++i >= end) { + break; + } + } + + const int numPlusCount = (op & 0x7F) + i; + for (int num = op & 0x7F; num < numPlusCount; num += 12) { + shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12)); + } + } else { + while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) { + dst[i] = src[i] << 8; + if (++i >= end) { + break; + } + } + + const int numPlusCount = (op & 0x7F) + i; + for (int num = op & 0x7F; num < numPlusCount; num += 12) { + gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12); + } + } + + const int count = i; + gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F); + + // Skip over the loaded data, it's done now. + UpdatePC(currentList->pc, currentList->pc + count * 4); + currentList->pc += count * 4; +} + +void GPUCommon::Execute_BoneMtxData(u32 op, u32 diff) { + // Note: it's uncommon to get here now, see above. + int num = gstate.boneMatrixNumber & 0x7F; + u32 newVal = op << 8; + if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) { + // Bone matrices should NOT flush when software skinning is enabled! 
+ if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) { + Flush(); + shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12)); + } else { + gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12); + } + ((u32 *)gstate.boneMatrix)[num] = newVal; + } + num++; + gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F); +} + void GPUCommon::ExecuteOp(u32 op, u32 diff) { const u32 cmd = op >> 24; @@ -1211,6 +1468,19 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { } void GPUCommon::FastLoadBoneMatrix(u32 target) { + const int num = gstate.boneMatrixNumber & 0x7F; + const int mtxNum = num / 12; + uint32_t uniformsToDirty = DIRTY_BONEMATRIX0 << mtxNum; + if ((num - 12 * mtxNum) != 0) { + uniformsToDirty |= DIRTY_BONEMATRIX0 << ((mtxNum + 1) & 7); + } + + if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) { + Flush(); + shaderManager_->DirtyUniform(uniformsToDirty); + } else { + gstate_c.deferredVertTypeDirty |= uniformsToDirty; + } gstate.FastLoadBoneMatrix(target); } diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index c3fad930e1..8eb5fe1e66 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -85,6 +85,22 @@ public: void Execute_BoundingBox(u32 op, u32 diff); void Execute_BlockTransferStart(u32 op, u32 diff); + void Execute_WorldMtxNum(u32 op, u32 diff); + void Execute_WorldMtxData(u32 op, u32 diff); + void Execute_ViewMtxNum(u32 op, u32 diff); + void Execute_ViewMtxData(u32 op, u32 diff); + void Execute_ProjMtxNum(u32 op, u32 diff); + void Execute_ProjMtxData(u32 op, u32 diff); + void Execute_TgenMtxNum(u32 op, u32 diff); + void Execute_TgenMtxData(u32 op, u32 diff); + void Execute_BoneMtxNum(u32 op, u32 diff); + void Execute_BoneMtxData(u32 op, u32 diff); + + int EstimatePerVertexCost(); + + // Note: Not virtual! 
+ inline void Flush(); + u64 GetTickEstimate() override { #if defined(_M_X64) || defined(__ANDROID__) return curTickEst_; @@ -207,6 +223,7 @@ protected: FramebufferManagerCommon *framebufferManager_; TextureCacheCommon *textureCache_; DrawEngineCommon *drawEngineCommon_; + ShaderManagerCommon *shaderManager_; typedef std::list DisplayListQueue; diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 43b954b1c3..2ee1773785 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -283,40 +283,12 @@ void SoftGPU::FastRunLoop(DisplayList &list) { } } -int EstimatePerVertexCost() { - // TODO: This is transform cost, also account for rasterization cost somehow... although it probably - // runs in parallel with transform. - - // Also, this is all pure guesswork. If we can find a way to do measurements, that would be great. - - // GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things - // went too fast and starts doing all the work over again). - - int cost = 20; - if (gstate.isLightingEnabled()) { - cost += 10; - } - - for (int i = 0; i < 4; i++) { - if (gstate.isLightChanEnabled(i)) - cost += 10; - } - if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) { - cost += 20; - } - // TODO: morphcount - - return cost; -} - -void SoftGPU::ExecuteOp(u32 op, u32 diff) -{ +void SoftGPU::ExecuteOp(u32 op, u32 diff) { u32 cmd = op >> 24; u32 data = op & 0xFFFFFF; // Handle control and drawing commands here directly. The others we delegate. 
- switch (cmd) - { + switch (cmd) { case GE_CMD_BASE: break; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 94c5903e82..45a10a74cc 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -319,7 +319,7 @@ static const CommandTableEntry commandTable[] = { { GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE { GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_BoundingBox }, // + FLUSHBEFORE when we implement... or not, do we need to? - // Changing the vertex type requires us to flush. + // Changing the vertex type requires us to flush. { GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPU_Vulkan::Execute_VertexType }, { GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_Bezier }, @@ -340,14 +340,14 @@ static const CommandTableEntry commandTable[] = { { GE_CMD_DITH3 }, // These handle their own flushing. - { GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_WorldMtxNum }, - { GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_WorldMtxData }, - { GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_ViewMtxNum }, - { GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_ViewMtxData }, - { GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_ProjMtxNum }, - { GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_ProjMtxData }, - { GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_TgenMtxNum }, - { GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_TgenMtxData }, + { GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_WorldMtxNum }, + { GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_WorldMtxData }, + { GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | 
FLAG_WRITES_PC, 0, &GPUCommon::Execute_ViewMtxNum }, + { GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_ViewMtxData }, + { GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_ProjMtxNum }, + { GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_ProjMtxData }, + { GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_TgenMtxNum }, + { GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommon::Execute_TgenMtxData }, { GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPU_Vulkan::Execute_BoneMtxNum }, { GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_BoneMtxData }, @@ -397,24 +397,25 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *ctx) UpdateVsyncInterval(true); CheckGPUFeatures(); - shaderManager_ = new ShaderManagerVulkan(vulkan_); + shaderManagerVulkan_ = new ShaderManagerVulkan(vulkan_); pipelineManager_ = new PipelineManagerVulkan(vulkan_); framebufferManagerVulkan_ = new FramebufferManagerVulkan(vulkan_); framebufferManager_ = framebufferManagerVulkan_; textureCacheVulkan_ = new TextureCacheVulkan(vulkan_); textureCache_ = textureCacheVulkan_; drawEngineCommon_ = &drawEngine_; + shaderManager_ = shaderManagerVulkan_; drawEngine_.SetTextureCache(textureCacheVulkan_); drawEngine_.SetFramebufferManager(framebufferManagerVulkan_); - drawEngine_.SetShaderManager(shaderManager_); + drawEngine_.SetShaderManager(shaderManagerVulkan_); drawEngine_.SetPipelineManager(pipelineManager_); framebufferManagerVulkan_->Init(); framebufferManagerVulkan_->SetTextureCache(textureCacheVulkan_); framebufferManagerVulkan_->SetDrawEngine(&drawEngine_); textureCacheVulkan_->SetFramebufferManager(framebufferManagerVulkan_); textureCacheVulkan_->SetDepalShaderCache(&depalShaderCache_); - textureCacheVulkan_->SetShaderManager(shaderManager_); + textureCacheVulkan_->SetShaderManager(shaderManagerVulkan_); 
textureCacheVulkan_->SetTransformDrawEngine(&drawEngine_); // Sanity check gstate @@ -458,7 +459,7 @@ GPU_Vulkan::~GPU_Vulkan() { framebufferManagerVulkan_->DestroyAllFBOs(true); depalShaderCache_.Clear(); delete pipelineManager_; - delete shaderManager_; + delete shaderManagerVulkan_; } void GPU_Vulkan::CheckGPUFeatures() { @@ -506,7 +507,7 @@ void GPU_Vulkan::BeginHostFrame() { framebufferManagerVulkan_->BeginFrameVulkan(); - shaderManager_->DirtyShader(); + shaderManagerVulkan_->DirtyShader(); shaderManager_->DirtyUniform(DIRTY_ALL); if (dumpNextFrame_) { @@ -686,7 +687,7 @@ void GPU_Vulkan::CopyDisplayToOutputInternal() { // Flush anything left over. drawEngine_.Flush(curCmd_); - shaderManager_->DirtyLastShader(); + shaderManagerVulkan_->DirtyLastShader(); framebufferManagerVulkan_->CopyDisplayToOutput(); @@ -783,8 +784,7 @@ void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) { if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { drawEngine_.SetupVertexDecoder(gstate.vertType); // Rough estimate, not sure what's correct. - int vertexCost = drawEngine_.EstimatePerVertexCost(); - cyclesExecuted += vertexCost * count; + cyclesExecuted += EstimatePerVertexCost() * count; return; } @@ -812,7 +812,7 @@ void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) { int bytesRead = 0; drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead); - int vertexCost = drawEngine_.EstimatePerVertexCost(); + int vertexCost = EstimatePerVertexCost(); gpuStats.vertexGPUCycles += vertexCost * count; cyclesExecuted += vertexCost * count; @@ -1097,166 +1097,6 @@ void GPU_Vulkan::Execute_ColorRef(u32 op, u32 diff) { shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF); } -void GPU_Vulkan::Execute_WorldMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_WORLDMATRIXDATA. 
- const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.worldMatrix + (op & 0xF)); - const int end = 12 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_Vulkan::Execute_WorldMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.worldmtxnum & 0xF; - u32 newVal = op << 8; - if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) { - Flush(); - ((u32 *)gstate.worldMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX); - } - num++; - gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_Vulkan::Execute_ViewMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_VIEWMATRIXDATA. - const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.viewMatrix + (op & 0xF)); - const int end = 12 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. 
- UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_Vulkan::Execute_ViewMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.viewmtxnum & 0xF; - u32 newVal = op << 8; - if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) { - Flush(); - ((u32 *)gstate.viewMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX); - } - num++; - gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_Vulkan::Execute_ProjMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_PROJMATRIXDATA. - const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.projMatrix + (op & 0xF)); - const int end = 16 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_PROJMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_Vulkan::Execute_ProjMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.projmtxnum & 0xF; - u32 newVal = op << 8; - if (newVal != ((const u32 *)gstate.projMatrix)[num]) { - Flush(); - ((u32 *)gstate.projMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); - } - num++; - gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0xF); -} - -void GPU_Vulkan::Execute_TgenMtxNum(u32 op, u32 diff) { - // This is almost always followed by GE_CMD_TGENMATRIXDATA. 
- const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); - u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF)); - const int end = 12 - (op & 0xF); - int i = 0; - - while ((src[i] >> 24) == GE_CMD_TGENMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - shaderManager_->DirtyUniform(DIRTY_TEXMATRIX); - } - if (++i >= end) { - break; - } - } - - const int count = i; - gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF); - - // Skip over the loaded data, it's done now. - UpdatePC(currentList->pc, currentList->pc + count * 4); - currentList->pc += count * 4; -} - -void GPU_Vulkan::Execute_TgenMtxData(u32 op, u32 diff) { - // Note: it's uncommon to get here now, see above. - int num = gstate.texmtxnum & 0xF; - u32 newVal = op << 8; - if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) { - Flush(); - ((u32 *)gstate.tgenMatrix)[num] = newVal; - shaderManager_->DirtyUniform(DIRTY_TEXMATRIX); - } - num++; - gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0xF); -} - void GPU_Vulkan::Execute_BoneMtxNum(u32 op, u32 diff) { // This is almost always followed by GE_CMD_BONEMATRIXDATA. const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4); @@ -1264,6 +1104,7 @@ void GPU_Vulkan::Execute_BoneMtxNum(u32 op, u32 diff) { const int end = 12 * 8 - (op & 0x7F); int i = 0; + // If we can't use software skinning, we have to flush and dirty. while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) { const u32 newVal = src[i] << 8; if (dst[i] != newVal) { @@ -1293,6 +1134,7 @@ void GPU_Vulkan::Execute_BoneMtxData(u32 op, u32 diff) { int num = gstate.boneMatrixNumber & 0x7F; u32 newVal = op << 8; if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) { + // Bone matrices should NOT flush when software skinning is enabled! 
Flush(); shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12)); ((u32 *)gstate.boneMatrix)[num] = newVal; @@ -1850,7 +1692,7 @@ void GPU_Vulkan::DeviceLost() { pipelineManager_->DeviceLost(); textureCacheVulkan_->DeviceLost(); depalShaderCache_.Clear(); - shaderManager_->ClearShaders(); + shaderManagerVulkan_->ClearShaders(); } void GPU_Vulkan::DeviceRestore() { @@ -1863,7 +1705,7 @@ void GPU_Vulkan::DeviceRestore() { drawEngine_.DeviceRestore(vulkan_); pipelineManager_->DeviceRestore(vulkan_); textureCacheVulkan_->DeviceRestore(vulkan_); - shaderManager_->DeviceRestore(vulkan_); + shaderManagerVulkan_->DeviceRestore(vulkan_); } void GPU_Vulkan::GetStats(char *buffer, size_t bufsize) { @@ -1897,8 +1739,8 @@ void GPU_Vulkan::GetStats(char *buffer, size_t bufsize) { (int)textureCacheVulkan_->NumLoadedTextures(), gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, - shaderManager_->GetNumVertexShaders(), - shaderManager_->GetNumFragmentShaders(), + shaderManagerVulkan_->GetNumVertexShaders(), + shaderManagerVulkan_->GetNumFragmentShaders(), pipelineManager_->GetNumPipelines(), drawStats.pushUBOSpaceUsed, drawStats.pushVertexSpaceUsed, @@ -1930,7 +1772,7 @@ void GPU_Vulkan::DoState(PointerWrap &p) { gstate_c.textureChanged = TEXCHANGE_UPDATED; framebufferManagerVulkan_->DestroyAllFBOs(true); - shaderManager_->ClearShaders(); + shaderManagerVulkan_->ClearShaders(); pipelineManager_->Clear(); } } @@ -1953,7 +1795,7 @@ std::vector GPU_Vulkan::DebugGetShaderIDs(DebugShaderType type) { } else if (type == SHADER_TYPE_PIPELINE) { return pipelineManager_->DebugGetObjectIDs(type); } else { - return shaderManager_->DebugGetShaderIDs(type); + return shaderManagerVulkan_->DebugGetShaderIDs(type); } } @@ -1963,6 +1805,6 @@ std::string GPU_Vulkan::DebugGetShaderString(std::string id, DebugShaderType typ } else if (type == SHADER_TYPE_PIPELINE) { return pipelineManager_->DebugGetObjectString(id, type, stringType); } else { - return 
shaderManager_->DebugGetShaderString(id, type, stringType); + return shaderManagerVulkan_->DebugGetShaderString(id, type, stringType); } } diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h index 14b755d567..4bd62f98e1 100644 --- a/GPU/Vulkan/GPU_Vulkan.h +++ b/GPU/Vulkan/GPU_Vulkan.h @@ -117,14 +117,6 @@ public: void Execute_AlphaTest(u32 op, u32 diff); void Execute_StencilTest(u32 op, u32 diff); void Execute_ColorRef(u32 op, u32 diff); - void Execute_WorldMtxNum(u32 op, u32 diff); - void Execute_WorldMtxData(u32 op, u32 diff); - void Execute_ViewMtxNum(u32 op, u32 diff); - void Execute_ViewMtxData(u32 op, u32 diff); - void Execute_ProjMtxNum(u32 op, u32 diff); - void Execute_ProjMtxData(u32 op, u32 diff); - void Execute_TgenMtxNum(u32 op, u32 diff); - void Execute_TgenMtxData(u32 op, u32 diff); void Execute_BoneMtxNum(u32 op, u32 diff); void Execute_BoneMtxData(u32 op, u32 diff); @@ -162,7 +154,7 @@ private: DrawEngineVulkan drawEngine_; // Manages shaders and UBO data - ShaderManagerVulkan *shaderManager_; + ShaderManagerVulkan *shaderManagerVulkan_; // Manages state and pipeline objects PipelineManagerVulkan *pipelineManager_; diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index b7c6c52220..f9f7767d9d 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -157,7 +157,7 @@ static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invert } ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan) - : vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr), globalDirty_(0xFFFFFFFF) { + : vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr) { codeBuffer_ = new char[16384]; uboAlignment_ = vulkan_->GetPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment; memset(&ub_base, 0, sizeof(ub_base)); diff --git a/GPU/Vulkan/ShaderManagerVulkan.h b/GPU/Vulkan/ShaderManagerVulkan.h index 1c4b415297..ed5bae7191 100644 --- 
a/GPU/Vulkan/ShaderManagerVulkan.h +++ b/GPU/Vulkan/ShaderManagerVulkan.h @@ -184,7 +184,7 @@ protected: class VulkanPushBuffer; -class ShaderManagerVulkan { +class ShaderManagerVulkan : public ShaderManagerCommon { public: ShaderManagerVulkan(VulkanContext *vulkan); ~ShaderManagerVulkan(); @@ -204,10 +204,6 @@ public: uint32_t UpdateUniforms(); - void DirtyUniform(uint64_t what) { - globalDirty_ |= what; - } - // TODO: Avoid copying these buffers if same as last draw, can still point to it assuming we're still in the same pushbuffer. // Applies dirty changes and copies the buffer. bool IsBaseDirty() { return true; } @@ -235,7 +231,6 @@ private: char *codeBuffer_; - uint64_t globalDirty_; uint64_t uboAlignment_; // Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time. UB_VS_FS_Base ub_base;