softgpu: Use dirty flags for render overlap checks.

This commit is contained in:
Unknown W. Brackets 2022-01-23 00:55:19 -08:00
parent 77db9c818f
commit a27da25cd6
3 changed files with 79 additions and 69 deletions

View File

@ -165,23 +165,6 @@ void BinManager::UpdateState() {
ComputeRasterizerState(&states_[stateIndex_]);
states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;
DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1());
DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2());
ScreenCoords screenScissorTL = TransformUnit::DrawingToScreen(scissorTL, 0);
ScreenCoords screenScissorBR = TransformUnit::DrawingToScreen(scissorBR, 0);
scissor_.x1 = screenScissorTL.x;
scissor_.y1 = screenScissorTL.y;
scissor_.x2 = screenScissorBR.x + 15;
scissor_.y2 = screenScissorBR.y + 15;
// Our bin sizes are based on offset, so if that changes we have to flush.
if (queueOffsetX_ != gstate.getOffsetX16() || queueOffsetY_ != gstate.getOffsetY16()) {
Flush("offset");
queueOffsetX_ = gstate.getOffsetX16();
queueOffsetY_ = gstate.getOffsetY16();
}
if (lastFlipstats_ != gpuStats.numFlips) {
lastFlipstats_ = gpuStats.numFlips;
ResetStats();
@ -193,31 +176,56 @@ void BinManager::UpdateState() {
if (HasTextureWrite(state))
Flush("tex");
// Okay, now update what's pending.
constexpr uint32_t mirrorMask = 0x0FFFFFFF & ~0x00600000;
const uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
pendingWrites_[0].Expand(gstate.getFrameBufAddress() & mirrorMask, bpp, gstate.FrameBufStride(), scissorTL, scissorBR);
if (state.pixelID.depthWrite)
pendingWrites_[1].Expand(gstate.getDepthBufAddress() & mirrorMask, 2, gstate.DepthBufStride(), scissorTL, scissorBR);
if (dirty_ & SoftDirty::BINNER_RANGE) {
DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1());
DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2());
ScreenCoords screenScissorTL = TransformUnit::DrawingToScreen(scissorTL, 0);
ScreenCoords screenScissorBR = TransformUnit::DrawingToScreen(scissorBR, 0);
// Disallow threads when rendering to the target, even offset.
bool selfRender = HasTextureWrite(state);
int newMaxTasks = selfRender ? 1 : g_threadManager.GetNumLooperThreads();
if (newMaxTasks > MAX_POSSIBLE_TASKS)
newMaxTasks = MAX_POSSIBLE_TASKS;
// We don't want to overlap wrong, so flush any pending.
if (maxTasks_ != newMaxTasks) {
maxTasks_ = newMaxTasks;
Flush("selfrender");
scissor_.x1 = screenScissorTL.x;
scissor_.y1 = screenScissorTL.y;
scissor_.x2 = screenScissorBR.x + 15;
scissor_.y2 = screenScissorBR.y + 15;
// Our bin sizes are based on offset, so if that changes we have to flush.
if (queueOffsetX_ != gstate.getOffsetX16() || queueOffsetY_ != gstate.getOffsetY16()) {
Flush("offset");
queueOffsetX_ = gstate.getOffsetX16();
queueOffsetY_ = gstate.getOffsetY16();
}
// Okay, now update what's pending.
constexpr uint32_t mirrorMask = 0x0FFFFFFF & ~0x00600000;
const uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
pendingWrites_[0].Expand(gstate.getFrameBufAddress() & mirrorMask, bpp, gstate.FrameBufStride(), scissorTL, scissorBR);
if (state.pixelID.depthWrite)
pendingWrites_[1].Expand(gstate.getDepthBufAddress() & mirrorMask, 2, gstate.DepthBufStride(), scissorTL, scissorBR);
dirty_ &= ~SoftDirty::BINNER_RANGE;
}
// Lastly, we have to check if we're newly writing depth we were texturing before.
// This happens in Call of Duty (depth clear after depth texture), for example.
if (!hadDepth && state.pixelID.depthWrite) {
for (size_t i = 0; i < states_.Size(); ++i) {
if (HasTextureWrite(states_.Peek(i)))
Flush("selfdepth");
if (dirty_ & SoftDirty::BINNER_OVERLAP) {
// Disallow threads when rendering to the target, even offset.
bool selfRender = HasTextureWrite(state);
int newMaxTasks = selfRender ? 1 : g_threadManager.GetNumLooperThreads();
if (newMaxTasks > MAX_POSSIBLE_TASKS)
newMaxTasks = MAX_POSSIBLE_TASKS;
// We don't want to overlap wrong, so flush any pending.
if (maxTasks_ != newMaxTasks) {
maxTasks_ = newMaxTasks;
Flush("selfrender");
}
// Lastly, we have to check if we're newly writing depth we were texturing before.
// This happens in Call of Duty (depth clear after depth texture), for example.
if (!hadDepth && state.pixelID.depthWrite) {
for (size_t i = 0; i < states_.Size(); ++i) {
if (HasTextureWrite(states_.Peek(i))) {
Flush("selfdepth");
}
}
}
dirty_ &= ~SoftDirty::BINNER_OVERLAP;
}
}
@ -435,12 +443,13 @@ void BinManager::Flush(const char *reason) {
queueRange_.y1 = 0x7FFFFFFF;
queueRange_.x2 = 0;
queueRange_.y2 = 0;
queueOffsetX_ = -1;
queueOffsetY_ = -1;
for (auto &pending : pendingWrites_)
pending.base = 0;
// We'll need to set the pending writes again, since we just flushed it.
dirty_ |= SoftDirty::BINNER_RANGE;
if (coreCollectDebugStats) {
double et = time_now_d();
flushReasonTimes_[reason] += et - st;

View File

@ -106,8 +106,8 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
{ GE_CMD_FOG1, 0, SoftDirty::TRANSFORM_FOG },
{ GE_CMD_FOG2, 0, SoftDirty::TRANSFORM_FOG },
{ GE_CMD_CLEARMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_TEX | SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_STENCIL | SoftDirty::PIXEL_CACHED },
{ GE_CMD_TEXTUREMAPENABLE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX | SoftDirty::TRANSFORM_BASIC },
{ GE_CMD_CLEARMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_TEX | SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_STENCIL | SoftDirty::PIXEL_CACHED | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXTUREMAPENABLE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX | SoftDirty::TRANSFORM_BASIC | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_FOGENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED | SoftDirty::TRANSFORM_BASIC | SoftDirty::TRANSFORM_FOG | SoftDirty::TRANSFORM_MATRIX },
{ GE_CMD_TEXMODE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX },
// Currently this doesn't affect any state, but maybe it should.
@ -155,8 +155,8 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
{ GE_CMD_MASKRGB, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_WRITEMASK },
{ GE_CMD_MASKALPHA, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_WRITEMASK },
{ GE_CMD_ZTEST, 0, SoftDirty::PIXEL_BASIC },
{ GE_CMD_ZTESTENABLE, 0, SoftDirty::PIXEL_BASIC },
{ GE_CMD_ZWRITEDISABLE, 0, SoftDirty::PIXEL_BASIC },
{ GE_CMD_ZTESTENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_ZWRITEDISABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_LOGICOP, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED },
{ GE_CMD_LOGICOPENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED },
@ -168,33 +168,33 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
{ GE_CMD_TEXOFFSETU },
{ GE_CMD_TEXOFFSETV },
{ GE_CMD_TEXSIZE0, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXSIZE1, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXSIZE2, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXSIZE3, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXSIZE4, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXSIZE5, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXSIZE6, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXSIZE7, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXFORMAT, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXSIZE0, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXSIZE1, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXSIZE2, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXSIZE3, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXSIZE4, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXSIZE5, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXSIZE6, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXSIZE7, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXFORMAT, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXLEVEL, 0, SoftDirty::RAST_TEX },
{ GE_CMD_TEXLODSLOPE, 0, SoftDirty::RAST_TEX },
{ GE_CMD_TEXADDR0, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXADDR1, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXADDR2, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXADDR3, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXADDR4, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXADDR5, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXADDR6, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXADDR7, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXBUFWIDTH0, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXBUFWIDTH1, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXBUFWIDTH2, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXBUFWIDTH3, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXBUFWIDTH4, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXBUFWIDTH5, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXBUFWIDTH6, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXBUFWIDTH7, 0, SoftDirty::SAMPLER_TEXLIST },
{ GE_CMD_TEXADDR0, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXADDR1, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXADDR2, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXADDR3, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXADDR4, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXADDR5, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXADDR6, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXADDR7, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXBUFWIDTH0, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXBUFWIDTH1, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXBUFWIDTH2, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXBUFWIDTH3, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXBUFWIDTH4, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXBUFWIDTH5, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXBUFWIDTH6, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXBUFWIDTH7, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_CLUTADDR },
{ GE_CMD_CLUTADDRUPPER },

View File

@ -84,6 +84,7 @@ enum class SoftDirty : uint64_t {
TRANSFORM_FOG = 1ULL << 20,
BINNER_RANGE = 1ULL << 21,
BINNER_OVERLAP = 1ULL << 22,
};
static inline SoftDirty operator |(const SoftDirty &lhs, const SoftDirty &rhs) {
return SoftDirty((uint64_t)lhs | (uint64_t)rhs);