Merge pull request #15318 from unknownbrackets/softgpu-opt

softgpu: Heuristic to avoid over-draining
This commit is contained in:
Henrik Rydgård 2022-01-17 07:43:34 +01:00 committed by GitHub
commit 128e2fa14e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 24 additions and 8 deletions

View File

@ -183,6 +183,11 @@ void BinManager::UpdateState() {
queueOffsetX_ = gstate.getOffsetX16();
queueOffsetY_ = gstate.getOffsetY16();
}
if (lastFlipstats_ != gpuStats.numFlips) {
lastFlipstats_ = gpuStats.numFlips;
ResetStats();
}
}
void BinManager::UpdateClut(const void *src) {
@ -320,15 +325,22 @@ void BinManager::Drain() {
queue_.SkipNext();
}
int threads = 0;
for (int i = 0; i < (int)taskRanges_.size(); ++i) {
if (taskQueues_[i].Empty() || taskStatus_[i])
if (taskQueues_[i].Empty())
continue;
threads++;
if (taskStatus_[i])
continue;
waitable_->Fill();
taskStatus_[i] = true;
DrawBinItemsTask *task = new DrawBinItemsTask(waitable_, taskQueues_[i], taskStatus_[i], states_);
g_threadManager.EnqueueTaskOnThread(i, task, true);
enqueues_++;
}
mostThreads_ = std::max(mostThreads_, threads);
}
}
@ -392,13 +404,13 @@ void BinManager::GetStats(char *buffer, size_t bufsize) {
"Slowest individual flush: %s (%0.4f)\n"
"Slowest frame flush: %s (%0.4f)\n"
"Slowest recent flush: %s (%0.4f)\n"
"Total flush time: %0.4f (%05.2f%%, last 2: %05.2f%%)\n",
"Total flush time: %0.4f (%05.2f%%, last 2: %05.2f%%)\n"
"Thread enqueues: %d, count %d",
slowestFlushReason_, slowestFlushTime_,
slowestTotalReason, slowestTotalTime,
slowestRecentReason, slowestRecentTime,
allTotal, allTotal * (6000.0 / 1.001), recentTotal * (3000.0 / 1.001));
constexpr int foo = sizeof(BinItem);
allTotal, allTotal * (6000.0 / 1.001), recentTotal * (3000.0 / 1.001),
enqueues_, mostThreads_);
}
void BinManager::ResetStats() {
@ -406,6 +418,8 @@ void BinManager::ResetStats() {
flushReasonTimes_.clear();
slowestFlushReason_ = nullptr;
slowestFlushTime_ = 0.0;
enqueues_ = 0;
mostThreads_ = 0;
}
inline BinCoords BinCoords::Intersect(const BinCoords &range) const {
@ -454,7 +468,7 @@ void BinManager::Expand(const BinCoords &range) {
queueRange_.x2 = std::max(queueRange_.x2, range.x2);
queueRange_.y2 = std::max(queueRange_.y2, range.y2);
if (maxTasks_ == 1 || queueRange_.y2 - queueRange_.y1 >= 224 * 16) {
if (maxTasks_ == 1 || (queueRange_.y2 - queueRange_.y1 >= 224 * 16 && enqueues_ < 36 * maxTasks_)) {
Drain();
}
}

View File

@ -211,6 +211,9 @@ private:
std::unordered_map<const char *, double> lastFlushReasonTimes_;
const char *slowestFlushReason_ = nullptr;
double slowestFlushTime_ = 0.0;
int lastFlipstats_ = 0;
int enqueues_ = 0;
int mostThreads_ = 0;
BinCoords Scissor(BinCoords range);
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);

View File

@ -689,7 +689,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_TEXSIZE5:
case GE_CMD_TEXSIZE6:
case GE_CMD_TEXSIZE7:
drawEngine_->transformUnit.FlushIfOverlap("texbufw", gstate.getTextureAddress(cmd - GE_CMD_TEXSIZE0), 4 * gstate.getTextureWidth(cmd - GE_CMD_TEXSIZE0) * gstate.getTextureHeight(cmd - GE_CMD_TEXSIZE0));
drawEngine_->transformUnit.FlushIfOverlap("texsize", gstate.getTextureAddress(cmd - GE_CMD_TEXSIZE0), 4 * gstate.getTextureWidth(cmd - GE_CMD_TEXSIZE0) * gstate.getTextureHeight(cmd - GE_CMD_TEXSIZE0));
break;
case GE_CMD_ZBUFPTR:

View File

@ -765,7 +765,6 @@ void TransformUnit::Flush(const char *reason) {
void TransformUnit::GetStats(char *buffer, size_t bufsize) {
// TODO: More stats?
binner_->GetStats(buffer, bufsize);
binner_->ResetStats();
}
void TransformUnit::FlushIfOverlap(const char *reason, uint32_t addr, uint32_t sz) {