From 2539fb7c3c9b8c26f4f942571371c6eb17a97589 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 15 Jan 2022 18:43:44 -0800 Subject: [PATCH 1/2] softgpu: Tune queue push/pop to reduce overhead. These aren't safely atomic with concurrent pushers or poppers, but as long as there's only one of each, they're still safe. Shaves a decent % off Drain time for heavy scenes. --- GPU/Software/BinManager.cpp | 19 ++++++++++++------- GPU/Software/BinManager.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/GPU/Software/BinManager.cpp b/GPU/Software/BinManager.cpp index d85708c541..993bf8712e 100644 --- a/GPU/Software/BinManager.cpp +++ b/GPU/Software/BinManager.cpp @@ -105,8 +105,9 @@ public: private: void ProcessItems() { while (!items_.Empty()) { - const BinItem item = items_.Pop(); + const BinItem &item = items_.PeekNext(); DrawBinItem(item, states_[item.stateIndex]); + items_.SkipNext(); } } @@ -261,12 +262,13 @@ void BinManager::Drain() { if (taskRanges_.size() <= 1) { while (!queue_.Empty()) { - const BinItem item = queue_.Pop(); + const BinItem &item = queue_.PeekNext(); DrawBinItem(item, states_[item.stateIndex]); + queue_.SkipNext(); } } else { while (!queue_.Empty()) { - const BinItem item = queue_.Pop(); + const BinItem &item = queue_.PeekNext(); for (int i = 0; i < (int)taskRanges_.size(); ++i) { const BinCoords range = taskRanges_[i].Intersect(item.range); if (range.Invalid()) @@ -276,10 +278,13 @@ void BinManager::Drain() { if (taskQueues_[i].Full()) waitable_->Wait(); - BinItem subitem = item; - subitem.range = range; - taskQueues_[i].Push(subitem); + BinItem &taskItem = taskQueues_[i].PeekPush(); + taskItem = item; + taskItem.range = range; + taskQueues_[i].PushPeeked(); + } + queue_.SkipNext(); } for (int i = 0; i < (int)taskRanges_.size(); ++i) { @@ -302,7 +307,7 @@ void BinManager::Flush() { queue_.Reset(); while (states_.Size() > 1) - states_.Pop(); + states_.SkipNext(); queueRange_.x1 = 
0x7FFFFFFF; queueRange_.y1 = 0x7FFFFFFF; diff --git a/GPU/Software/BinManager.h b/GPU/Software/BinManager.h index b25921cf8f..0848ed8d5c 100644 --- a/GPU/Software/BinManager.h +++ b/GPU/Software/BinManager.h @@ -89,6 +89,34 @@ struct BinQueue { return item; } + // Only safe if you're the only one reading. + T &PeekNext() { + _dbg_assert_(!Empty()); + return items_[head_]; + } + + void SkipNext() { + _dbg_assert_(!Empty()); + size_t i = head_++; + if (i + 1 == N) + head_ -= N; + size_--; + } + + // Only safe if you're the only one writing. + T &PeekPush() { + _dbg_assert_(size_ < N); + return items_[tail_]; + } + + void PushPeeked() { + _dbg_assert_(size_ < N); + size_t i = tail_++; + if (i + 1 == N) + tail_ -= N; + size_++; + } + size_t Size() const { return size_; } From b42ebe15d8d3eddd78ab990b22ad27c3496e9d22 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 15 Jan 2022 21:59:23 -0800 Subject: [PATCH 2/2] softgpu: Fix off-by-one size limit on bin queues. --- GPU/Software/BinManager.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/Software/BinManager.h b/GPU/Software/BinManager.h index 0848ed8d5c..4d32ea0335 100644 --- a/GPU/Software/BinManager.h +++ b/GPU/Software/BinManager.h @@ -70,7 +70,7 @@ struct BinQueue { } size_t Push(const T &item) { - _dbg_assert_(size_ < N); + _dbg_assert_(size_ < N - 1); size_t i = tail_++; if (i + 1 == N) tail_ -= N; @@ -105,12 +105,12 @@ struct BinQueue { // Only safe if you're the only one writing. T &PeekPush() { - _dbg_assert_(size_ < N); + _dbg_assert_(size_ < N - 1); return items_[tail_]; } void PushPeeked() { - _dbg_assert_(size_ < N); + _dbg_assert_(size_ < N - 1); size_t i = tail_++; if (i + 1 == N) tail_ -= N; @@ -122,7 +122,7 @@ struct BinQueue { } bool Full() const { - return size_ == N; + return size_ == N - 1; } bool Empty() const {