mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 05:19:56 +00:00
24560eef5c
If we can, we want to keep the thread queues healthy, but not full. Reduce the amount we push on a typical drain to avoid the Wait().
284 lines
6.4 KiB
C++
284 lines
6.4 KiB
C++
// Copyright (c) 2022- PPSSPP Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0 or later versions.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official git repository and contact information can be found at
|
|
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
|
|
|
#pragma once
|
|
|
|
#include <atomic>
|
|
#include <unordered_map>
|
|
#include "GPU/Software/Rasterizer.h"
|
|
|
|
struct BinWaitable;
|
|
class DrawBinItemsTask;
|
|
|
|
enum class BinItemType : uint8_t {
|
|
TRIANGLE,
|
|
CLEAR_RECT,
|
|
RECT,
|
|
SPRITE,
|
|
LINE,
|
|
POINT,
|
|
};
|
|
|
|
struct BinCoords {
|
|
int x1;
|
|
int y1;
|
|
int x2;
|
|
int y2;
|
|
|
|
bool Invalid() const {
|
|
return x2 < x1 || y2 < y1;
|
|
}
|
|
|
|
BinCoords Intersect(const BinCoords &range) const;
|
|
};
|
|
|
|
struct BinItem {
|
|
BinItemType type;
|
|
uint16_t stateIndex;
|
|
BinCoords range;
|
|
VertexData v0;
|
|
VertexData v1;
|
|
VertexData v2;
|
|
};
|
|
|
|
template <typename T, size_t N>
|
|
struct BinQueue {
|
|
BinQueue() {
|
|
Reset();
|
|
}
|
|
~BinQueue() {
|
|
FreeAlignedMemory(items_);
|
|
}
|
|
|
|
void Setup() {
|
|
items_ = (T *)AllocateAlignedMemory(sizeof(T) * N, 16);
|
|
}
|
|
|
|
void Reset() {
|
|
head_ = 0;
|
|
tail_ = 0;
|
|
size_ = 0;
|
|
}
|
|
|
|
size_t Push(const T &item) {
|
|
size_t i = tail_++;
|
|
if (i + 1 == N)
|
|
tail_ -= N;
|
|
items_[i] = item;
|
|
size_++;
|
|
return i;
|
|
}
|
|
|
|
T Pop() {
|
|
size_t i = head_++;
|
|
if (i + 1 == N)
|
|
head_ -= N;
|
|
T item = items_[i];
|
|
size_--;
|
|
return item;
|
|
}
|
|
|
|
// Only safe if you're the only one reading.
|
|
T &PeekNext() {
|
|
return items_[head_];
|
|
}
|
|
|
|
void SkipNext() {
|
|
size_t i = head_++;
|
|
if (i + 1 == N)
|
|
head_ -= N;
|
|
size_--;
|
|
}
|
|
|
|
// Only safe if you're the only one reading.
|
|
const T &Peek(size_t offset) const {
|
|
size_t i = head_ + offset;
|
|
if (i >= N)
|
|
i -= N;
|
|
return items_[i];
|
|
}
|
|
|
|
// Only safe if you're the only one writing.
|
|
T &PeekPush() {
|
|
return items_[tail_];
|
|
}
|
|
|
|
void PushPeeked() {
|
|
size_t i = tail_++;
|
|
if (i + 1 == N)
|
|
tail_ -= N;
|
|
size_++;
|
|
}
|
|
|
|
size_t Size() const {
|
|
return size_;
|
|
}
|
|
|
|
bool Full() const {
|
|
return size_ == N - 1;
|
|
}
|
|
|
|
bool NearFull() const {
|
|
return size_ >= N - 2;
|
|
}
|
|
|
|
bool Empty() const {
|
|
return size_ == 0;
|
|
}
|
|
|
|
T &operator[](size_t index) {
|
|
return items_[index];
|
|
}
|
|
|
|
const T &operator[](size_t index) const {
|
|
return items_[index];
|
|
}
|
|
|
|
T *items_ = nullptr;
|
|
std::atomic<size_t> head_;
|
|
std::atomic<size_t> tail_ ;
|
|
std::atomic<size_t> size_;
|
|
};
|
|
|
|
union BinClut {
|
|
uint8_t readable[1024];
|
|
};
|
|
|
|
struct BinTaskList {
|
|
// We shouldn't ever need more than two at once, since we use an atomic to run one at a time.
|
|
// A second could run due to overlap during teardown.
|
|
static constexpr int N = 2;
|
|
|
|
DrawBinItemsTask *tasks[N]{};
|
|
int count = 0;
|
|
|
|
DrawBinItemsTask *Next() {
|
|
return tasks[count % N];
|
|
}
|
|
};
|
|
|
|
struct BinDirtyRange {
|
|
uint32_t base;
|
|
uint32_t strideBytes;
|
|
uint32_t widthBytes;
|
|
uint32_t height;
|
|
|
|
void Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, DrawingCoords &tl, DrawingCoords &br);
|
|
};
|
|
|
|
class BinManager {
|
|
public:
|
|
BinManager();
|
|
~BinManager();
|
|
|
|
void UpdateState();
|
|
void UpdateClut(const void *src);
|
|
|
|
const Rasterizer::RasterizerState &State() {
|
|
return states_[stateIndex_];
|
|
}
|
|
|
|
void AddTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2);
|
|
void AddClearRect(const VertexData &v0, const VertexData &v1);
|
|
void AddRect(const VertexData &v0, const VertexData &v1);
|
|
void AddSprite(const VertexData &v0, const VertexData &v1);
|
|
void AddLine(const VertexData &v0, const VertexData &v1);
|
|
void AddPoint(const VertexData &v0);
|
|
|
|
void Drain(bool flushing = false);
|
|
void Flush(const char *reason);
|
|
bool HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
|
|
// Assumes you've also checked for a write (writes are partial so are automatically reads.)
|
|
bool HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
|
|
|
|
void GetStats(char *buffer, size_t bufsize);
|
|
void ResetStats();
|
|
|
|
void SetDirty(SoftDirty flags) {
|
|
dirty_ |= flags;
|
|
}
|
|
void ClearDirty(SoftDirty flags) {
|
|
dirty_ &= ~flags;
|
|
}
|
|
SoftDirty GetDirty() {
|
|
return dirty_;
|
|
}
|
|
bool HasDirty(SoftDirty flags) {
|
|
return dirty_ & flags;
|
|
}
|
|
|
|
protected:
|
|
#if PPSSPP_ARCH(32BIT)
|
|
// Use less memory and less address space. We're unlikely to have 32 cores on a 32-bit CPU.
|
|
static constexpr int MAX_POSSIBLE_TASKS = 16;
|
|
#else
|
|
static constexpr int MAX_POSSIBLE_TASKS = 64;
|
|
#endif
|
|
// This is about 1MB of state data.
|
|
static constexpr int QUEUED_STATES = 4096;
|
|
// These are 1KB each, so half an MB.
|
|
static constexpr int QUEUED_CLUTS = 512;
|
|
// About 360 KB, but we have usually 16 or less of them, so 5 MB - 22 MB.
|
|
static constexpr int QUEUED_PRIMS = 2048;
|
|
|
|
typedef BinQueue<Rasterizer::RasterizerState, QUEUED_STATES> BinStateQueue;
|
|
typedef BinQueue<BinClut, QUEUED_CLUTS> BinClutQueue;
|
|
typedef BinQueue<BinItem, QUEUED_PRIMS> BinItemQueue;
|
|
|
|
private:
|
|
BinStateQueue states_;
|
|
BinClutQueue cluts_;
|
|
uint16_t stateIndex_;
|
|
uint16_t clutIndex_;
|
|
BinCoords scissor_;
|
|
BinItemQueue queue_;
|
|
BinCoords queueRange_;
|
|
SoftDirty dirty_ = SoftDirty::NONE;
|
|
|
|
int maxTasks_ = 1;
|
|
bool tasksSplit_ = false;
|
|
std::vector<BinCoords> taskRanges_;
|
|
BinItemQueue taskQueues_[MAX_POSSIBLE_TASKS];
|
|
BinTaskList taskLists_[MAX_POSSIBLE_TASKS];
|
|
std::atomic<bool> taskStatus_[MAX_POSSIBLE_TASKS];
|
|
BinWaitable *waitable_ = nullptr;
|
|
|
|
BinDirtyRange pendingWrites_[2]{};
|
|
std::unordered_map<uint32_t, BinDirtyRange> pendingReads_;
|
|
|
|
bool pendingOverlap_ = false;
|
|
|
|
std::unordered_map<const char *, double> flushReasonTimes_;
|
|
std::unordered_map<const char *, double> lastFlushReasonTimes_;
|
|
const char *slowestFlushReason_ = nullptr;
|
|
double slowestFlushTime_ = 0.0;
|
|
int lastFlipstats_ = 0;
|
|
int enqueues_ = 0;
|
|
int mostThreads_ = 0;
|
|
|
|
void MarkPendingReads(const Rasterizer::RasterizerState &state);
|
|
void MarkPendingWrites(const Rasterizer::RasterizerState &state);
|
|
bool HasTextureWrite(const Rasterizer::RasterizerState &state);
|
|
BinCoords Scissor(BinCoords range);
|
|
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);
|
|
BinCoords Range(const VertexData &v0, const VertexData &v1);
|
|
BinCoords Range(const VertexData &v0);
|
|
void Expand(const BinCoords &range);
|
|
|
|
friend class DrawBinItemsTask;
|
|
};
|