Merge pull request #15313 from unknownbrackets/softgpu-binning

softgpu: Allow binning across prim calls
This commit is contained in:
Henrik Rydgård 2022-01-16 10:27:36 +01:00 committed by GitHub
commit f96c22765c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 60 additions and 25 deletions

View File

@ -124,6 +124,8 @@ BinManager::BinManager() {
queueRange_.y2 = 0;
waitable_ = new BinWaitable();
for (auto &s : taskStatus_)
s = false;
}
BinManager::~BinManager() {

View File

@ -554,10 +554,14 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) {
break;
case GE_CMD_FRAMEBUFPTR:
// We assume fb.data won't change while we're drawing.
drawEngine_->transformUnit.Flush();
fb.data = Memory::GetPointer(gstate.getFrameBufAddress());
break;
case GE_CMD_FRAMEBUFWIDTH:
// We assume fb.data won't change while we're drawing.
drawEngine_->transformUnit.Flush();
fb.data = Memory::GetPointer(gstate.getFrameBufAddress());
break;
@ -572,6 +576,8 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_TEXADDR5:
case GE_CMD_TEXADDR6:
case GE_CMD_TEXADDR7:
// TODO: Try not flushing here, unless overlap with framebuf/depthbuf?
drawEngine_->transformUnit.Flush();
break;
case GE_CMD_TEXBUFWIDTH0:
@ -582,6 +588,8 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_TEXBUFWIDTH5:
case GE_CMD_TEXBUFWIDTH6:
case GE_CMD_TEXBUFWIDTH7:
// TODO: Try not flushing here, unless overlap with framebuf/depthbuf?
drawEngine_->transformUnit.Flush();
break;
case GE_CMD_CLUTADDR:
@ -590,6 +598,10 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_LOADCLUT:
{
// Might be copying drawing into the CLUT, so flush.
// TODO: It seems worth copying the CLUT to state...
drawEngine_->transformUnit.Flush();
u32 clutAddr = gstate.getClutAddress();
u32 clutTotalBytes = gstate.getClutLoadBytes();
@ -620,6 +632,9 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_TRANSFERSTART:
{
// Let's finish any drawing before we transfer.
drawEngine_->transformUnit.Flush();
u32 srcBasePtr = gstate.getTransferSrcAddress();
u32 srcStride = gstate.getTransferSrcStride();
@ -670,10 +685,14 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) {
break;
case GE_CMD_ZBUFPTR:
// We assume depthbuf.data won't change while we're drawing.
drawEngine_->transformUnit.Flush();
depthbuf.data = Memory::GetPointer(gstate.getDepthBufAddress());
break;
case GE_CMD_ZBUFWIDTH:
// We assume depthbuf.data won't change while we're drawing.
drawEngine_->transformUnit.Flush();
depthbuf.data = Memory::GetPointer(gstate.getDepthBufAddress());
break;
@ -867,6 +886,11 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) {
}
}
// Flushes the transform unit before control goes back to the CPU, so that
// drawing done so far is appropriately visible.
void SoftGPU::FinishDeferred() {
	auto &transformUnit = drawEngine_->transformUnit;
	transformUnit.Flush();
}
// Writes the stats text into `buffer`, which has room for `bufsize` bytes.
// The text is a fixed placeholder string; snprintf truncates it to fit.
void SoftGPU::GetStats(char *buffer, size_t bufsize) {
	static const char statsText[] = "SoftGPU: (N/A)";
	snprintf(buffer, bufsize, "%s", statsText);
}

View File

@ -65,6 +65,7 @@ public:
void CheckGPUFeatures() override {}
void InitClear() override {}
void ExecuteOp(u32 op, u32 diff) override;
void FinishDeferred() override;
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
void CopyDisplayToOutput(bool reallyDirty) override;

View File

@ -41,10 +41,12 @@
// Acquires the two resources held by the transform unit: a read/write page
// allocation of TRANSFORM_BUF_SIZE bytes (decoded_) and a BinManager (binner_).
TransformUnit::TransformUnit() {
	void *pages = AllocateMemoryPages(TRANSFORM_BUF_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
	decoded_ = static_cast<u8 *>(pages);
	binner_ = new BinManager();
}
// Releases what the constructor acquired: the decoded_ page allocation and
// the BinManager pointed to by binner_.
TransformUnit::~TransformUnit() {
	// Free with the same size the constructor passed to AllocateMemoryPages
	// (TRANSFORM_BUF_SIZE). Previously a different constant was used here, so
	// the freed size did not match the allocated size.
	// NOTE(review): presumably the size matters on platforms where
	// FreeMemoryPages unmaps by length - confirm against its implementation.
	FreeMemoryPages(decoded_, TRANSFORM_BUF_SIZE);
	delete binner_;
}
SoftwareDrawEngine::SoftwareDrawEngine() {
@ -333,8 +335,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
// TODO: Do this in two passes - first process the vertices (before indexing/stripping),
// then resolve the indices. This lets us avoid transforming shared vertices twice.
static BinManager binner;
binner.UpdateState();
binner_->UpdateState();
bool outside_range_flag = false;
switch (prim_type) {
@ -367,22 +368,22 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
case GE_PRIM_TRIANGLES:
{
if (!gstate.isCullEnabled() || gstate.isModeClear()) {
Clipper::ProcessTriangle(data[0], data[1], data[2], data[2], binner);
Clipper::ProcessTriangle(data[2], data[1], data[0], data[2], binner);
Clipper::ProcessTriangle(data[0], data[1], data[2], data[2], *binner_);
Clipper::ProcessTriangle(data[2], data[1], data[0], data[2], *binner_);
} else if (!gstate.getCullMode()) {
Clipper::ProcessTriangle(data[2], data[1], data[0], data[2], binner);
Clipper::ProcessTriangle(data[2], data[1], data[0], data[2], *binner_);
} else {
Clipper::ProcessTriangle(data[0], data[1], data[2], data[2], binner);
Clipper::ProcessTriangle(data[0], data[1], data[2], data[2], *binner_);
}
break;
}
case GE_PRIM_LINES:
Clipper::ProcessLine(data[0], data[1], binner);
Clipper::ProcessLine(data[0], data[1], *binner_);
break;
case GE_PRIM_POINTS:
Clipper::ProcessPoint(data[0], binner);
Clipper::ProcessPoint(data[0], *binner_);
break;
default:
@ -422,14 +423,14 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
}
if (data_index == 4) {
Clipper::ProcessRect(data[0], data[1], binner);
Clipper::ProcessRect(data[2], data[3], binner);
Clipper::ProcessRect(data[0], data[1], *binner_);
Clipper::ProcessRect(data[2], data[3], *binner_);
data_index = 0;
}
}
if (data_index >= 2) {
Clipper::ProcessRect(data[0], data[1], binner);
Clipper::ProcessRect(data[0], data[1], *binner_);
data_index -= 2;
}
break;
@ -458,7 +459,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
--skip_count;
} else {
// We already incremented data_index, so data_index & 1 is previous one.
Clipper::ProcessLine(data[data_index & 1], data[(data_index & 1) ^ 1], binner);
Clipper::ProcessLine(data[data_index & 1], data[(data_index & 1) ^ 1], *binner_);
}
}
break;
@ -484,7 +485,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
// If a strip is effectively a rectangle, draw it as such!
if (!outside_range_flag && Rasterizer::DetectRectangleFromThroughModeStrip(data)) {
Clipper::ProcessRect(data[0], data[3], binner);
Clipper::ProcessRect(data[0], data[3], *binner_);
break;
}
}
@ -512,14 +513,14 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
}
if (!gstate.isCullEnabled() || gstate.isModeClear()) {
Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], binner);
Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], binner);
Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], *binner_);
Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], *binner_);
} else if ((!gstate.getCullMode()) ^ ((data_index - 1) % 2)) {
// We need to reverse the vertex order for each second primitive,
// but we additionally need to do that for every primitive if CCW cullmode is used.
Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], binner);
Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], *binner_);
} else {
Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], binner);
Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], *binner_);
}
}
break;
@ -560,7 +561,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
int tl = -1, br = -1;
if (!outside_range_flag && Rasterizer::DetectRectangleFromThroughModeFan(data, vertex_count, &tl, &br)) {
Clipper::ProcessRect(data[tl], data[br], binner);
Clipper::ProcessRect(data[tl], data[br], *binner_);
break;
}
}
@ -588,14 +589,14 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
}
if (!gstate.isCullEnabled() || gstate.isModeClear()) {
Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], binner);
Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], binner);
Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], *binner_);
Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], *binner_);
} else if ((!gstate.getCullMode()) ^ ((data_index - 1) % 2)) {
// We need to reverse the vertex order for each second primitive,
// but we additionally need to do that for every primitive if CCW cullmode is used.
Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], binner);
Clipper::ProcessTriangle(data[2], data[1], data[0], data[provoking_index], *binner_);
} else {
Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], binner);
Clipper::ProcessTriangle(data[0], data[1], data[2], data[provoking_index], *binner_);
}
}
break;
@ -605,9 +606,10 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
ERROR_LOG(G3D, "Unexpected prim type: %d", prim_type);
break;
}
}
binner.Flush();
void TransformUnit::Flush() {
binner_->Flush();
GPUDebug::NotifyDraw();
}

View File

@ -32,6 +32,7 @@ typedef Vec3<float> ViewCoords;
typedef Vec4<float> ClipCoords; // Range: -w <= x/y/z <= w
struct SplinePatch;
class BinManager;
struct ScreenCoords
{
@ -117,9 +118,14 @@ public:
void SubmitPrimitive(void* vertices, void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead, SoftwareDrawEngine *drawEngine);
bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
void Flush();
private:
VertexData ReadVertex(VertexReader &vreader, bool &outside_range_flag);
u8 *decoded_;
u8 *decoded_ = nullptr;
BinManager *binner_ = nullptr;
};
class SoftwareDrawEngine : public DrawEngineCommon {