SoftGPU: Use vertexjit for submitted primitives.

This reuses the standard vertex decoder cache that the other renderers use, instead of setting up a new vertex decoder for every submitted primitive.

In Legend of Heroes 3, for example, this provides a 10% speed improvement.
This commit is contained in:
Unknown W. Brackets 2017-05-20 20:19:08 -07:00
parent 66dc0ea62f
commit 8187927b9d
4 changed files with 47 additions and 27 deletions

View File

@ -55,23 +55,6 @@ static Draw::SamplerState *samplerLinear = nullptr;
static Draw::Buffer *vdata = nullptr;
static Draw::Buffer *idata = nullptr;
// Draw engine for the software renderer: a DrawEngineCommon subclass that
// allocates the decode buffers and forwards submitted primitives to TransformUnit.
// NOTE(review): no destructor is visible in this definition that releases the
// pages allocated in the constructor - confirm whether the base class frees them.
class SoftwareDrawEngine : public DrawEngineCommon {
public:
// Allocates the decoded-vertex, decoded-index and spline buffers as read/write pages.
SoftwareDrawEngine() {
// All this is a LOT of memory, need to see if we can cut down somehow. Used for splines.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
}
// No-op: this engine does nothing on flush.
virtual void DispatchFlush() {
}
// Passes the primitive to TransformUnit::SubmitPrimitive with all arguments unchanged.
virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
TransformUnit::SubmitPrimitive(verts, inds, prim, vertexCount, vertType, bytesRead);
}
};
SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
: GPUCommon(gfxCtx, draw)
{
@ -121,7 +104,8 @@ SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
displayStride_ = 512;
displayFormat_ = GE_FORMAT_8888;
drawEngineCommon_ = new SoftwareDrawEngine();
drawEngine_ = new SoftwareDrawEngine();
drawEngineCommon_ = drawEngine_;
}
void SoftGPU::DeviceLost() {
@ -384,7 +368,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) {
cyclesExecuted += EstimatePerVertexCost() * count;
int bytesRead;
TransformUnit::SubmitPrimitive(verts, indices, type, count, gstate.vertType, &bytesRead);
TransformUnit::SubmitPrimitive(verts, indices, type, count, gstate.vertType, &bytesRead, drawEngine_);
framebufferDirty_ = true;
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).

View File

@ -45,7 +45,7 @@ struct FormatBuffer {
}
};
class ShaderManagerGLES;
class SoftwareDrawEngine;
class SoftGPU : public GPUCommon {
public:
@ -103,6 +103,8 @@ private:
u32 displayStride_;
GEBufferFormat displayFormat_;
SoftwareDrawEngine *drawEngine_ = nullptr;
Draw::Texture *fbTex;
Draw::Pipeline *texColor;
std::vector<u32> fbTexBuffer;

View File

@ -30,6 +30,31 @@
static u8 buf[65536 * 48]; // yolo
bool TransformUnit::outside_range_flag = false;
// Allocates the three working buffers (decoded vertices, decoded indices, splines).
SoftwareDrawEngine::SoftwareDrawEngine() {
	// All this is a LOT of memory, need to see if we can cut down somehow. Used for splines.
	// Every buffer is requested with the same read/write page protection.
	const auto pageProtection = MEM_PROT_READ | MEM_PROT_WRITE;

	decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, pageProtection);
	decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, pageProtection);
	splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, pageProtection);
}
// Releases the three page allocations made by the constructor, passing each
// FreeMemoryPages call the same size constant that was used to allocate it.
SoftwareDrawEngine::~SoftwareDrawEngine() {
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
}
// No-op: this engine does nothing on flush.
void SoftwareDrawEngine::DispatchFlush() {
}
// Forwards the primitive to TransformUnit::SubmitPrimitive with the arguments
// unchanged, passing this engine so the transform unit can look up its vertex
// decoder through it.
void SoftwareDrawEngine::DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
TransformUnit::SubmitPrimitive(verts, inds, prim, vertexCount, vertType, bytesRead, this);
}
// Looks up the vertex decoder for the given vertex type through
// DrawEngineCommon::GetVertexDecoder. The lookup ID combines the low 24 bits
// of vtype with the value of gstate.getUVGenMode() shifted into bits 24 and up.
VertexDecoder *SoftwareDrawEngine::FindVertexDecoder(u32 vtype) {
	const u32 vtypeBits = vtype & 0xFFFFFF;
	const u32 uvGenBits = gstate.getUVGenMode() << 24;
	return DrawEngineCommon::GetVertexDecoder(vtypeBits | uvGenBits);
}
WorldCoords TransformUnit::ModelToWorld(const ModelCoords& coords)
{
Mat3x3<float> world_matrix(gstate.worldMatrix);
@ -216,13 +241,10 @@ struct SplinePatch {
int pad[3];
};
void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead)
void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead, SoftwareDrawEngine *drawEngine)
{
// TODO: Cache VertexDecoder objects
VertexDecoder vdecoder;
VertexDecoderOptions options{};
vdecoder.SetVertexType(vertex_type, options);
const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();
VertexDecoder &vdecoder = *drawEngine->FindVertexDecoder(vertex_type);
const DecVtxFormat &vtxfmt = vdecoder.GetDecVtxFmt();
if (bytesRead)
*bytesRead = vertex_count * vdecoder.VertexSize();

View File

@ -18,6 +18,7 @@
#pragma once
#include "CommonTypes.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/Math3D.h"
@ -30,6 +31,17 @@ typedef Vec3<float> WorldCoords;
typedef Vec3<float> ViewCoords;
typedef Vec4<float> ClipCoords; // Range: -w <= x/y/z <= w
// Draw engine used by the software renderer; derives from DrawEngineCommon.
// NOTE(review): the destructor frees raw page allocations, so copying an
// instance looks unsafe unless the base class already forbids it - confirm.
class SoftwareDrawEngine : public DrawEngineCommon {
public:
// Allocates the decoded-vertex, decoded-index and spline buffers.
SoftwareDrawEngine();
// Frees the three buffers allocated by the constructor.
~SoftwareDrawEngine();
// No-op in this engine.
void DispatchFlush() override;
// Forwards the primitive to TransformUnit::SubmitPrimitive, passing this engine along.
void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override;
// Returns the decoder for vtype combined with the current UV gen mode,
// obtained via DrawEngineCommon::GetVertexDecoder.
VertexDecoder *FindVertexDecoder(u32 vtype);
};
struct SplinePatch;
struct ScreenCoords
@ -126,7 +138,7 @@ public:
static DrawingCoords ScreenToDrawing(const ScreenCoords& coords);
static ScreenCoords DrawingToScreen(const DrawingCoords& coords);
static void SubmitPrimitive(void* vertices, void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead);
static void SubmitPrimitive(void* vertices, void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead, SoftwareDrawEngine *drawEngine);
static bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
static VertexData ReadVertex(VertexReader& vreader);