mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 13:30:02 +00:00
Defer vertex decoding until flush. Track draw calls so that they can be cached later.
This commit is contained in:
parent
229d4e9f32
commit
8ff96bbcd8
@ -294,6 +294,13 @@ add_library(etcpack STATIC
|
||||
native/ext/etcpack/image.h)
|
||||
include_directories(native/ext/etcpack)
|
||||
|
||||
# CityHash static library; the GPU code uses CityHash32 to hash deferred vertex data.
add_library(cityhash STATIC
	native/ext/cityhash/city.cpp
	native/ext/cityhash/city.h
	native/ext/cityhash/citycrc.h
)
# The sources live under native/ext/cityhash, so the include path must point
# there as well (same layout as the etcpack target).
include_directories(native/ext/cityhash)
|
||||
|
||||
if(NOT USING_GLES2)
|
||||
include_directories(${OPENGL_INCLUDE_DIR})
|
||||
|
||||
@ -795,7 +802,7 @@ add_library(${CoreLibName} ${CoreLinkType}
|
||||
Core/Util/ppge_atlas.h
|
||||
$<TARGET_OBJECTS:GPU>
|
||||
Globals.h)
|
||||
target_link_libraries(${CoreLibName} Common native kirk
|
||||
target_link_libraries(${CoreLibName} Common native kirk cityhash
|
||||
${GLEW_LIBRARIES} ${OPENGL_LIBRARIES})
|
||||
setup_target_project(${CoreLibName} Core)
|
||||
|
||||
|
@ -67,6 +67,7 @@ void CConfig::Load(const char *iniFileName)
|
||||
graphics->Get("SSAA", &SSAntiAlaising, 0);
|
||||
graphics->Get("VBO", &bUseVBO, true);
|
||||
graphics->Get("DisableG3DLog", &bDisableG3DLog, false);
|
||||
graphics->Get("VertexCache", &bVertexCache, false);
|
||||
|
||||
IniFile::Section *sound = iniFile.GetOrCreateSection("Sound");
|
||||
sound->Get("Enable", &bEnableSound, true);
|
||||
@ -114,6 +115,7 @@ void CConfig::Save()
|
||||
graphics->Set("SSAA", SSAntiAlaising);
|
||||
graphics->Set("VBO", bUseVBO);
|
||||
graphics->Set("DisableG3DLog", bDisableG3DLog);
|
||||
graphics->Set("VertexCache", bVertexCache);
|
||||
|
||||
IniFile::Section *sound = iniFile.GetOrCreateSection("Sound");
|
||||
sound->Set("Enable", bEnableSound);
|
||||
|
@ -57,6 +57,7 @@ public:
|
||||
int iWindowZoom; // for Windows
|
||||
bool SSAntiAlaising; //for Windows, too
|
||||
bool bDisableG3DLog;
|
||||
bool bVertexCache;
|
||||
|
||||
// Sound
|
||||
bool bEnableSound;
|
||||
|
@ -216,7 +216,10 @@ void hleEnterVblank(u64 userdata, int cyclesLate) {
|
||||
"Slowest syscall: %s : %0.2f ms\n"
|
||||
"Most active syscall: %s : %0.2f ms\n"
|
||||
"Draw calls: %i, flushes %i\n"
|
||||
"Cached Draw calls: %i\n"
|
||||
"Num Tracked Vertex Arrays: %i\n"
|
||||
"Vertices Transformed: %i\n"
|
||||
"Cached Vertices Drawn: %i\n"
|
||||
"FBOs active: %i\n"
|
||||
"Textures active: %i, decoded: %i\n"
|
||||
"Texture invalidations: %i\n"
|
||||
@ -232,7 +235,10 @@ void hleEnterVblank(u64 userdata, int cyclesLate) {
|
||||
kernelStats.summedSlowestSyscallTime * 1000.0f,
|
||||
gpuStats.numDrawCalls,
|
||||
gpuStats.numFlushes,
|
||||
gpuStats.numCachedDrawCalls,
|
||||
gpuStats.numTrackedVertexArrays,
|
||||
gpuStats.numVertsTransformed,
|
||||
gpuStats.numCachedVertsDrawn,
|
||||
gpuStats.numFBOs,
|
||||
gpuStats.numTextures,
|
||||
gpuStats.numTexturesDecoded,
|
||||
|
@ -223,6 +223,7 @@ void GLES_GPU::DumpNextFrame() {
|
||||
void GLES_GPU::BeginFrame() {
|
||||
TextureCache_StartFrame();
|
||||
DecimateFBOs();
|
||||
transformDraw_.DecimateTrackedVertexArrays();
|
||||
|
||||
if (dumpNextFrame_) {
|
||||
NOTICE_LOG(G3D, "DUMPING THIS FRAME");
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "../ge_constants.h"
|
||||
#include "DisplayListInterpreter.h"
|
||||
#include "ShaderManager.h"
|
||||
#include "TextureCache.h"
|
||||
|
||||
const GLint aLookup[] = {
|
||||
GL_DST_COLOR,
|
||||
@ -74,6 +75,13 @@ const GLuint stencilOps[] = {
|
||||
void ApplyDrawState(int prim) {
|
||||
// TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall.
|
||||
|
||||
if (gstate_c.textureChanged) {
|
||||
if ((gstate.textureMapEnable & 1) && !gstate.isModeClear()) {
|
||||
PSPSetTexture();
|
||||
}
|
||||
gstate_c.textureChanged = false;
|
||||
}
|
||||
|
||||
// TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a
|
||||
// single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily.
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "../../Core/Host.h"
|
||||
#include "../../Core/System.h"
|
||||
#include "../../native/gfx_es2/gl_state.h"
|
||||
#include "../../native/ext/cityhash/city.h"
|
||||
|
||||
#include "../Math3D.h"
|
||||
#include "../GPUState.h"
|
||||
@ -45,7 +46,8 @@ const GLuint glprim[8] = {
|
||||
};
|
||||
|
||||
TransformDrawEngine::TransformDrawEngine()
|
||||
: collectedVerts(0),
|
||||
: numDrawCalls(0),
|
||||
collectedVerts(0),
|
||||
lastVType(-1),
|
||||
curVbo_(0),
|
||||
shaderManager_(0) {
|
||||
@ -131,6 +133,7 @@ void TransformDrawEngine::DrawBezier(int ucount, int vcount) {
|
||||
} else {
|
||||
SubmitPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, c, gstate.vertType, GE_VTYPE_IDX_16BIT, 0);
|
||||
}
|
||||
Flush(); // as our vertex storage here is temporary, it will only survive one draw.
|
||||
}
|
||||
|
||||
void TransformDrawEngine::DrawSpline(int ucount, int vcount, int utype, int vtype) {
|
||||
@ -659,18 +662,14 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
|
||||
}
|
||||
|
||||
void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vertexCount, u32 vertType, int forceIndexType, int *bytesRead) {
|
||||
if (vertexCount == 0)
|
||||
{
|
||||
return; // we ignore zero-sized draw calls.
|
||||
}
|
||||
// For the future
|
||||
if (!indexGen.PrimCompatible(prim))
|
||||
if (!indexGen.PrimCompatible(prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS)
|
||||
Flush();
|
||||
|
||||
if (!indexGen.Empty()) {
|
||||
gpuStats.numJoins++;
|
||||
}
|
||||
gpuStats.numDrawCalls++;
|
||||
gpuStats.numVertsTransformed += vertexCount;
|
||||
|
||||
indexGen.SetIndex(collectedVerts);
|
||||
int indexLowerBound, indexUpperBound;
|
||||
// If vtype has changed, setup the vertex decoder.
|
||||
// TODO: Simply cache the setup decoders instead.
|
||||
if (vertType != lastVType) {
|
||||
@ -678,72 +677,223 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vert
|
||||
lastVType = vertType;
|
||||
}
|
||||
|
||||
// Decode the verts and apply morphing
|
||||
dec.DecodeVerts(decoded + collectedVerts * (int)dec.GetDecVtxFmt().stride, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound);
|
||||
collectedVerts += indexUpperBound - indexLowerBound + 1;
|
||||
if (bytesRead)
|
||||
*bytesRead = vertexCount * dec.VertexSize();
|
||||
|
||||
int indexType = vertType & GE_VTYPE_IDX_MASK;
|
||||
if (forceIndexType != -1) indexType = forceIndexType;
|
||||
switch (indexType) {
|
||||
case GE_VTYPE_IDX_NONE:
|
||||
switch (prim) {
|
||||
case GE_PRIM_POINTS: indexGen.AddPoints(vertexCount); break;
|
||||
case GE_PRIM_LINES: indexGen.AddLineList(vertexCount); break;
|
||||
case GE_PRIM_LINE_STRIP: indexGen.AddLineStrip(vertexCount); break;
|
||||
case GE_PRIM_TRIANGLES: indexGen.AddList(vertexCount); break;
|
||||
case GE_PRIM_TRIANGLE_STRIP: indexGen.AddStrip(vertexCount); break;
|
||||
case GE_PRIM_TRIANGLE_FAN: indexGen.AddFan(vertexCount); break;
|
||||
case GE_PRIM_RECTANGLES: indexGen.AddRectangles(vertexCount); break; // Same
|
||||
}
|
||||
break;
|
||||
if (!indexGen.Empty()) {
|
||||
gpuStats.numJoins++;
|
||||
}
|
||||
gpuStats.numDrawCalls++;
|
||||
gpuStats.numVertsTransformed += vertexCount;
|
||||
|
||||
case GE_VTYPE_IDX_8BIT:
|
||||
switch (prim) {
|
||||
case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same
|
||||
}
|
||||
break;
|
||||
DeferredDrawCall &dc = drawCalls[numDrawCalls++];
|
||||
dc.verts = verts;
|
||||
dc.inds = inds;
|
||||
dc.vertType = vertType;
|
||||
// Pick the index format first (forceIndexType overrides the vertex type unless it is -1),
// then shift it down to the compact form that DecodeVerts() switches on.
// The comparison must be parenthesized on its own: otherwise the whole expression
// collapses to a 0/1 boolean instead of an index type.
dc.indexType = ((forceIndexType == -1) ? (vertType & GE_VTYPE_IDX_MASK) : forceIndexType) >> GE_VTYPE_IDX_SHIFT;
|
||||
dc.prim = prim;
|
||||
dc.vertexCount = vertexCount;
|
||||
if (inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
|
||||
} else {
|
||||
dc.indexLowerBound = 0;
|
||||
dc.indexUpperBound = vertexCount - 1;
|
||||
}
|
||||
}
|
||||
|
||||
case GE_VTYPE_IDX_16BIT:
|
||||
switch (prim) {
|
||||
case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same
|
||||
void TransformDrawEngine::DecodeVerts() {
|
||||
for (int i = 0; i < numDrawCalls; i++) {
|
||||
const DeferredDrawCall &dc = drawCalls[i];
|
||||
|
||||
indexGen.SetIndex(collectedVerts);
|
||||
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
|
||||
|
||||
// Decode the verts and apply morphing
|
||||
dec.DecodeVerts(decoded + collectedVerts * (int)dec.GetDecVtxFmt().stride,
|
||||
dc.verts, dc.inds, dc.prim, dc.vertexCount, indexLowerBound, indexUpperBound);
|
||||
collectedVerts += indexUpperBound - indexLowerBound + 1;
|
||||
|
||||
u32 indexType = dc.indexType;
|
||||
int vertexCount = dc.vertexCount;
|
||||
void *inds = dc.inds;
|
||||
switch (indexType) {
|
||||
case GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT:
|
||||
switch (dc.prim) {
|
||||
case GE_PRIM_POINTS: indexGen.AddPoints(vertexCount); break;
|
||||
case GE_PRIM_LINES: indexGen.AddLineList(vertexCount); break;
|
||||
case GE_PRIM_LINE_STRIP: indexGen.AddLineStrip(vertexCount); break;
|
||||
case GE_PRIM_TRIANGLES: indexGen.AddList(vertexCount); break;
|
||||
case GE_PRIM_TRIANGLE_STRIP: indexGen.AddStrip(vertexCount); break;
|
||||
case GE_PRIM_TRIANGLE_FAN: indexGen.AddFan(vertexCount); break;
|
||||
case GE_PRIM_RECTANGLES: indexGen.AddRectangles(vertexCount); break; // Same
|
||||
}
|
||||
break;
|
||||
|
||||
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
|
||||
switch (dc.prim) {
|
||||
case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u8 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same
|
||||
}
|
||||
break;
|
||||
|
||||
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
|
||||
switch (dc.prim) {
|
||||
case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u16 *)inds, -indexLowerBound); break;
|
||||
case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Computes a content hash over the source data referenced by every deferred
// draw call: the vertex range each call touches, plus its index list if any.
// The per-call hashes are summed into a single 32-bit value.
u32 TransformDrawEngine::ComputeHash() {
	u32 fullhash = 0;
	// We are hashing the *undecoded* source vertices, so they must be measured
	// with the source vertex size, not with the stride of the decoded format.
	const int vertexSize = dec.VertexSize();

	for (int i = 0; i < numDrawCalls; i++) {
		const DeferredDrawCall &dc = drawCalls[i];
		if (!dc.inds) {
			// Non-indexed: the call consumes vertexCount consecutive vertices.
			fullhash += CityHash32((const char *)dc.verts, vertexSize * dc.vertexCount);
		} else {
			// Indexed: hash only the referenced vertex range. Both bounds are
			// inclusive, hence the + 1 (otherwise the last vertex is never hashed).
			const int numVerts = dc.indexUpperBound - dc.indexLowerBound + 1;
			fullhash += CityHash32((const char *)dc.verts + vertexSize * dc.indexLowerBound,
				vertexSize * numVerts);
			// Then hash the index list itself (one index per submitted vertex).
			const int indexSize = (dec.VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
			fullhash += CityHash32((const char *)dc.inds, indexSize * dc.vertexCount);
		}
	}

	return fullhash;
}
|
||||
|
||||
// Rotates a 32-bit value left by 13 bits. Written out by hand so that it does
// not depend on the compiler-specific _rotl intrinsic.
static inline u32 RotateLeft13(u32 value) {
	return (value << 13) | (value >> 19);
}

// Computes a cheap identifier for the current sequence of deferred draw calls.
// Only the call parameters (pointers, vertex type, count, primitive) are mixed
// in; the vertex data itself is not read. Use ComputeHash() for a content hash.
u32 TransformDrawEngine::ComputeFastDCID() {
	u32 hash = 0;
	for (int i = 0; i < numDrawCalls; i++) {
		const DeferredDrawCall &dc = drawCalls[i];
		// Go through size_t before narrowing: casting a pointer straight to a
		// 32-bit integer is rejected by compilers where pointers are wider.
		hash ^= (u32)(size_t)dc.verts;
		hash = RotateLeft13(hash);
		hash ^= (u32)(size_t)dc.inds;
		hash = RotateLeft13(hash);
		hash ^= (u32)dc.vertType;
		hash = RotateLeft13(hash);
		hash ^= (u32)dc.vertexCount;
		hash = RotateLeft13(hash);
		hash ^= (u32)dc.prim;
	}
	return hash;
}
|
||||
|
||||
enum { VAI_KILL_AGE = 50 };
|
||||
|
||||
// Frees every tracked VertexArrayInfo and empties the tracking map.
void TransformDrawEngine::ClearTrackedVertexArrays() {
	auto entry = vai_.begin();
	while (entry != vai_.end()) {
		// The map stores owning raw pointers, so release each one before clearing.
		delete entry->second;
		entry++;
	}
	vai_.clear();
}
|
||||
|
||||
void TransformDrawEngine::DecimateTrackedVertexArrays() {
|
||||
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
|
||||
if (iter->second->lastFrame + VAI_KILL_AGE < gpuStats.numFrames) {
|
||||
delete iter->second;
|
||||
vai_.erase(iter++);
|
||||
}
|
||||
else
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
|
||||
void TransformDrawEngine::Flush() {
|
||||
if (indexGen.Empty())
|
||||
if (!numDrawCalls)
|
||||
return;
|
||||
|
||||
#if 0
|
||||
for (int i = indexLowerBound; i <= indexUpperBound; i++) {
|
||||
PrintDecodedVertex(decoded[i], vertType);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Check if anything needs updating
|
||||
if (gstate_c.textureChanged) {
|
||||
if ((gstate.textureMapEnable & 1) && !gstate.isModeClear()) {
|
||||
PSPSetTexture();
|
||||
}
|
||||
gstate_c.textureChanged = false;
|
||||
}
|
||||
gpuStats.numFlushes++;
|
||||
|
||||
// TODO: Try to recognize the currently collected sequence of drawcalls.
|
||||
// Collect stats, hash, and buffer them.
|
||||
|
||||
bool useVBO = g_Config.bUseVBO;
|
||||
GLuint vbo, ebo;
|
||||
if (useVBO) {
|
||||
if (g_Config.bVertexCache) {
|
||||
u32 id = ComputeFastDCID();
|
||||
auto iter = vai_.find(id);
|
||||
VertexArrayInfo *vai;
|
||||
if (vai_.find(id) != vai_.end()) {
|
||||
// We've seen this before. Could have been a cached draw.
|
||||
vai = iter->second;
|
||||
} else {
|
||||
vai = new VertexArrayInfo();
|
||||
vai->decFmt = dec.GetDecVtxFmt();
|
||||
vai_[id] = vai;
|
||||
}
|
||||
vai->lastFrame = gpuStats.numFrames;
|
||||
// A pretty little state machine.
|
||||
switch (vai->status) {
|
||||
case VertexArrayInfo::VAI_NEW:
|
||||
{
|
||||
// Haven't seen this one before.
|
||||
u32 dataHash = ComputeHash();
|
||||
vai->hash = dataHash;
|
||||
vai->status = VertexArrayInfo::VAI_HASHING;
|
||||
DecodeVerts();
|
||||
goto rotateVBO;
|
||||
}
|
||||
|
||||
// Hashing - still gaining confidence about the buffer.
|
||||
case VertexArrayInfo::VAI_HASHING:
|
||||
{
|
||||
u32 newHash = ComputeHash();
|
||||
vai->numDraws++;
|
||||
if (vai->numDraws > 100000) {
|
||||
vai->status = VertexArrayInfo::VAI_RELIABLE;
|
||||
}
|
||||
if (newHash == vai->hash) {
|
||||
gpuStats.numCachedDrawCalls++;
|
||||
} else {
|
||||
vai->status = VertexArrayInfo::VAI_UNRELIABLE;
|
||||
}
|
||||
DecodeVerts(); // TODO : Remove
|
||||
goto rotateVBO;
|
||||
}
|
||||
|
||||
// Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time.
|
||||
case VertexArrayInfo::VAI_RELIABLE:
|
||||
{
|
||||
vai->numDraws++;
|
||||
gpuStats.numCachedDrawCalls++;
|
||||
DecodeVerts(); // TODO : Remove
|
||||
break;
|
||||
}
|
||||
|
||||
case VertexArrayInfo::VAI_UNRELIABLE:
|
||||
{
|
||||
vai->numDraws++;
|
||||
DecodeVerts();
|
||||
goto rotateVBO;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
DecodeVerts();
|
||||
rotateVBO:
|
||||
// Just rotate VBO.
|
||||
vbo = vbo_[curVbo_];
|
||||
ebo = ebo_[curVbo_];
|
||||
curVbo_++;
|
||||
if (curVbo_ == NUM_VBOS)
|
||||
curVbo_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
gpuStats.numTrackedVertexArrays = vai_.size();
|
||||
|
||||
// TODO: This should not be done on every drawcall, we should collect vertex data
|
||||
// until critical state changes. That's when we draw (flush).
|
||||
|
||||
@ -756,15 +906,13 @@ void TransformDrawEngine::Flush() {
|
||||
|
||||
DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, collectedVerts);
|
||||
|
||||
bool useVBO = g_Config.bUseVBO;
|
||||
|
||||
if (CanUseHardwareTransform(prim)) {
|
||||
if (useVBO) {
|
||||
//char title[64];
|
||||
//sprintf(title, "upload %i verts for hw", indexGen.VertexCount());
|
||||
//LoggingDeadline deadline(title, 5);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vbo_[curVbo_]);
|
||||
glBufferData(GL_ARRAY_BUFFER, dec.GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_DYNAMIC_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
||||
glBufferData(GL_ARRAY_BUFFER, dec.GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_STREAM_DRAW);
|
||||
}
|
||||
SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), useVBO ? 0 : decoded);
|
||||
// If there's only been one primitive type, and it's either TRIANGLES, LINES or POINTS,
|
||||
@ -775,8 +923,8 @@ void TransformDrawEngine::Flush() {
|
||||
glDrawArrays(glprim[prim], 0, indexGen.VertexCount());
|
||||
} else {
|
||||
if (useVBO) {
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo_[curVbo_]);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(short) * indexGen.VertexCount(), (GLvoid *)decIndex, GL_DYNAMIC_DRAW);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(short) * indexGen.VertexCount(), (GLvoid *)decIndex, GL_STREAM_DRAW);
|
||||
}
|
||||
glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, useVBO ? 0 : (GLvoid*)decIndex);
|
||||
if (useVBO) {
|
||||
@ -787,9 +935,6 @@ void TransformDrawEngine::Flush() {
|
||||
if (useVBO) {
|
||||
// glBufferData(GL_ARRAY_BUFFER, dec.GetDecVtxFmt().stride * indexGen.MaxIndex(), 0, GL_DYNAMIC_DRAW);
|
||||
// glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
curVbo_++;
|
||||
if (curVbo_ == NUM_VBOS)
|
||||
curVbo_ = 0;
|
||||
}
|
||||
} else {
|
||||
SoftwareTransformAndDraw(prim, decoded, program, indexGen.VertexCount(), dec.VertexType(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec.GetDecVtxFmt(),
|
||||
@ -798,4 +943,5 @@ void TransformDrawEngine::Flush() {
|
||||
|
||||
indexGen.Reset();
|
||||
collectedVerts = 0;
|
||||
numDrawCalls = 0;
|
||||
}
|
||||
|
@ -25,14 +25,66 @@ class LinkedShader;
|
||||
class ShaderManager;
|
||||
struct DecVtxFormat;
|
||||
|
||||
// States transitions:
|
||||
// On creation: DRAWN_NEW
|
||||
// DRAWN_NEW -> DRAWN_HASHING
|
||||
// DRAWN_HASHING -> DRAWN_RELIABLE
|
||||
// DRAWN_HASHING -> DRAWN_UNRELIABLE
|
||||
// DRAWN_ONCE -> UNRELIABLE
|
||||
// DRAWN_RELIABLE -> DRAWN_SAFE
|
||||
// UNRELIABLE -> death
|
||||
// DRAWN_ONCE -> death
|
||||
// DRAWN_RELIABLE -> death
|
||||
|
||||
|
||||
// Don't bother storing information about draws smaller than this.
|
||||
enum {
|
||||
VERTEX_CACHE_THRESHOLD = 20,
|
||||
};
|
||||
|
||||
// Try to keep this POD.
|
||||
class VertexArrayInfo {
|
||||
public:
|
||||
VertexArrayInfo() {
|
||||
status = VAI_NEW;
|
||||
vbo = 0;
|
||||
ebo = 0;
|
||||
numDCs = 0;
|
||||
numDraws = 0;
|
||||
lastFrame = 0; // TODO
|
||||
}
|
||||
enum Status {
|
||||
VAI_NEW,
|
||||
VAI_HASHING,
|
||||
VAI_RELIABLE, // cache, don't hash
|
||||
VAI_UNRELIABLE, // never cache
|
||||
};
|
||||
|
||||
u64 hash;
|
||||
|
||||
Status status;
|
||||
|
||||
DecVtxFormat decFmt;
|
||||
|
||||
u32 vbo;
|
||||
u32 ebo;
|
||||
|
||||
// ID information
|
||||
u8 numDCs;
|
||||
int numDraws;
|
||||
int lastFrame; // So that we can forget.
|
||||
};
|
||||
|
||||
|
||||
// Handles transform, lighting and drawing.
|
||||
class TransformDrawEngine : public GfxResourceHolder {
|
||||
public:
|
||||
TransformDrawEngine();
|
||||
~TransformDrawEngine();
|
||||
virtual ~TransformDrawEngine();
|
||||
void SubmitPrim(void *verts, void *inds, int prim, int vertexCount, u32 vertexType, int forceIndexType, int *bytesRead);
|
||||
void DrawBezier(int ucount, int vcount);
|
||||
void DrawSpline(int ucount, int vcount, int utype, int vtype);
|
||||
void DecodeVerts();
|
||||
void Flush();
|
||||
void SetShaderManager(ShaderManager *shaderManager) {
|
||||
shaderManager_ = shaderManager;
|
||||
@ -42,9 +94,33 @@ public:
|
||||
void DestroyDeviceObjects();
|
||||
void GLLost();
|
||||
|
||||
void DecimateTrackedVertexArrays();
|
||||
void ClearTrackedVertexArrays();
|
||||
|
||||
private:
|
||||
void SoftwareTransformAndDraw(int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex);
|
||||
|
||||
// drawcall ID
|
||||
u32 ComputeFastDCID();
|
||||
u32 ComputeHash(); // Reads deferred vertex data.
|
||||
|
||||
// Defer all vertex decoding to a Flush, so that we can hash and cache the
|
||||
// generated buffers without having to redecode them every time.
|
||||
struct DeferredDrawCall {
|
||||
void *verts;
|
||||
void *inds;
|
||||
u32 vertType;
|
||||
u8 indexType;
|
||||
u8 prim;
|
||||
u16 vertexCount;
|
||||
u16 indexLowerBound;
|
||||
u16 indexUpperBound;
|
||||
};
|
||||
|
||||
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
|
||||
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
|
||||
int numDrawCalls;
|
||||
|
||||
// Vertex collector state
|
||||
IndexGenerator indexGen;
|
||||
int collectedVerts;
|
||||
@ -65,6 +141,8 @@ private:
|
||||
GLuint ebo_[NUM_VBOS];
|
||||
int curVbo_;
|
||||
|
||||
std::map<u32, VertexArrayInfo *> vai_;
|
||||
|
||||
// Other
|
||||
ShaderManager *shaderManager_;
|
||||
};
|
||||
|
@ -645,12 +645,13 @@ void VertexDecoder::SetVertexType(u32 fmt) {
|
||||
DEBUG_LOG(G3D,"SVT : size = %i, aligned to biggest %i", size, biggest);
|
||||
}
|
||||
|
||||
void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const {
|
||||
void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound) {
|
||||
// Find index bounds. Could cache this in display lists.
|
||||
// Also, this could be greatly sped up with SSE2, although rarely a bottleneck.
|
||||
// Also, this could be greatly sped up with SSE2/NEON, although rarely a bottleneck.
|
||||
int lowerBound = 0x7FFFFFFF;
|
||||
int upperBound = 0;
|
||||
if (idx == (GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT)) {
|
||||
u32 idx = vertType & GE_VTYPE_IDX_MASK;
|
||||
if (idx == GE_VTYPE_IDX_8BIT) {
|
||||
const u8 *ind8 = (const u8 *)inds;
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (ind8[i] < lowerBound)
|
||||
@ -658,7 +659,7 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *i
|
||||
if (ind8[i] > upperBound)
|
||||
upperBound = ind8[i];
|
||||
}
|
||||
} else if (idx == (GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT)) {
|
||||
} else if (idx == GE_VTYPE_IDX_16BIT) {
|
||||
const u16 *ind16 = (const u16*)inds;
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (ind16[i] < lowerBound)
|
||||
@ -670,13 +671,15 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *i
|
||||
lowerBound = 0;
|
||||
upperBound = count - 1;
|
||||
}
|
||||
*indexLowerBound = lowerBound;
|
||||
*indexUpperBound = upperBound;
|
||||
*indexLowerBound = (u16)lowerBound;
|
||||
*indexUpperBound = (u16)upperBound;
|
||||
}
|
||||
|
||||
void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *inds, int prim, int count, int indexLowerBound, int indexUpperBound) const {
|
||||
// Decode the vertices within the found bounds, once each
|
||||
decoded_ = decodedptr; // + lowerBound * decFmt.stride;
|
||||
ptr_ = (const u8*)verts + lowerBound * size;
|
||||
for (int index = lowerBound; index <= upperBound; index++) {
|
||||
ptr_ = (const u8*)verts + indexLowerBound * size;
|
||||
for (int index = indexLowerBound; index <= indexUpperBound; index++) {
|
||||
for (int i = 0; i < numSteps_; i++) {
|
||||
((*this).*steps_[i])();
|
||||
}
|
||||
|
@ -65,6 +65,7 @@ class VertexDecoder;
|
||||
|
||||
typedef void (VertexDecoder::*StepFunction)() const;
|
||||
|
||||
void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
|
||||
|
||||
// Right now
|
||||
// - only contains computed information
|
||||
@ -87,7 +88,7 @@ public:
|
||||
u32 VertexType() const { return fmt_; }
|
||||
const DecVtxFormat &GetDecVtxFmt() { return decFmt; }
|
||||
|
||||
void DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const;
|
||||
void DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int indexLowerBound, int indexUpperBound) const;
|
||||
|
||||
// This could be easily generalized to inject any one component. Don't know another use for it though.
|
||||
u32 InjectUVs(u8 *decoded, const void *verts, float *customuv, int count) const;
|
||||
|
@ -258,7 +258,10 @@ struct GPUStatistics
|
||||
void resetFrame() {
|
||||
numJoins = 0;
|
||||
numDrawCalls = 0;
|
||||
numCachedDrawCalls = 0;
|
||||
numVertsTransformed = 0;
|
||||
numCachedVertsDrawn = 0;
|
||||
numTrackedVertexArrays = 0;
|
||||
numTextureInvalidations = 0;
|
||||
numTextureSwitches = 0;
|
||||
numShaderSwitches = 0;
|
||||
@ -270,8 +273,11 @@ struct GPUStatistics
|
||||
// Per frame statistics
|
||||
int numJoins;
|
||||
int numDrawCalls;
|
||||
int numCachedDrawCalls;
|
||||
int numFlushes;
|
||||
int numVertsTransformed;
|
||||
int numCachedVertsDrawn;
|
||||
int numTrackedVertexArrays;
|
||||
int numTextureInvalidations;
|
||||
int numTextureSwitches;
|
||||
int numShaderSwitches;
|
||||
|
@ -1,4 +1,4 @@
|
||||
Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PPSSPPWindows", "PPSSPP.vcxproj", "{567AF8DB-42C1-4D08-96CD-D70A2DFEFC6B}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
|
@ -487,7 +487,10 @@ namespace MainWindow
|
||||
g_Config.bDrawWireframe = !g_Config.bDrawWireframe;
|
||||
UpdateMenus();
|
||||
break;
|
||||
|
||||
case ID_OPTIONS_VERTEXCACHE:
|
||||
g_Config.bVertexCache = !g_Config.bVertexCache;
|
||||
UpdateMenus();
|
||||
break;
|
||||
case ID_OPTIONS_DISPLAYRAWFRAMEBUFFER:
|
||||
g_Config.bDisplayFramebuffer = !g_Config.bDisplayFramebuffer;
|
||||
UpdateMenus();
|
||||
@ -663,6 +666,7 @@ namespace MainWindow
|
||||
CHECKITEM(ID_EMULATION_RUNONLOAD, g_Config.bAutoRun);
|
||||
CHECKITEM(ID_OPTIONS_USEVBO, g_Config.bUseVBO);
|
||||
CHECKITEM(ID_OPTIONS_DISABLEG3DLOG, g_Config.bDisableG3DLog);
|
||||
CHECKITEM(ID_OPTIONS_VERTEXCACHE, g_Config.bVertexCache);
|
||||
|
||||
UINT enable = !Core_IsStepping() ? MF_GRAYED : MF_ENABLED;
|
||||
EnableMenuItem(menu,ID_EMULATION_RUN, g_State.bEmuThreadStarted ? enable : MF_GRAYED);
|
||||
|
@ -257,6 +257,7 @@ BEGIN
|
||||
MENUITEM "&Linear Filtering", ID_OPTIONS_LINEARFILTERING
|
||||
MENUITEM "Si&mple 2x SSAA", ID_OPTIONS_SIMPLE2XSSAA
|
||||
MENUITEM "&Use VBO", ID_OPTIONS_USEVBO
|
||||
MENUITEM "&Vertex Cache", ID_OPTIONS_VERTEXCACHE
|
||||
MENUITEM SEPARATOR
|
||||
MENUITEM "&Wireframe (experimental)", ID_OPTIONS_WIREFRAME
|
||||
MENUITEM "&Display Raw Framebuffer", ID_OPTIONS_DISPLAYRAWFRAMEBUFFER
|
||||
|
@ -257,6 +257,7 @@
|
||||
#define ID_OPTIONS_SIMPLE2XSSAA 40133
|
||||
#define ID_OPTIONS_USEVBO 40134
|
||||
#define ID_OPTIONS_DISABLEG3DLOG 40135
|
||||
#define ID_OPTIONS_VERTEXCACHE 40136
|
||||
#define IDC_STATIC -1
|
||||
|
||||
// Next default values for new objects
|
||||
@ -264,7 +265,7 @@
|
||||
#ifdef APSTUDIO_INVOKED
|
||||
#ifndef APSTUDIO_READONLY_SYMBOLS
|
||||
#define _APS_NEXT_RESOURCE_VALUE 233
|
||||
#define _APS_NEXT_COMMAND_VALUE 40135
|
||||
#define _APS_NEXT_COMMAND_VALUE 40137
|
||||
#define _APS_NEXT_CONTROL_VALUE 1163
|
||||
#define _APS_NEXT_SYMED_VALUE 101
|
||||
#endif
|
||||
|
2
native
2
native
@ -1 +1 @@
|
||||
Subproject commit f5e775a9a10e38d5ac98261c132875492dfd91c3
|
||||
Subproject commit b5037341aaca775e806be4b6a7bf109a47e0c655
|
Loading…
Reference in New Issue
Block a user