Defer vertex decoding until flush. Track draw calls so that they can be cached later.

This commit is contained in:
Henrik Rydgard 2013-01-19 17:05:08 +01:00
parent 229d4e9f32
commit 8ff96bbcd8
16 changed files with 351 additions and 86 deletions

View File

@ -294,6 +294,13 @@ add_library(etcpack STATIC
native/ext/etcpack/image.h)
include_directories(native/ext/etcpack)
# CityHash is built as its own static library; the GPU code includes city.h
# to hash vertex data.
add_library(cityhash STATIC
	native/ext/cityhash/city.cpp
	native/ext/cityhash/city.h
	native/ext/cityhash/citycrc.h
)
# The sources live under native/ext/, same as etcpack above, so the include
# directory has to point there as well.
include_directories(native/ext/cityhash)
if(NOT USING_GLES2)
include_directories(${OPENGL_INCLUDE_DIR})
@ -795,7 +802,7 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/Util/ppge_atlas.h
$<TARGET_OBJECTS:GPU>
Globals.h)
target_link_libraries(${CoreLibName} Common native kirk
target_link_libraries(${CoreLibName} Common native kirk cityhash
${GLEW_LIBRARIES} ${OPENGL_LIBRARIES})
setup_target_project(${CoreLibName} Core)

View File

@ -67,6 +67,7 @@ void CConfig::Load(const char *iniFileName)
graphics->Get("SSAA", &SSAntiAlaising, 0);
graphics->Get("VBO", &bUseVBO, true);
graphics->Get("DisableG3DLog", &bDisableG3DLog, false);
graphics->Get("VertexCache", &bVertexCache, false);
IniFile::Section *sound = iniFile.GetOrCreateSection("Sound");
sound->Get("Enable", &bEnableSound, true);
@ -114,6 +115,7 @@ void CConfig::Save()
graphics->Set("SSAA", SSAntiAlaising);
graphics->Set("VBO", bUseVBO);
graphics->Set("DisableG3DLog", bDisableG3DLog);
graphics->Set("VertexCache", bVertexCache);
IniFile::Section *sound = iniFile.GetOrCreateSection("Sound");
sound->Set("Enable", bEnableSound);

View File

@ -57,6 +57,7 @@ public:
int iWindowZoom; // for Windows
bool SSAntiAlaising; //for Windows, too
bool bDisableG3DLog;
bool bVertexCache;
// Sound
bool bEnableSound;

View File

@ -216,7 +216,10 @@ void hleEnterVblank(u64 userdata, int cyclesLate) {
"Slowest syscall: %s : %0.2f ms\n"
"Most active syscall: %s : %0.2f ms\n"
"Draw calls: %i, flushes %i\n"
"Cached Draw calls: %i\n"
"Num Tracked Vertex Arrays: %i\n"
"Vertices Transformed: %i\n"
"Cached Vertices Drawn: %i\n"
"FBOs active: %i\n"
"Textures active: %i, decoded: %i\n"
"Texture invalidations: %i\n"
@ -232,7 +235,10 @@ void hleEnterVblank(u64 userdata, int cyclesLate) {
kernelStats.summedSlowestSyscallTime * 1000.0f,
gpuStats.numDrawCalls,
gpuStats.numFlushes,
gpuStats.numCachedDrawCalls,
gpuStats.numTrackedVertexArrays,
gpuStats.numVertsTransformed,
gpuStats.numCachedVertsDrawn,
gpuStats.numFBOs,
gpuStats.numTextures,
gpuStats.numTexturesDecoded,

View File

@ -223,6 +223,7 @@ void GLES_GPU::DumpNextFrame() {
void GLES_GPU::BeginFrame() {
TextureCache_StartFrame();
DecimateFBOs();
transformDraw_.DecimateTrackedVertexArrays();
if (dumpNextFrame_) {
NOTICE_LOG(G3D, "DUMPING THIS FRAME");

View File

@ -7,6 +7,7 @@
#include "../ge_constants.h"
#include "DisplayListInterpreter.h"
#include "ShaderManager.h"
#include "TextureCache.h"
const GLint aLookup[] = {
GL_DST_COLOR,
@ -74,6 +75,13 @@ const GLuint stencilOps[] = {
void ApplyDrawState(int prim) {
// TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall.
if (gstate_c.textureChanged) {
if ((gstate.textureMapEnable & 1) && !gstate.isModeClear()) {
PSPSetTexture();
}
gstate_c.textureChanged = false;
}
// TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a
// single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily.

View File

@ -21,6 +21,7 @@
#include "../../Core/Host.h"
#include "../../Core/System.h"
#include "../../native/gfx_es2/gl_state.h"
#include "../../native/ext/cityhash/city.h"
#include "../Math3D.h"
#include "../GPUState.h"
@ -45,7 +46,8 @@ const GLuint glprim[8] = {
};
TransformDrawEngine::TransformDrawEngine()
: collectedVerts(0),
: numDrawCalls(0),
collectedVerts(0),
lastVType(-1),
curVbo_(0),
shaderManager_(0) {
@ -131,6 +133,7 @@ void TransformDrawEngine::DrawBezier(int ucount, int vcount) {
} else {
SubmitPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, c, gstate.vertType, GE_VTYPE_IDX_16BIT, 0);
}
Flush(); // as our vertex storage here is temporary, it will only survive one draw.
}
void TransformDrawEngine::DrawSpline(int ucount, int vcount, int utype, int vtype) {
@ -659,18 +662,14 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
}
void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vertexCount, u32 vertType, int forceIndexType, int *bytesRead) {
if (vertexCount == 0)
{
return; // we ignore zero-sized draw calls.
}
// For the future
if (!indexGen.PrimCompatible(prim))
if (!indexGen.PrimCompatible(prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS)
Flush();
if (!indexGen.Empty()) {
gpuStats.numJoins++;
}
gpuStats.numDrawCalls++;
gpuStats.numVertsTransformed += vertexCount;
indexGen.SetIndex(collectedVerts);
int indexLowerBound, indexUpperBound;
// If vtype has changed, setup the vertex decoder.
// TODO: Simply cache the setup decoders instead.
if (vertType != lastVType) {
@ -678,72 +677,223 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vert
lastVType = vertType;
}
// Decode the verts and apply morphing
dec.DecodeVerts(decoded + collectedVerts * (int)dec.GetDecVtxFmt().stride, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound);
collectedVerts += indexUpperBound - indexLowerBound + 1;
if (bytesRead)
*bytesRead = vertexCount * dec.VertexSize();
int indexType = vertType & GE_VTYPE_IDX_MASK;
if (forceIndexType != -1) indexType = forceIndexType;
switch (indexType) {
case GE_VTYPE_IDX_NONE:
switch (prim) {
case GE_PRIM_POINTS: indexGen.AddPoints(vertexCount); break;
case GE_PRIM_LINES: indexGen.AddLineList(vertexCount); break;
case GE_PRIM_LINE_STRIP: indexGen.AddLineStrip(vertexCount); break;
case GE_PRIM_TRIANGLES: indexGen.AddList(vertexCount); break;
case GE_PRIM_TRIANGLE_STRIP: indexGen.AddStrip(vertexCount); break;
case GE_PRIM_TRIANGLE_FAN: indexGen.AddFan(vertexCount); break;
case GE_PRIM_RECTANGLES: indexGen.AddRectangles(vertexCount); break; // Same
}
break;
if (!indexGen.Empty()) {
gpuStats.numJoins++;
}
gpuStats.numDrawCalls++;
gpuStats.numVertsTransformed += vertexCount;
case GE_VTYPE_IDX_8BIT:
switch (prim) {
case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same
}
break;
DeferredDrawCall &dc = drawCalls[numDrawCalls++];
dc.verts = verts;
dc.inds = inds;
dc.vertType = vertType;
dc.indexType = forceIndexType == (-1 ? (vertType & GE_VTYPE_IDX_MASK) : forceIndexType) >> GE_VTYPE_IDX_SHIFT;
dc.prim = prim;
dc.vertexCount = vertexCount;
if (inds) {
GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
} else {
dc.indexLowerBound = 0;
dc.indexUpperBound = vertexCount - 1;
}
}
case GE_VTYPE_IDX_16BIT:
switch (prim) {
case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same
void TransformDrawEngine::DecodeVerts() {
for (int i = 0; i < numDrawCalls; i++) {
const DeferredDrawCall &dc = drawCalls[i];
indexGen.SetIndex(collectedVerts);
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
// Decode the verts and apply morphing
dec.DecodeVerts(decoded + collectedVerts * (int)dec.GetDecVtxFmt().stride,
dc.verts, dc.inds, dc.prim, dc.vertexCount, indexLowerBound, indexUpperBound);
collectedVerts += indexUpperBound - indexLowerBound + 1;
u32 indexType = dc.indexType;
int vertexCount = dc.vertexCount;
void *inds = dc.inds;
switch (indexType) {
case GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT:
switch (dc.prim) {
case GE_PRIM_POINTS: indexGen.AddPoints(vertexCount); break;
case GE_PRIM_LINES: indexGen.AddLineList(vertexCount); break;
case GE_PRIM_LINE_STRIP: indexGen.AddLineStrip(vertexCount); break;
case GE_PRIM_TRIANGLES: indexGen.AddList(vertexCount); break;
case GE_PRIM_TRIANGLE_STRIP: indexGen.AddStrip(vertexCount); break;
case GE_PRIM_TRIANGLE_FAN: indexGen.AddFan(vertexCount); break;
case GE_PRIM_RECTANGLES: indexGen.AddRectangles(vertexCount); break; // Same
}
break;
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
switch (dc.prim) {
case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u8 *)inds, -indexLowerBound); break;
case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same
}
break;
case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT:
switch (dc.prim) {
case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u16 *)inds, -indexLowerBound); break;
case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same
}
break;
}
break;
}
}
// Hashes the raw (not yet decoded) vertex and index memory referenced by all
// deferred draw calls and returns the sum of the per-range CityHash32 values.
// Flush() compares this against the stored hash of a tracked vertex array to
// find out whether the source data changed.
u32 TransformDrawEngine::ComputeHash() {
	u32 fullhash = 0;
	// Bytes per vertex in the *source* data. The decoded stride
	// (GetDecVtxFmt().stride) describes the output buffer and would size the
	// hashed range incorrectly.
	const int vertexSize = dec.VertexSize();
	const int indexSize = (dec.VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;

	for (int i = 0; i < numDrawCalls; i++) {
		const DeferredDrawCall &dc = drawCalls[i];
		if (!dc.inds) {
			fullhash += CityHash32((const char *)dc.verts, vertexSize * dc.vertexCount);
		} else {
			// Index bounds are inclusive, so the range holds upper - lower + 1
			// vertices; without the +1 the last referenced vertex was never hashed.
			const int numVerts = dc.indexUpperBound - dc.indexLowerBound + 1;
			fullhash += CityHash32((const char *)dc.verts + vertexSize * dc.indexLowerBound,
				vertexSize * numVerts);
			// For indexed draws vertexCount is the number of indices.
			fullhash += CityHash32((const char *)dc.inds, indexSize * dc.vertexCount);
		}
	}

	return fullhash;
}
// Cheap identifier for the current sequence of deferred draw calls. Only the
// call parameters (pointers, vertex type, count, primitive) are mixed in; the
// vertex data itself is not read.
u32 TransformDrawEngine::ComputeFastDCID() {
	u32 id = 0;
	for (int call = 0; call < numDrawCalls; ++call) {
		const DeferredDrawCall &dc = drawCalls[call];
		// These four fields are each followed by a rotate...
		const u32 rotated[] = {
			(u32)dc.verts,
			(u32)dc.inds,
			(u32)dc.vertType,
			(u32)dc.vertexCount,
		};
		for (int f = 0; f < 4; ++f) {
			id ^= rotated[f];
			id = _rotl(id, 13);
		}
		// ...while the primitive type is mixed in without a trailing rotate.
		id ^= (u32)dc.prim;
	}
	return id;
}
enum { VAI_KILL_AGE = 50 };
// Frees every tracked VertexArrayInfo and empties the tracking map.
void TransformDrawEngine::ClearTrackedVertexArrays() {
	// The map owns the heap-allocated entries, so release them before clearing.
	for (auto &tracked : vai_) {
		delete tracked.second;
	}
	vai_.clear();
}
void TransformDrawEngine::DecimateTrackedVertexArrays() {
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
if (iter->second->lastFrame + VAI_KILL_AGE < gpuStats.numFrames) {
delete iter->second;
vai_.erase(iter++);
}
else
++iter;
}
}
void TransformDrawEngine::Flush() {
if (indexGen.Empty())
if (!numDrawCalls)
return;
#if 0
for (int i = indexLowerBound; i <= indexUpperBound; i++) {
PrintDecodedVertex(decoded[i], vertType);
}
#endif
// Check if anything needs updating
if (gstate_c.textureChanged) {
if ((gstate.textureMapEnable & 1) && !gstate.isModeClear()) {
PSPSetTexture();
}
gstate_c.textureChanged = false;
}
gpuStats.numFlushes++;
// TODO: Try to recognize the currently collected sequence of drawcalls.
// Collect stats, hash, and buffer them.
bool useVBO = g_Config.bUseVBO;
GLuint vbo, ebo;
if (useVBO) {
if (g_Config.bVertexCache) {
u32 id = ComputeFastDCID();
auto iter = vai_.find(id);
VertexArrayInfo *vai;
if (vai_.find(id) != vai_.end()) {
// We've seen this before. Could have been a cached draw.
vai = iter->second;
} else {
vai = new VertexArrayInfo();
vai->decFmt = dec.GetDecVtxFmt();
vai_[id] = vai;
}
vai->lastFrame = gpuStats.numFrames;
// A pretty little state machine.
switch (vai->status) {
case VertexArrayInfo::VAI_NEW:
{
// Haven't seen this one before.
u32 dataHash = ComputeHash();
vai->hash = dataHash;
vai->status = VertexArrayInfo::VAI_HASHING;
DecodeVerts();
goto rotateVBO;
}
// Hashing - still gaining confidence about the buffer.
case VertexArrayInfo::VAI_HASHING:
{
u32 newHash = ComputeHash();
vai->numDraws++;
if (vai->numDraws > 100000) {
vai->status = VertexArrayInfo::VAI_RELIABLE;
}
if (newHash == vai->hash) {
gpuStats.numCachedDrawCalls++;
} else {
vai->status = VertexArrayInfo::VAI_UNRELIABLE;
}
DecodeVerts(); // TODO : Remove
goto rotateVBO;
}
// Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time.
case VertexArrayInfo::VAI_RELIABLE:
{
vai->numDraws++;
gpuStats.numCachedDrawCalls++;
DecodeVerts(); // TODO : Remove
break;
}
case VertexArrayInfo::VAI_UNRELIABLE:
{
vai->numDraws++;
DecodeVerts();
goto rotateVBO;
}
}
} else {
DecodeVerts();
rotateVBO:
// Just rotate VBO.
vbo = vbo_[curVbo_];
ebo = ebo_[curVbo_];
curVbo_++;
if (curVbo_ == NUM_VBOS)
curVbo_ = 0;
}
}
gpuStats.numTrackedVertexArrays = vai_.size();
// TODO: This should not be done on every drawcall, we should collect vertex data
// until critical state changes. That's when we draw (flush).
@ -756,15 +906,13 @@ void TransformDrawEngine::Flush() {
DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, collectedVerts);
bool useVBO = g_Config.bUseVBO;
if (CanUseHardwareTransform(prim)) {
if (useVBO) {
//char title[64];
//sprintf(title, "upload %i verts for hw", indexGen.VertexCount());
//LoggingDeadline deadline(title, 5);
glBindBuffer(GL_ARRAY_BUFFER, vbo_[curVbo_]);
glBufferData(GL_ARRAY_BUFFER, dec.GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, dec.GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_STREAM_DRAW);
}
SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), useVBO ? 0 : decoded);
// If there's only been one primitive type, and it's either TRIANGLES, LINES or POINTS,
@ -775,8 +923,8 @@ void TransformDrawEngine::Flush() {
glDrawArrays(glprim[prim], 0, indexGen.VertexCount());
} else {
if (useVBO) {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo_[curVbo_]);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(short) * indexGen.VertexCount(), (GLvoid *)decIndex, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(short) * indexGen.VertexCount(), (GLvoid *)decIndex, GL_STREAM_DRAW);
}
glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, useVBO ? 0 : (GLvoid*)decIndex);
if (useVBO) {
@ -787,9 +935,6 @@ void TransformDrawEngine::Flush() {
if (useVBO) {
// glBufferData(GL_ARRAY_BUFFER, dec.GetDecVtxFmt().stride * indexGen.MaxIndex(), 0, GL_DYNAMIC_DRAW);
// glBindBuffer(GL_ARRAY_BUFFER, 0);
curVbo_++;
if (curVbo_ == NUM_VBOS)
curVbo_ = 0;
}
} else {
SoftwareTransformAndDraw(prim, decoded, program, indexGen.VertexCount(), dec.VertexType(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec.GetDecVtxFmt(),
@ -798,4 +943,5 @@ void TransformDrawEngine::Flush() {
indexGen.Reset();
collectedVerts = 0;
numDrawCalls = 0;
}

View File

@ -25,14 +25,66 @@ class LinkedShader;
class ShaderManager;
struct DecVtxFormat;
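// State transitions (the Status enum in VertexArrayInfo names these VAI_NEW, VAI_HASHING,
// VAI_RELIABLE and VAI_UNRELIABLE; DRAWN_ONCE and DRAWN_SAFE below have no enum counterpart):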
// On creation: DRAWN_NEW
// DRAWN_NEW -> DRAWN_HASHING
// DRAWN_HASHING -> DRAWN_RELIABLE
// DRAWN_HASHING -> DRAWN_UNRELIABLE
// DRAWN_ONCE -> UNRELIABLE
// DRAWN_RELIABLE -> DRAWN_SAFE
// UNRELIABLE -> death
// DRAWN_ONCE -> death
// DRAWN_RELIABLE -> death
// Don't bother storing information about draws smaller than this.
enum {
VERTEX_CACHE_THRESHOLD = 20,
};
// Try to keep this POD.
class VertexArrayInfo {
public:
VertexArrayInfo() {
status = VAI_NEW;
vbo = 0;
ebo = 0;
numDCs = 0;
numDraws = 0;
lastFrame = 0; // TODO
}
enum Status {
VAI_NEW,
VAI_HASHING,
VAI_RELIABLE, // cache, don't hash
VAI_UNRELIABLE, // never cache
};
u64 hash;
Status status;
DecVtxFormat decFmt;
u32 vbo;
u32 ebo;
// ID information
u8 numDCs;
int numDraws;
int lastFrame; // So that we can forget.
};
// Handles transform, lighting and drawing.
class TransformDrawEngine : public GfxResourceHolder {
public:
TransformDrawEngine();
~TransformDrawEngine();
virtual ~TransformDrawEngine();
void SubmitPrim(void *verts, void *inds, int prim, int vertexCount, u32 vertexType, int forceIndexType, int *bytesRead);
void DrawBezier(int ucount, int vcount);
void DrawSpline(int ucount, int vcount, int utype, int vtype);
void DecodeVerts();
void Flush();
void SetShaderManager(ShaderManager *shaderManager) {
shaderManager_ = shaderManager;
@ -42,9 +94,33 @@ public:
void DestroyDeviceObjects();
void GLLost();
void DecimateTrackedVertexArrays();
void ClearTrackedVertexArrays();
private:
void SoftwareTransformAndDraw(int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex);
// drawcall ID
u32 ComputeFastDCID();
u32 ComputeHash(); // Reads deferred vertex data.
// Defer all vertex decoding to a Flush, so that we can hash and cache the
// generated buffers without having to redecode them every time.
// One draw call as recorded by SubmitPrim. Only pointers and parameters are
// stored; the vertex data itself is decoded later, in DecodeVerts.
struct DeferredDrawCall {
// Source vertex data exactly as passed to SubmitPrim (pointer only, not copied).
void *verts;
// Source index data, or null for a non-indexed draw.
void *inds;
// Vertex type word passed to SubmitPrim.
u32 vertType;
// Index format, already shifted down by GE_VTYPE_IDX_SHIFT; DecodeVerts
// switches on the shifted GE_VTYPE_IDX_* constants.
u8 indexType;
// Primitive type (GE_PRIM_*).
u8 prim;
// Count passed to SubmitPrim; for indexed draws this is the number of indices.
u16 vertexCount;
// Inclusive range of vertices the call references: filled by GetIndexBounds
// for indexed draws, otherwise 0 .. vertexCount - 1.
u16 indexLowerBound;
u16 indexUpperBound;
};
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
int numDrawCalls;
// Vertex collector state
IndexGenerator indexGen;
int collectedVerts;
@ -65,6 +141,8 @@ private:
GLuint ebo_[NUM_VBOS];
int curVbo_;
std::map<u32, VertexArrayInfo *> vai_;
// Other
ShaderManager *shaderManager_;
};

View File

@ -645,12 +645,13 @@ void VertexDecoder::SetVertexType(u32 fmt) {
DEBUG_LOG(G3D,"SVT : size = %i, aligned to biggest %i", size, biggest);
}
void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const {
void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound) {
// Find index bounds. Could cache this in display lists.
// Also, this could be greatly sped up with SSE2, although rarely a bottleneck.
// Also, this could be greatly sped up with SSE2/NEON, although rarely a bottleneck.
int lowerBound = 0x7FFFFFFF;
int upperBound = 0;
if (idx == (GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT)) {
u32 idx = vertType & GE_VTYPE_IDX_MASK;
if (idx == GE_VTYPE_IDX_8BIT) {
const u8 *ind8 = (const u8 *)inds;
for (int i = 0; i < count; i++) {
if (ind8[i] < lowerBound)
@ -658,7 +659,7 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *i
if (ind8[i] > upperBound)
upperBound = ind8[i];
}
} else if (idx == (GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT)) {
} else if (idx == GE_VTYPE_IDX_16BIT) {
const u16 *ind16 = (const u16*)inds;
for (int i = 0; i < count; i++) {
if (ind16[i] < lowerBound)
@ -670,13 +671,15 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *i
lowerBound = 0;
upperBound = count - 1;
}
*indexLowerBound = lowerBound;
*indexUpperBound = upperBound;
*indexLowerBound = (u16)lowerBound;
*indexUpperBound = (u16)upperBound;
}
void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *inds, int prim, int count, int indexLowerBound, int indexUpperBound) const {
// Decode the vertices within the found bounds, once each
decoded_ = decodedptr; // + lowerBound * decFmt.stride;
ptr_ = (const u8*)verts + lowerBound * size;
for (int index = lowerBound; index <= upperBound; index++) {
ptr_ = (const u8*)verts + indexLowerBound * size;
for (int index = indexLowerBound; index <= indexUpperBound; index++) {
for (int i = 0; i < numSteps_; i++) {
((*this).*steps_[i])();
}

View File

@ -65,6 +65,7 @@ class VertexDecoder;
typedef void (VertexDecoder::*StepFunction)() const;
void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
// Right now
// - only contains computed information
@ -87,7 +88,7 @@ public:
u32 VertexType() const { return fmt_; }
const DecVtxFormat &GetDecVtxFmt() { return decFmt; }
void DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const;
void DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int indexLowerBound, int indexUpperBound) const;
// This could be easily generalized to inject any one component. Don't know another use for it though.
u32 InjectUVs(u8 *decoded, const void *verts, float *customuv, int count) const;

View File

@ -258,7 +258,10 @@ struct GPUStatistics
void resetFrame() {
numJoins = 0;
numDrawCalls = 0;
numCachedDrawCalls = 0;
numVertsTransformed = 0;
numCachedVertsDrawn = 0;
numTrackedVertexArrays = 0;
numTextureInvalidations = 0;
numTextureSwitches = 0;
numShaderSwitches = 0;
@ -270,8 +273,11 @@ struct GPUStatistics
// Per frame statistics
int numJoins;
int numDrawCalls;
int numCachedDrawCalls;
int numFlushes;
int numVertsTransformed;
int numCachedVertsDrawn;
int numTrackedVertexArrays;
int numTextureInvalidations;
int numTextureSwitches;
int numShaderSwitches;

View File

@ -1,4 +1,4 @@
Microsoft Visual Studio Solution File, Format Version 11.00
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PPSSPPWindows", "PPSSPP.vcxproj", "{567AF8DB-42C1-4D08-96CD-D70A2DFEFC6B}"
ProjectSection(ProjectDependencies) = postProject

View File

@ -487,7 +487,10 @@ namespace MainWindow
g_Config.bDrawWireframe = !g_Config.bDrawWireframe;
UpdateMenus();
break;
case ID_OPTIONS_VERTEXCACHE:
g_Config.bVertexCache = !g_Config.bVertexCache;
UpdateMenus();
break;
case ID_OPTIONS_DISPLAYRAWFRAMEBUFFER:
g_Config.bDisplayFramebuffer = !g_Config.bDisplayFramebuffer;
UpdateMenus();
@ -663,6 +666,7 @@ namespace MainWindow
CHECKITEM(ID_EMULATION_RUNONLOAD, g_Config.bAutoRun);
CHECKITEM(ID_OPTIONS_USEVBO, g_Config.bUseVBO);
CHECKITEM(ID_OPTIONS_DISABLEG3DLOG, g_Config.bDisableG3DLog);
CHECKITEM(ID_OPTIONS_VERTEXCACHE, g_Config.bVertexCache);
UINT enable = !Core_IsStepping() ? MF_GRAYED : MF_ENABLED;
EnableMenuItem(menu,ID_EMULATION_RUN, g_State.bEmuThreadStarted ? enable : MF_GRAYED);

View File

@ -257,6 +257,7 @@ BEGIN
MENUITEM "&Linear Filtering", ID_OPTIONS_LINEARFILTERING
MENUITEM "Si&mple 2x SSAA", ID_OPTIONS_SIMPLE2XSSAA
MENUITEM "&Use VBO", ID_OPTIONS_USEVBO
MENUITEM "&Vertex Cache", ID_OPTIONS_VERTEXCACHE
MENUITEM SEPARATOR
MENUITEM "&Wireframe (experimental)", ID_OPTIONS_WIREFRAME
MENUITEM "&Display Raw Framebuffer", ID_OPTIONS_DISPLAYRAWFRAMEBUFFER

View File

@ -257,6 +257,7 @@
#define ID_OPTIONS_SIMPLE2XSSAA 40133
#define ID_OPTIONS_USEVBO 40134
#define ID_OPTIONS_DISABLEG3DLOG 40135
#define ID_OPTIONS_VERTEXCACHE 40136
#define IDC_STATIC -1
// Next default values for new objects
@ -264,7 +265,7 @@
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 233
#define _APS_NEXT_COMMAND_VALUE 40135
#define _APS_NEXT_COMMAND_VALUE 40137
#define _APS_NEXT_CONTROL_VALUE 1163
#define _APS_NEXT_SYMED_VALUE 101
#endif

2
native

@ -1 +1 @@
Subproject commit f5e775a9a10e38d5ac98261c132875492dfd91c3
Subproject commit b5037341aaca775e806be4b6a7bf109a47e0c655