d3d: Use minihashing in the vertex cache.

This commit is contained in:
Unknown W. Brackets 2014-09-14 13:50:57 -07:00
parent c4b6e74999
commit f0c37e3f61
3 changed files with 109 additions and 59 deletions

View File

@ -81,6 +81,8 @@ enum {
#define VERTEXCACHE_DECIMATION_INTERVAL 17 #define VERTEXCACHE_DECIMATION_INTERVAL 17
// Tuning knobs for decimating the tracked vertex array cache. The ages are
// subtracted from gpuStats.numFlips to form thresholds that an entry's
// lastFrame is compared against:
//   VAI_KILL_AGE            - age threshold for ordinary entries.
//   VAI_UNRELIABLE_KILL_AGE - age threshold for entries in VAI_UNRELIABLE status.
//   VAI_UNRELIABLE_KILL_MAX - cap on how many unreliable entries one decimation
//                             pass may delete, so rehashing does not happen too often.
enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 };
// Check for max first as clamping to max is more common than min when lighting. // Check for max first as clamping to max is more common than min when lighting.
inline float clamp(float in, float min, float max) { inline float clamp(float in, float min, float max) {
return in > max ? max : (in < min ? min : in); return in > max ? max : (in < min ? min : in);
@ -275,13 +277,12 @@ VertexDecoder *TransformDrawEngineDX9::GetVertexDecoder(u32 vtype) {
auto iter = decoderMap_.find(vtype); auto iter = decoderMap_.find(vtype);
if (iter != decoderMap_.end()) if (iter != decoderMap_.end())
return iter->second; return iter->second;
VertexDecoder*dec = new VertexDecoder(); VertexDecoder *dec = new VertexDecoder();
dec->SetVertexType(vtype, decOptions_, decJitCache_); dec->SetVertexType(vtype, decOptions_, decJitCache_);
decoderMap_[vtype] = dec; decoderMap_[vtype] = dec;
return dec; return dec;
} }
// Thin wrapper: hands vertType unchanged to SetupVertexDecoderInternal().
void TransformDrawEngineDX9::SetupVertexDecoder(u32 vertType) {
	SetupVertexDecoderInternal(vertType);
}
@ -396,6 +397,27 @@ void TransformDrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType
} }
} }
void TransformDrawEngineDX9::DecodeVerts() {
if (uvScale) {
const UVScale origUV = gstate_c.uv;
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
gstate_c.uv = uvScale[decodeCounter_];
DecodeVertsStep();
}
gstate_c.uv = origUV;
} else {
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
DecodeVertsStep();
}
}
// Sanity check
if (indexGen.Prim() < 0) {
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
// Force to points (0)
indexGen.AddPrim(GE_PRIM_POINTS, 0);
}
}
void TransformDrawEngineDX9::DecodeVertsStep() { void TransformDrawEngineDX9::DecodeVertsStep() {
const int i = decodeCounter_; const int i = decodeCounter_;
@ -470,31 +492,66 @@ void TransformDrawEngineDX9::DecodeVertsStep() {
} }
} }
// Cheap, sampled hash of a memory range, used to notice changed data without
// hashing all of it.
//
//   ptr - start of the data to sample.
//   sz  - size of the data in bytes.
//
// Ranges of more than 100 u32 words are sampled with DoReliableHash() over
// windows of up to 100 bytes spaced a quarter of the range apart; smaller
// ranges just combine their first and last u32. No byte outside of
// [ptr, ptr + sz) is read.
inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
	const size_t byteCount = sz;

	// Switch to u32 units.
	const u32 *p = (const u32 *)ptr;
	sz >>= 2;

	if (sz == 0) {
		// Fewer than four bytes (e.g. a single 16-bit index): there is no whole
		// u32 to read and sz - 1 would wrap around, so fold the bytes instead.
		const unsigned char *bytes = (const unsigned char *)ptr;
		u32 hash = 0;
		for (size_t i = 0; i < byteCount; ++i) {
			hash = (hash << 8) | bytes[i];
		}
		return hash;
	}

	if (sz > 100) {
		const size_t windowBytes = 100;
		const size_t step = sz / 4;
		u32 hash = 0;
		for (size_t i = 0; i < sz; i += step) {
			// When sz is not a multiple of four there is one extra, short window
			// starting at 4 * step. Clamp it so it ends where the range ends.
			const size_t bytesLeft = (sz - i) * sizeof(u32);
			const size_t len = bytesLeft < windowBytes ? bytesLeft : windowBytes;
			hash += DoReliableHash(p + i, (int)len, 0x3A44B9C4);
		}
		return hash;
	}

	return p[0] + p[sz - 1];
}
// Computes a quick hash over a sample of the deferred draw calls by summing
// ComputeMiniHashRange() results for their vertex (and, if indexed, index)
// data. With fewer than 3 draw calls every call is sampled; with 3 to 7 every
// fourth one; otherwise the stride is numDrawCalls / 8.
u32 TransformDrawEngineDX9::ComputeMiniHash() {
	const int vertexSize = dec_->GetDecVtxFmt().stride;
	const bool indices16Bit = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT;
	const int indexSize = indices16Bit ? 2 : 1;

	// Pick how many draw calls to skip between samples.
	int stride = 1;
	if (numDrawCalls >= 8) {
		stride = numDrawCalls / 8;
	} else if (numDrawCalls >= 3) {
		stride = 4;
	}

	u32 result = 0;
	for (int n = 0; n < numDrawCalls; n += stride) {
		const DeferredDrawCall &call = drawCalls[n];
		if (call.inds) {
			// Indexed: sample only the vertices between the index bounds, then
			// the index data itself.
			const int lowest = call.indexLowerBound;
			const int highest = call.indexUpperBound;
			const u8 *firstVertex = (const u8 *)call.verts + vertexSize * lowest;
			result += ComputeMiniHashRange(firstVertex, vertexSize * (highest - lowest));
			result += ComputeMiniHashRange(call.inds, indexSize * call.vertexCount);
		} else {
			result += ComputeMiniHashRange(call.verts, vertexSize * call.vertexCount);
		}
	}

	return result;
}
// Force to points (0)
// Flags a tracked vertex array as VAI_UNRELIABLE and drops the vertex and
// index buffers it holds, if any, clearing the pointers afterwards.
void TransformDrawEngineDX9::MarkUnreliable(VertexArrayInfoDX9 *vai) {
	vai->status = VertexArrayInfoDX9::VAI_UNRELIABLE;

	if (vai->vbo != nullptr) {
		vai->vbo->Release();
		vai->vbo = nullptr;
	}

	if (vai->ebo != nullptr) {
		vai->ebo->Release();
		vai->ebo = nullptr;
	}
}
u32 TransformDrawEngineDX9::ComputeHash() { u32 TransformDrawEngineDX9::ComputeHash() {
u32 fullhash = 0; u32 fullhash = 0;
int vertexSize = dec_->GetDecVtxFmt().stride; const int vertexSize = dec_->GetDecVtxFmt().stride;
const int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
// TODO: Add some caps both for numDrawCalls and num verts to check? // TODO: Add some caps both for numDrawCalls and num verts to check?
// It is really very expensive to check all the vertex data so often. // It is really very expensive to check all the vertex data so often.
@ -518,7 +575,6 @@ u32 TransformDrawEngineDX9::ComputeHash() {
// we do when drawing. // we do when drawing.
fullhash += DoReliableHash((const char *)dc.verts + vertexSize * indexLowerBound, fullhash += DoReliableHash((const char *)dc.verts + vertexSize * indexLowerBound,
vertexSize * (indexUpperBound - indexLowerBound), 0x029F3EE1); vertexSize * (indexUpperBound - indexLowerBound), 0x029F3EE1);
int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
// Hm, we will miss some indices when combining above, but meh, it should be fine. // Hm, we will miss some indices when combining above, but meh, it should be fine.
fullhash += DoReliableHash((const char *)dc.inds, indexSize * dc.vertexCount, 0x955FD1CA); fullhash += DoReliableHash((const char *)dc.inds, indexSize * dc.vertexCount, 0x955FD1CA);
i = lastMatch; i = lastMatch;
@ -531,24 +587,6 @@ u32 TransformDrawEngineDX9::ComputeHash() {
return fullhash; return fullhash;
} }
// Builds a fast identifier for the current batch of deferred draw calls by
// mixing each call's vertex pointer, index pointer, vertex type, vertex count
// and primitive type. Only these values are read, not the data they point at.
u32 TransformDrawEngineDX9::ComputeFastDCID() {
	u32 id = 0;
	for (int n = 0; n < numDrawCalls; ++n) {
		const auto &call = drawCalls[n];
		// XOR in a field, then rotate by 13 bits so the next field lands on
		// different bits. The last field (prim) is not followed by a rotate.
		id = __rotl(id ^ (u32)(uintptr_t)call.verts, 13);
		id = __rotl(id ^ (u32)(uintptr_t)call.inds, 13);
		id = __rotl(id ^ (u32)call.vertType, 13);
		id = __rotl(id ^ (u32)call.vertexCount, 13);
		id ^= (u32)call.prim;
	}
	return id;
}
enum { VAI_KILL_AGE = 120 };
void TransformDrawEngineDX9::ClearTrackedVertexArrays() { void TransformDrawEngineDX9::ClearTrackedVertexArrays() {
for (auto vai = vai_.begin(); vai != vai_.end(); vai++) { for (auto vai = vai_.begin(); vai != vai_.end(); vai++) {
delete vai->second; delete vai->second;
@ -563,14 +601,23 @@ void TransformDrawEngineDX9::DecimateTrackedVertexArrays() {
return; return;
} }
int threshold = gpuStats.numFlips - VAI_KILL_AGE; const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
for (auto iter = vai_.begin(); iter != vai_.end(); ) { for (auto iter = vai_.begin(); iter != vai_.end(); ) {
if (iter->second->lastFrame < threshold) { bool kill;
if (iter->second->status == VertexArrayInfoDX9::VAI_UNRELIABLE) {
// We limit killing unreliable so we don't rehash too often.
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
} else {
kill = iter->second->lastFrame < threshold;
}
if (kill) {
delete iter->second; delete iter->second;
vai_.erase(iter++); vai_.erase(iter++);
} } else {
else
++iter; ++iter;
}
} }
// Enable if you want to see vertex decoders in the log output. Need a better way. // Enable if you want to see vertex decoders in the log output. Need a better way.
@ -596,7 +643,6 @@ VertexArrayInfoDX9::~VertexArrayInfoDX9() {
void TransformDrawEngineDX9::DoFlush() { void TransformDrawEngineDX9::DoFlush() {
gpuStats.numFlushes++; gpuStats.numFlushes++;
gpuStats.numTrackedVertexArrays = (int)vai_.size(); gpuStats.numTrackedVertexArrays = (int)vai_.size();
// This is not done on every drawcall, we should collect vertex data // This is not done on every drawcall, we should collect vertex data
@ -622,7 +668,7 @@ void TransformDrawEngineDX9::DoFlush() {
useCache = false; useCache = false;
if (useCache) { if (useCache) {
u32 id = ComputeFastDCID(); u32 id = dcid_;
auto iter = vai_.find(id); auto iter = vai_.find(id);
VertexArrayInfoDX9 *vai; VertexArrayInfoDX9 *vai;
if (iter != vai_.end()) { if (iter != vai_.end()) {
@ -639,6 +685,7 @@ void TransformDrawEngineDX9::DoFlush() {
// Haven't seen this one before. // Haven't seen this one before.
u32 dataHash = ComputeHash(); u32 dataHash = ComputeHash();
vai->hash = dataHash; vai->hash = dataHash;
vai->minihash = ComputeMiniHash();
vai->status = VertexArrayInfoDX9::VAI_HASHING; vai->status = VertexArrayInfoDX9::VAI_HASHING;
vai->drawsUntilNextFullHash = 0; vai->drawsUntilNextFullHash = 0;
DecodeVerts(); // writes to indexGen DecodeVerts(); // writes to indexGen
@ -659,21 +706,18 @@ void TransformDrawEngineDX9::DoFlush() {
vai->numFrames++; vai->numFrames++;
} }
if (vai->drawsUntilNextFullHash == 0) { if (vai->drawsUntilNextFullHash == 0) {
u32 newHash = ComputeHash(); // Let's try to skip a full hash if mini would fail.
if (newHash != vai->hash) { const u32 newMiniHash = ComputeMiniHash();
vai->status = VertexArrayInfoDX9::VAI_UNRELIABLE; u32 newHash = vai->hash;
if (vai->vbo) { if (newMiniHash == vai->minihash) {
vai->vbo->Release(); newHash = ComputeHash();
vai->vbo = NULL; }
} if (newMiniHash != vai->minihash || newHash != vai->hash) {
if (vai->ebo) { MarkUnreliable(vai);
vai->ebo->Release();
vai->ebo = NULL;
}
DecodeVerts(); DecodeVerts();
goto rotateVBO; goto rotateVBO;
} }
if (vai->numVerts > 100) { if (vai->numVerts > 64) {
// exponential backoff up to 16 draws, then every 24 // exponential backoff up to 16 draws, then every 24
vai->drawsUntilNextFullHash = std::min(24, vai->numFrames); vai->drawsUntilNextFullHash = std::min(24, vai->numFrames);
} else { } else {
@ -686,7 +730,12 @@ void TransformDrawEngineDX9::DoFlush() {
//} //}
} else { } else {
vai->drawsUntilNextFullHash--; vai->drawsUntilNextFullHash--;
// TODO: "mini-hashing" the first 32 bytes of the vertex/index data or something. u32 newMiniHash = ComputeMiniHash();
if (newMiniHash != vai->minihash) {
MarkUnreliable(vai);
DecodeVerts();
goto rotateVBO;
}
} }
if (vai->vbo == 0) { if (vai->vbo == 0) {

View File

@ -82,6 +82,7 @@ public:
}; };
u32 hash; u32 hash;
u32 minihash;
Status status; Status status;
@ -162,9 +163,9 @@ private:
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType); u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType);
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType); u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType);
// drawcall ID u32 ComputeMiniHash();
u32 ComputeFastDCID();
u32 ComputeHash(); // Reads deferred vertex data. u32 ComputeHash(); // Reads deferred vertex data.
void MarkUnreliable(VertexArrayInfoDX9 *vai);
VertexDecoder *GetVertexDecoder(u32 vtype); VertexDecoder *GetVertexDecoder(u32 vtype);

View File

@ -547,8 +547,8 @@ void TransformDrawEngine::DecimateTrackedVertexArrays() {
return; return;
} }
int threshold = gpuStats.numFlips - VAI_KILL_AGE; const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE; const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX; int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
for (auto iter = vai_.begin(); iter != vai_.end(); ) { for (auto iter = vai_.begin(); iter != vai_.end(); ) {
bool kill; bool kill;