mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-25 09:09:49 +00:00
d3d: Use minihashing in the vertex cache.
This commit is contained in:
parent
c4b6e74999
commit
f0c37e3f61
@ -81,6 +81,8 @@ enum {
|
|||||||
|
|
||||||
#define VERTEXCACHE_DECIMATION_INTERVAL 17
|
#define VERTEXCACHE_DECIMATION_INTERVAL 17
|
||||||
|
|
||||||
|
enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 };
|
||||||
|
|
||||||
// Check for max first as clamping to max is more common than min when lighting.
|
// Check for max first as clamping to max is more common than min when lighting.
|
||||||
inline float clamp(float in, float min, float max) {
|
inline float clamp(float in, float min, float max) {
|
||||||
return in > max ? max : (in < min ? min : in);
|
return in > max ? max : (in < min ? min : in);
|
||||||
@ -275,13 +277,12 @@ VertexDecoder *TransformDrawEngineDX9::GetVertexDecoder(u32 vtype) {
|
|||||||
auto iter = decoderMap_.find(vtype);
|
auto iter = decoderMap_.find(vtype);
|
||||||
if (iter != decoderMap_.end())
|
if (iter != decoderMap_.end())
|
||||||
return iter->second;
|
return iter->second;
|
||||||
VertexDecoder*dec = new VertexDecoder();
|
VertexDecoder *dec = new VertexDecoder();
|
||||||
dec->SetVertexType(vtype, decOptions_, decJitCache_);
|
dec->SetVertexType(vtype, decOptions_, decJitCache_);
|
||||||
decoderMap_[vtype] = dec;
|
decoderMap_[vtype] = dec;
|
||||||
return dec;
|
return dec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void TransformDrawEngineDX9::SetupVertexDecoder(u32 vertType) {
|
void TransformDrawEngineDX9::SetupVertexDecoder(u32 vertType) {
|
||||||
SetupVertexDecoderInternal(vertType);
|
SetupVertexDecoderInternal(vertType);
|
||||||
}
|
}
|
||||||
@ -396,6 +397,27 @@ void TransformDrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TransformDrawEngineDX9::DecodeVerts() {
|
||||||
|
if (uvScale) {
|
||||||
|
const UVScale origUV = gstate_c.uv;
|
||||||
|
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
|
||||||
|
gstate_c.uv = uvScale[decodeCounter_];
|
||||||
|
DecodeVertsStep();
|
||||||
|
}
|
||||||
|
gstate_c.uv = origUV;
|
||||||
|
} else {
|
||||||
|
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
|
||||||
|
DecodeVertsStep();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Sanity check
|
||||||
|
if (indexGen.Prim() < 0) {
|
||||||
|
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
|
||||||
|
// Force to points (0)
|
||||||
|
indexGen.AddPrim(GE_PRIM_POINTS, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void TransformDrawEngineDX9::DecodeVertsStep() {
|
void TransformDrawEngineDX9::DecodeVertsStep() {
|
||||||
const int i = decodeCounter_;
|
const int i = decodeCounter_;
|
||||||
|
|
||||||
@ -470,31 +492,66 @@ void TransformDrawEngineDX9::DecodeVertsStep() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
|
||||||
|
// Switch to u32 units.
|
||||||
|
const u32 *p = (const u32 *)ptr;
|
||||||
|
sz >>= 2;
|
||||||
|
|
||||||
void TransformDrawEngineDX9::DecodeVerts() {
|
if (sz > 100) {
|
||||||
if (uvScale) {
|
size_t step = sz / 4;
|
||||||
const UVScale origUV = gstate_c.uv;
|
u32 hash = 0;
|
||||||
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
|
for (size_t i = 0; i < sz; i += step) {
|
||||||
gstate_c.uv = uvScale[decodeCounter_];
|
hash += DoReliableHash(p + i, 100, 0x3A44B9C4);
|
||||||
DecodeVertsStep();
|
|
||||||
}
|
}
|
||||||
gstate_c.uv = origUV;
|
return hash;
|
||||||
} else {
|
} else {
|
||||||
for (; decodeCounter_ < numDrawCalls; decodeCounter_++) {
|
return p[0] + p[sz - 1];
|
||||||
DecodeVertsStep();
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 TransformDrawEngineDX9::ComputeMiniHash() {
|
||||||
|
u32 fullhash = 0;
|
||||||
|
const int vertexSize = dec_->GetDecVtxFmt().stride;
|
||||||
|
const int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
|
||||||
|
|
||||||
|
int step;
|
||||||
|
if (numDrawCalls < 3) {
|
||||||
|
step = 1;
|
||||||
|
} else if (numDrawCalls < 8) {
|
||||||
|
step = 4;
|
||||||
|
} else {
|
||||||
|
step = numDrawCalls / 8;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < numDrawCalls; i += step) {
|
||||||
|
const DeferredDrawCall &dc = drawCalls[i];
|
||||||
|
if (!dc.inds) {
|
||||||
|
fullhash += ComputeMiniHashRange(dc.verts, vertexSize * dc.vertexCount);
|
||||||
|
} else {
|
||||||
|
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
|
||||||
|
fullhash += ComputeMiniHashRange((const u8 *)dc.verts + vertexSize * indexLowerBound, vertexSize * (indexUpperBound - indexLowerBound));
|
||||||
|
fullhash += ComputeMiniHashRange(dc.inds, indexSize * dc.vertexCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Sanity check
|
|
||||||
if (indexGen.Prim() < 0) {
|
return fullhash;
|
||||||
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
|
}
|
||||||
// Force to points (0)
|
|
||||||
indexGen.AddPrim(GE_PRIM_POINTS, 0);
|
void TransformDrawEngineDX9::MarkUnreliable(VertexArrayInfoDX9 *vai) {
|
||||||
|
vai->status = VertexArrayInfoDX9::VAI_UNRELIABLE;
|
||||||
|
if (vai->vbo) {
|
||||||
|
vai->vbo->Release();
|
||||||
|
vai->vbo = nullptr;
|
||||||
|
}
|
||||||
|
if (vai->ebo) {
|
||||||
|
vai->ebo->Release();
|
||||||
|
vai->ebo = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 TransformDrawEngineDX9::ComputeHash() {
|
u32 TransformDrawEngineDX9::ComputeHash() {
|
||||||
u32 fullhash = 0;
|
u32 fullhash = 0;
|
||||||
int vertexSize = dec_->GetDecVtxFmt().stride;
|
const int vertexSize = dec_->GetDecVtxFmt().stride;
|
||||||
|
const int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
|
||||||
|
|
||||||
// TODO: Add some caps both for numDrawCalls and num verts to check?
|
// TODO: Add some caps both for numDrawCalls and num verts to check?
|
||||||
// It is really very expensive to check all the vertex data so often.
|
// It is really very expensive to check all the vertex data so often.
|
||||||
@ -518,7 +575,6 @@ u32 TransformDrawEngineDX9::ComputeHash() {
|
|||||||
// we do when drawing.
|
// we do when drawing.
|
||||||
fullhash += DoReliableHash((const char *)dc.verts + vertexSize * indexLowerBound,
|
fullhash += DoReliableHash((const char *)dc.verts + vertexSize * indexLowerBound,
|
||||||
vertexSize * (indexUpperBound - indexLowerBound), 0x029F3EE1);
|
vertexSize * (indexUpperBound - indexLowerBound), 0x029F3EE1);
|
||||||
int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
|
|
||||||
// Hm, we will miss some indices when combining above, but meh, it should be fine.
|
// Hm, we will miss some indices when combining above, but meh, it should be fine.
|
||||||
fullhash += DoReliableHash((const char *)dc.inds, indexSize * dc.vertexCount, 0x955FD1CA);
|
fullhash += DoReliableHash((const char *)dc.inds, indexSize * dc.vertexCount, 0x955FD1CA);
|
||||||
i = lastMatch;
|
i = lastMatch;
|
||||||
@ -531,24 +587,6 @@ u32 TransformDrawEngineDX9::ComputeHash() {
|
|||||||
return fullhash;
|
return fullhash;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 TransformDrawEngineDX9::ComputeFastDCID() {
|
|
||||||
u32 hash = 0;
|
|
||||||
for (int i = 0; i < numDrawCalls; i++) {
|
|
||||||
hash ^= (u32)(uintptr_t)drawCalls[i].verts;
|
|
||||||
hash = __rotl(hash, 13);
|
|
||||||
hash ^= (u32)(uintptr_t)drawCalls[i].inds;
|
|
||||||
hash = __rotl(hash, 13);
|
|
||||||
hash ^= (u32)drawCalls[i].vertType;
|
|
||||||
hash = __rotl(hash, 13);
|
|
||||||
hash ^= (u32)drawCalls[i].vertexCount;
|
|
||||||
hash = __rotl(hash, 13);
|
|
||||||
hash ^= (u32)drawCalls[i].prim;
|
|
||||||
}
|
|
||||||
return hash;
|
|
||||||
}
|
|
||||||
|
|
||||||
enum { VAI_KILL_AGE = 120 };
|
|
||||||
|
|
||||||
void TransformDrawEngineDX9::ClearTrackedVertexArrays() {
|
void TransformDrawEngineDX9::ClearTrackedVertexArrays() {
|
||||||
for (auto vai = vai_.begin(); vai != vai_.end(); vai++) {
|
for (auto vai = vai_.begin(); vai != vai_.end(); vai++) {
|
||||||
delete vai->second;
|
delete vai->second;
|
||||||
@ -563,14 +601,23 @@ void TransformDrawEngineDX9::DecimateTrackedVertexArrays() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int threshold = gpuStats.numFlips - VAI_KILL_AGE;
|
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
|
||||||
|
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
|
||||||
|
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
|
||||||
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
|
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
|
||||||
if (iter->second->lastFrame < threshold) {
|
bool kill;
|
||||||
|
if (iter->second->status == VertexArrayInfoDX9::VAI_UNRELIABLE) {
|
||||||
|
// We limit killing unreliable so we don't rehash too often.
|
||||||
|
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
|
||||||
|
} else {
|
||||||
|
kill = iter->second->lastFrame < threshold;
|
||||||
|
}
|
||||||
|
if (kill) {
|
||||||
delete iter->second;
|
delete iter->second;
|
||||||
vai_.erase(iter++);
|
vai_.erase(iter++);
|
||||||
}
|
} else {
|
||||||
else
|
|
||||||
++iter;
|
++iter;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable if you want to see vertex decoders in the log output. Need a better way.
|
// Enable if you want to see vertex decoders in the log output. Need a better way.
|
||||||
@ -596,7 +643,6 @@ VertexArrayInfoDX9::~VertexArrayInfoDX9() {
|
|||||||
|
|
||||||
void TransformDrawEngineDX9::DoFlush() {
|
void TransformDrawEngineDX9::DoFlush() {
|
||||||
gpuStats.numFlushes++;
|
gpuStats.numFlushes++;
|
||||||
|
|
||||||
gpuStats.numTrackedVertexArrays = (int)vai_.size();
|
gpuStats.numTrackedVertexArrays = (int)vai_.size();
|
||||||
|
|
||||||
// This is not done on every drawcall, we should collect vertex data
|
// This is not done on every drawcall, we should collect vertex data
|
||||||
@ -622,7 +668,7 @@ void TransformDrawEngineDX9::DoFlush() {
|
|||||||
useCache = false;
|
useCache = false;
|
||||||
|
|
||||||
if (useCache) {
|
if (useCache) {
|
||||||
u32 id = ComputeFastDCID();
|
u32 id = dcid_;
|
||||||
auto iter = vai_.find(id);
|
auto iter = vai_.find(id);
|
||||||
VertexArrayInfoDX9 *vai;
|
VertexArrayInfoDX9 *vai;
|
||||||
if (iter != vai_.end()) {
|
if (iter != vai_.end()) {
|
||||||
@ -639,6 +685,7 @@ void TransformDrawEngineDX9::DoFlush() {
|
|||||||
// Haven't seen this one before.
|
// Haven't seen this one before.
|
||||||
u32 dataHash = ComputeHash();
|
u32 dataHash = ComputeHash();
|
||||||
vai->hash = dataHash;
|
vai->hash = dataHash;
|
||||||
|
vai->minihash = ComputeMiniHash();
|
||||||
vai->status = VertexArrayInfoDX9::VAI_HASHING;
|
vai->status = VertexArrayInfoDX9::VAI_HASHING;
|
||||||
vai->drawsUntilNextFullHash = 0;
|
vai->drawsUntilNextFullHash = 0;
|
||||||
DecodeVerts(); // writes to indexGen
|
DecodeVerts(); // writes to indexGen
|
||||||
@ -659,21 +706,18 @@ void TransformDrawEngineDX9::DoFlush() {
|
|||||||
vai->numFrames++;
|
vai->numFrames++;
|
||||||
}
|
}
|
||||||
if (vai->drawsUntilNextFullHash == 0) {
|
if (vai->drawsUntilNextFullHash == 0) {
|
||||||
u32 newHash = ComputeHash();
|
// Let's try to skip a full hash if mini would fail.
|
||||||
if (newHash != vai->hash) {
|
const u32 newMiniHash = ComputeMiniHash();
|
||||||
vai->status = VertexArrayInfoDX9::VAI_UNRELIABLE;
|
u32 newHash = vai->hash;
|
||||||
if (vai->vbo) {
|
if (newMiniHash == vai->minihash) {
|
||||||
vai->vbo->Release();
|
newHash = ComputeHash();
|
||||||
vai->vbo = NULL;
|
}
|
||||||
}
|
if (newMiniHash != vai->minihash || newHash != vai->hash) {
|
||||||
if (vai->ebo) {
|
MarkUnreliable(vai);
|
||||||
vai->ebo->Release();
|
|
||||||
vai->ebo = NULL;
|
|
||||||
}
|
|
||||||
DecodeVerts();
|
DecodeVerts();
|
||||||
goto rotateVBO;
|
goto rotateVBO;
|
||||||
}
|
}
|
||||||
if (vai->numVerts > 100) {
|
if (vai->numVerts > 64) {
|
||||||
// exponential backoff up to 16 draws, then every 24
|
// exponential backoff up to 16 draws, then every 24
|
||||||
vai->drawsUntilNextFullHash = std::min(24, vai->numFrames);
|
vai->drawsUntilNextFullHash = std::min(24, vai->numFrames);
|
||||||
} else {
|
} else {
|
||||||
@ -686,7 +730,12 @@ void TransformDrawEngineDX9::DoFlush() {
|
|||||||
//}
|
//}
|
||||||
} else {
|
} else {
|
||||||
vai->drawsUntilNextFullHash--;
|
vai->drawsUntilNextFullHash--;
|
||||||
// TODO: "mini-hashing" the first 32 bytes of the vertex/index data or something.
|
u32 newMiniHash = ComputeMiniHash();
|
||||||
|
if (newMiniHash != vai->minihash) {
|
||||||
|
MarkUnreliable(vai);
|
||||||
|
DecodeVerts();
|
||||||
|
goto rotateVBO;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vai->vbo == 0) {
|
if (vai->vbo == 0) {
|
||||||
|
@ -82,6 +82,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
u32 hash;
|
u32 hash;
|
||||||
|
u32 minihash;
|
||||||
|
|
||||||
Status status;
|
Status status;
|
||||||
|
|
||||||
@ -162,9 +163,9 @@ private:
|
|||||||
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType);
|
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType);
|
||||||
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType);
|
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType);
|
||||||
|
|
||||||
// drawcall ID
|
u32 ComputeMiniHash();
|
||||||
u32 ComputeFastDCID();
|
|
||||||
u32 ComputeHash(); // Reads deferred vertex data.
|
u32 ComputeHash(); // Reads deferred vertex data.
|
||||||
|
void MarkUnreliable(VertexArrayInfoDX9 *vai);
|
||||||
|
|
||||||
VertexDecoder *GetVertexDecoder(u32 vtype);
|
VertexDecoder *GetVertexDecoder(u32 vtype);
|
||||||
|
|
||||||
|
@ -547,8 +547,8 @@ void TransformDrawEngine::DecimateTrackedVertexArrays() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int threshold = gpuStats.numFlips - VAI_KILL_AGE;
|
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
|
||||||
int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
|
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
|
||||||
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
|
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
|
||||||
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
|
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
|
||||||
bool kill;
|
bool kill;
|
||||||
|
Loading…
Reference in New Issue
Block a user