mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-26 23:10:38 +00:00
Replace the most critical unordered_maps and maps with two new maps: DenseHashMap and PrehashMap.
This commit is contained in:
parent
cbd107b03d
commit
e0e13e191f
@ -22,7 +22,8 @@
|
||||
<ProjectGuid>{3FCDBAE2-5103-4350-9A8E-848CE9C73195}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>Common</RootNamespace>
|
||||
<WindowsTargetPlatformVersion></WindowsTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion>
|
||||
</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
@ -226,6 +227,7 @@
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GraphicsContext.h" />
|
||||
<ClInclude Include="Hashmaps.h" />
|
||||
<ClInclude Include="KeyMap.h" />
|
||||
<ClInclude Include="Log.h" />
|
||||
<ClInclude Include="LogManager.h" />
|
||||
|
@ -74,6 +74,7 @@
|
||||
<Filter>Vulkan</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="OSVersion.h" />
|
||||
<ClInclude Include="Hashmaps.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="stdafx.cpp" />
|
||||
|
271
Common/Hashmaps.h
Normal file
271
Common/Hashmaps.h
Normal file
@ -0,0 +1,271 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
#include <cstdint>
#include <cstring>
#include <functional>
#include <vector>

#include "ext/xxhash.h"
|
||||
|
||||
// Whatever random value.
|
||||
const uint32_t hashmapSeed = 0x23B58532;
|
||||
|
||||
// TODO: Try hardware CRC. Unfortunately not available on older Intels or ARM32.
|
||||
// Seems to be ubiquitous on ARM64 though.
|
||||
template<class K>
|
||||
inline uint32_t HashKey(const K &k) {
|
||||
return XXH32(&k, sizeof(k), hashmapSeed);
|
||||
}
|
||||
// Bytewise equality of two keys: true when all sizeof(K) bytes of a and b match.
// Like HashKey(), this looks at the whole object representation, so padding bytes
// (if K has any) take part in the comparison.
template<class K>
inline bool KeyEquals(const K &a, const K &b) {
	const int diff = memcmp(&a, &b, sizeof(K));
	return diff == 0;
}
|
||||
|
||||
// State of one slot in the open-addressing tables below.
enum class BucketState {
	// Never used since the last clear/rehash. Must be the zero value: freshly resized
	// (value-initialized) slots rely on starting out FREE. A probe may stop here.
	FREE = 0,
	// Holds a live key/value.
	TAKEN = 1,
	// Tombstone: for linear probing to work, a removed slot must not terminate the
	// probe chain, so lookups walk past it.
	REMOVED = 2,
};
|
||||
|
||||
|
||||
// Uses linear probing for cache-friendliness. Not segregating values from keys because
|
||||
// we always use very small values, so it's probably better to have them in the same
|
||||
// cache-line as the corresponding key.
|
||||
// Enforces that value are pointers to make sure that combined storage makes sense.
|
||||
template <class Key, class Value>
|
||||
class DenseHashMap {
|
||||
public:
|
||||
DenseHashMap(int initialCapacity) : capacity_(initialCapacity) {
|
||||
map.resize(initialCapacity);
|
||||
}
|
||||
|
||||
// Returns nullptr if no entry was found.
|
||||
Value Get(const Key &key) {
|
||||
uint32_t mask = capacity_ - 1;
|
||||
uint32_t pos = HashKey(key) & mask;
|
||||
// No? Let's go into search mode. Linear probing.
|
||||
uint32_t p = pos;
|
||||
while (true) {
|
||||
if (map[p].state == BucketState::TAKEN && KeyEquals(key, map[p].key))
|
||||
return map[p].value;
|
||||
else if (map[p].state == BucketState::FREE)
|
||||
return nullptr;
|
||||
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
|
||||
if (p == pos)
|
||||
DebugBreak();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Returns false if we already had the key! Which is a bit different.
|
||||
bool Insert(const Key &key, Value value) {
|
||||
// Check load factor, resize if necessary. We never shrink.
|
||||
if (count_ > capacity_ / 2) {
|
||||
Grow();
|
||||
}
|
||||
uint32_t mask = capacity_ - 1;
|
||||
uint32_t pos = HashKey(key) & mask;
|
||||
uint32_t p = pos;
|
||||
while (true) {
|
||||
if (map[p].state == BucketState::TAKEN) {
|
||||
if (KeyEquals(key, map[p].key)) {
|
||||
DebugBreak(); // Bad! We already got this one. Let's avoid this case.
|
||||
return false;
|
||||
}
|
||||
// continue looking....
|
||||
} else {
|
||||
// Got a place, either removed or FREE.
|
||||
break;
|
||||
}
|
||||
p = (p + 1) & mask;
|
||||
if (p == pos) {
|
||||
// FULL! Error. Should not happen thanks to Grow().
|
||||
DebugBreak();
|
||||
}
|
||||
}
|
||||
map[p].state = BucketState::TAKEN;
|
||||
map[p].key = key;
|
||||
map[p].value = value;
|
||||
count_++;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Remove(const Key &key) {
|
||||
uint32_t mask = capacity_ - 1;
|
||||
uint32_t pos = HashKey(key) & mask;
|
||||
uint32_t p = pos;
|
||||
while (map[p].state != BucketState::FREE) {
|
||||
if (map[p].state == BucketState::TAKEN && KeyEquals(key, map[p].key)) {
|
||||
// Got it! Mark it as removed.
|
||||
map[p].state = BucketState::REMOVED;
|
||||
count_--;
|
||||
return;
|
||||
}
|
||||
p = (p + 1) & mask;
|
||||
if (p == pos) {
|
||||
// FULL! Error. Should not happen.
|
||||
DebugBreak();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return count_;
|
||||
}
|
||||
|
||||
// TODO: Find a way to avoid std::function. I tried using a templated argument
|
||||
// but couldn't get it to pass the compiler.
|
||||
inline void Iterate(std::function<void(const typename Key &key, typename Value value)> func) {
|
||||
for (auto &iter : map) {
|
||||
if (iter.state == BucketState::TAKEN) {
|
||||
func(iter.key, iter.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
// TODO: Speedup?
|
||||
map.clear();
|
||||
map.resize(capacity_);
|
||||
}
|
||||
|
||||
private:
|
||||
void Grow() {
|
||||
// We simply move out the existing data, then we re-insert the old.
|
||||
// This is extremely non-atomic and will need synchronization.
|
||||
std::vector<Pair> old = std::move(map);
|
||||
capacity_ *= 2;
|
||||
map.clear();
|
||||
map.resize(capacity_);
|
||||
count_ = 0; // Insert will update it.
|
||||
for (auto &iter : old) {
|
||||
if (iter.state == BucketState::TAKEN) {
|
||||
Insert(iter.key, iter.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
struct Pair {
|
||||
BucketState state;
|
||||
Key key;
|
||||
Value value;
|
||||
};
|
||||
std::vector<Pair> map;
|
||||
int capacity_;
|
||||
int count_ = 0;
|
||||
};
|
||||
|
||||
// Like the above, uses linear probing for cache-friendliness.
|
||||
// Does not perform hashing at all so expects well-distributed keys.
|
||||
template <class Value>
|
||||
class PrehashMap {
|
||||
public:
|
||||
PrehashMap(int initialCapacity) : capacity_(initialCapacity) {
|
||||
map.resize(initialCapacity);
|
||||
}
|
||||
|
||||
// Returns nullptr if no entry was found.
|
||||
Value Get(uint32_t hash) {
|
||||
uint32_t mask = capacity_ - 1;
|
||||
uint32_t pos = hash & mask;
|
||||
// No? Let's go into search mode. Linear probing.
|
||||
uint32_t p = pos;
|
||||
while (true) {
|
||||
if (map[p].state == BucketState::TAKEN && hash == map[p].hash)
|
||||
return map[p].value;
|
||||
else if (map[p].state == BucketState::FREE)
|
||||
return nullptr;
|
||||
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
|
||||
if (p == pos)
|
||||
DebugBreak();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Returns false if we already had the key! Which is a bit different.
|
||||
bool Insert(uint32_t hash, Value value) {
|
||||
// Check load factor, resize if necessary. We never shrink.
|
||||
if (count_ > capacity_ / 2) {
|
||||
Grow();
|
||||
}
|
||||
uint32_t mask = capacity_ - 1;
|
||||
uint32_t pos = hash & mask;
|
||||
uint32_t p = pos;
|
||||
while (map[p].state != BucketState::FREE) {
|
||||
if (map[p].state == BucketState::TAKEN) {
|
||||
if (hash == map[p].hash)
|
||||
return false; // Bad!
|
||||
} else {
|
||||
// Got a place, either removed or FREE.
|
||||
break;
|
||||
}
|
||||
p = (p + 1) & mask;
|
||||
if (p == pos) {
|
||||
// FULL! Error. Should not happen thanks to Grow().
|
||||
DebugBreak();
|
||||
}
|
||||
}
|
||||
map[p].state = BucketState::TAKEN;
|
||||
map[p].hash = hash;
|
||||
map[p].value = value;
|
||||
count_++;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Remove(uint32_t hash) {
|
||||
uint32_t mask = capacity_ - 1;
|
||||
uint32_t pos = hash & mask;
|
||||
uint32_t p = pos;
|
||||
while (map[p].state != BucketState::FREE) {
|
||||
if (map[p].state == BucketState::TAKEN && hash == map[p].hash) {
|
||||
// Got it!
|
||||
map[p].state = BucketState::REMOVED;
|
||||
count_--;
|
||||
return;
|
||||
}
|
||||
p = (p + 1) & mask;
|
||||
if (p == pos) {
|
||||
// FULL! Error. Should not happen.
|
||||
DebugBreak();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t size() {
|
||||
return count_;
|
||||
}
|
||||
|
||||
// TODO: Find a way to avoid std::function. I tried using a templated argument
|
||||
// but couldn't get it to pass the compiler.
|
||||
void Iterate(std::function<void(uint32_t hash, typename Value value)> func) {
|
||||
for (auto &iter : map) {
|
||||
if (iter.state == BucketState::TAKEN) {
|
||||
func(iter.hash, iter.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
// TODO: Speedup?
|
||||
map.clear();
|
||||
map.resize(capacity_);
|
||||
}
|
||||
|
||||
private:
|
||||
void Grow() {
|
||||
// We simply move out the existing data, then we re-insert the old.
|
||||
// This is extremely non-atomic and will need synchronization.
|
||||
std::vector<Pair> old = std::move(map);
|
||||
capacity_ *= 2;
|
||||
map.clear();
|
||||
map.resize(capacity_);
|
||||
for (auto &iter : old) {
|
||||
if (iter.state == BucketState::TAKEN) {
|
||||
Insert(iter.hash, iter.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
struct Pair {
|
||||
BucketState state;
|
||||
uint32_t hash;
|
||||
Value value;
|
||||
};
|
||||
std::vector<Pair> map;
|
||||
int capacity_;
|
||||
int count_ = 0;
|
||||
};
|
@ -72,8 +72,9 @@ static const D3D11_INPUT_ELEMENT_DESC TransformedVertexElements[] = {
|
||||
DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device, ID3D11DeviceContext *context)
|
||||
: draw_(draw),
|
||||
device_(device),
|
||||
context_(context)
|
||||
{
|
||||
context_(context),
|
||||
vai_(256),
|
||||
inputLayoutMap_(32) {
|
||||
device1_ = (ID3D11Device1 *)draw->GetNativeObject(Draw::NativeObject::DEVICE_EX);
|
||||
context1_ = (ID3D11DeviceContext1 *)draw->GetNativeObject(Draw::NativeObject::CONTEXT_EX);
|
||||
decOptions_.expandAllWeightsToFloat = true;
|
||||
@ -111,18 +112,18 @@ void DrawEngineD3D11::InitDeviceObjects() {
|
||||
}
|
||||
|
||||
void DrawEngineD3D11::ClearTrackedVertexArrays() {
|
||||
for (auto &vai : vai_) {
|
||||
delete vai.second;
|
||||
}
|
||||
vai_.clear();
|
||||
vai_.Iterate([&](uint32_t hash, VertexArrayInfoD3D11 *vai){
|
||||
delete vai;
|
||||
});
|
||||
vai_.Clear();
|
||||
}
|
||||
|
||||
void DrawEngineD3D11::ClearInputLayoutMap() {
|
||||
for (auto &decl : inputLayoutMap_) {
|
||||
if (decl.second)
|
||||
decl.second->Release();
|
||||
}
|
||||
inputLayoutMap_.clear();
|
||||
inputLayoutMap_.Iterate([&](const InputLayoutKey &key, ID3D11InputLayout *il) {
|
||||
if (il)
|
||||
il->Release();
|
||||
});
|
||||
inputLayoutMap_.Clear();
|
||||
}
|
||||
|
||||
void DrawEngineD3D11::Resized() {
|
||||
@ -190,8 +191,10 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade
|
||||
// TODO: Instead of one for each vshader, we can reduce it to one for each type of shader
|
||||
// that reads TEXCOORD or not, etc. Not sure if worth it.
|
||||
InputLayoutKey key{ vshader, pspFmt };
|
||||
auto vertexDeclCached = inputLayoutMap_.find(key);
|
||||
if (vertexDeclCached == inputLayoutMap_.end()) {
|
||||
ID3D11InputLayout *inputLayout = inputLayoutMap_.Get(key);
|
||||
if (inputLayout) {
|
||||
return inputLayout;
|
||||
} else {
|
||||
D3D11_INPUT_ELEMENT_DESC VertexElements[8];
|
||||
D3D11_INPUT_ELEMENT_DESC *VertexElement = &VertexElements[0];
|
||||
|
||||
@ -236,7 +239,6 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade
|
||||
VertexElement++;
|
||||
|
||||
// Create declaration
|
||||
ID3D11InputLayout *inputLayout = nullptr;
|
||||
HRESULT hr = device_->CreateInputLayout(VertexElements, VertexElement - VertexElements, vshader->bytecode().data(), vshader->bytecode().size(), &inputLayout);
|
||||
if (FAILED(hr)) {
|
||||
ERROR_LOG(G3D, "Failed to create input layout!");
|
||||
@ -244,11 +246,8 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade
|
||||
}
|
||||
|
||||
// Add it to map
|
||||
inputLayoutMap_[key] = inputLayout;
|
||||
inputLayoutMap_.Insert(key, inputLayout);
|
||||
return inputLayout;
|
||||
} else {
|
||||
// Set it from map
|
||||
return vertexDeclCached->second;
|
||||
}
|
||||
}
|
||||
|
||||
@ -375,21 +374,19 @@ void DrawEngineD3D11::BeginFrame() {
|
||||
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
|
||||
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
|
||||
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
|
||||
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
|
||||
vai_.Iterate([&](uint32_t hash, VertexArrayInfoD3D11 *vai){
|
||||
bool kill;
|
||||
if (iter->second->status == VertexArrayInfoD3D11::VAI_UNRELIABLE) {
|
||||
if (vai->status == VertexArrayInfoD3D11::VAI_UNRELIABLE) {
|
||||
// We limit killing unreliable so we don't rehash too often.
|
||||
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
|
||||
kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
|
||||
} else {
|
||||
kill = iter->second->lastFrame < threshold;
|
||||
kill = vai->lastFrame < threshold;
|
||||
}
|
||||
if (kill) {
|
||||
delete iter->second;
|
||||
vai_.erase(iter++);
|
||||
} else {
|
||||
++iter;
|
||||
delete vai;
|
||||
vai_.Remove(hash);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Enable if you want to see vertex decoders in the log output. Need a better way.
|
||||
#if 0
|
||||
@ -443,14 +440,11 @@ void DrawEngineD3D11::DoFlush() {
|
||||
|
||||
if (useCache) {
|
||||
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
|
||||
auto iter = vai_.find(id);
|
||||
VertexArrayInfoD3D11 *vai;
|
||||
if (iter != vai_.end()) {
|
||||
// We've seen this before. Could have been a cached draw.
|
||||
vai = iter->second;
|
||||
} else {
|
||||
|
||||
VertexArrayInfoD3D11 *vai = vai_.Get(id);
|
||||
if (!vai) {
|
||||
vai = new VertexArrayInfoD3D11();
|
||||
vai_[id] = vai;
|
||||
vai_.Insert(id, vai);
|
||||
}
|
||||
|
||||
switch (vai->status) {
|
||||
@ -709,13 +703,10 @@ rotateVBO:
|
||||
// We really do need a vertex layout for each vertex shader (or at least check its ID bits for what inputs it uses)!
|
||||
// Some vertex shaders ignore one of the inputs, and then the layout created from it will lack it, which will be a problem for others.
|
||||
InputLayoutKey key{ vshader, 0xFFFFFFFF }; // Let's use 0xFFFFFFFF to signify TransformedVertex
|
||||
auto iter = inputLayoutMap_.find(key);
|
||||
ID3D11InputLayout *layout;
|
||||
if (iter == inputLayoutMap_.end()) {
|
||||
ID3D11InputLayout *layout = inputLayoutMap_.Get(key);
|
||||
if (!layout) {
|
||||
ASSERT_SUCCESS(device_->CreateInputLayout(TransformedVertexElements, ARRAY_SIZE(TransformedVertexElements), vshader->bytecode().data(), vshader->bytecode().size(), &layout));
|
||||
inputLayoutMap_[key] = layout;
|
||||
} else {
|
||||
layout = iter->second;
|
||||
inputLayoutMap_.Insert(key, layout);
|
||||
}
|
||||
context_->IASetInputLayout(layout);
|
||||
context_->IASetPrimitiveTopology(d3d11prim[prim]);
|
||||
|
@ -17,11 +17,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include <d3d11.h>
|
||||
#include <d3d11_1.h>
|
||||
|
||||
#include "Common/Hashmaps.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Common/GPUDebugInterface.h"
|
||||
#include "GPU/Common/IndexGenerator.h"
|
||||
@ -168,7 +167,7 @@ private:
|
||||
ID3D11DeviceContext *context_;
|
||||
ID3D11DeviceContext1 *context1_;
|
||||
|
||||
std::unordered_map<u32, VertexArrayInfoD3D11 *> vai_;
|
||||
PrehashMap<VertexArrayInfoD3D11 *> vai_;
|
||||
|
||||
struct InputLayoutKey {
|
||||
D3D11VertexShader *vshader;
|
||||
@ -182,7 +181,7 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
std::map<InputLayoutKey, ID3D11InputLayout *> inputLayoutMap_;
|
||||
DenseHashMap<InputLayoutKey, ID3D11InputLayout *> inputLayoutMap_;
|
||||
|
||||
// Other
|
||||
ShaderManagerD3D11 *shaderManager_ = nullptr;
|
||||
|
@ -84,7 +84,7 @@ static const D3DVERTEXELEMENT9 TransformedVertexElements[] = {
|
||||
D3DDECL_END()
|
||||
};
|
||||
|
||||
DrawEngineDX9::DrawEngineDX9(Draw::DrawContext *draw) {
|
||||
DrawEngineDX9::DrawEngineDX9(Draw::DrawContext *draw) : vai_(256), vertexDeclMap_(64) {
|
||||
device_ = (LPDIRECT3DDEVICE9)draw->GetNativeObject(Draw::NativeObject::DEVICE);
|
||||
decOptions_.expandAllWeightsToFloat = true;
|
||||
decOptions_.expand8BitNormalsToFloat = true;
|
||||
@ -115,12 +115,12 @@ DrawEngineDX9::~DrawEngineDX9() {
|
||||
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
|
||||
for (auto decl = vertexDeclMap_.begin(); decl != vertexDeclMap_.end(); ++decl) {
|
||||
if (decl->second) {
|
||||
decl->second->Release();
|
||||
vertexDeclMap_.Iterate([&](const uint32_t &key, IDirect3DVertexDeclaration9 *decl) {
|
||||
if (decl) {
|
||||
decl->Release();
|
||||
}
|
||||
}
|
||||
|
||||
});
|
||||
vertexDeclMap_.Clear();
|
||||
delete tessDataTransfer;
|
||||
}
|
||||
|
||||
@ -169,9 +169,11 @@ static void VertexAttribSetup(D3DVERTEXELEMENT9 * VertexElement, u8 fmt, u8 offs
|
||||
}
|
||||
|
||||
IDirect3DVertexDeclaration9 *DrawEngineDX9::SetupDecFmtForDraw(VSShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt) {
|
||||
auto vertexDeclCached = vertexDeclMap_.find(pspFmt);
|
||||
IDirect3DVertexDeclaration9 *vertexDeclCached = vertexDeclMap_.Get(pspFmt);
|
||||
|
||||
if (vertexDeclCached == vertexDeclMap_.end()) {
|
||||
if (vertexDeclCached) {
|
||||
return vertexDeclCached;
|
||||
} else {
|
||||
D3DVERTEXELEMENT9 VertexElements[8];
|
||||
D3DVERTEXELEMENT9 *VertexElement = &VertexElements[0];
|
||||
|
||||
@ -228,11 +230,8 @@ IDirect3DVertexDeclaration9 *DrawEngineDX9::SetupDecFmtForDraw(VSShader *vshader
|
||||
}
|
||||
|
||||
// Add it to map
|
||||
vertexDeclMap_[pspFmt] = pHardwareVertexDecl;
|
||||
vertexDeclMap_.Insert(pspFmt, pHardwareVertexDecl);
|
||||
return pHardwareVertexDecl;
|
||||
} else {
|
||||
// Set it from map
|
||||
return vertexDeclCached->second;
|
||||
}
|
||||
}
|
||||
|
||||
@ -329,10 +328,10 @@ void DrawEngineDX9::MarkUnreliable(VertexArrayInfoDX9 *vai) {
|
||||
}
|
||||
|
||||
void DrawEngineDX9::ClearTrackedVertexArrays() {
|
||||
for (auto vai = vai_.begin(); vai != vai_.end(); vai++) {
|
||||
delete vai->second;
|
||||
}
|
||||
vai_.clear();
|
||||
vai_.Iterate([&](uint32_t hash, DX9::VertexArrayInfoDX9 *vai) {
|
||||
delete vai;
|
||||
});
|
||||
vai_.Clear();
|
||||
}
|
||||
|
||||
void DrawEngineDX9::DecimateTrackedVertexArrays() {
|
||||
@ -345,21 +344,19 @@ void DrawEngineDX9::DecimateTrackedVertexArrays() {
|
||||
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
|
||||
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
|
||||
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
|
||||
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
|
||||
vai_.Iterate([&](uint32_t hash, DX9::VertexArrayInfoDX9 *vai) {
|
||||
bool kill;
|
||||
if (iter->second->status == VertexArrayInfoDX9::VAI_UNRELIABLE) {
|
||||
if (vai->status == VertexArrayInfoDX9::VAI_UNRELIABLE) {
|
||||
// We limit killing unreliable so we don't rehash too often.
|
||||
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
|
||||
kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
|
||||
} else {
|
||||
kill = iter->second->lastFrame < threshold;
|
||||
kill = vai->lastFrame < threshold;
|
||||
}
|
||||
if (kill) {
|
||||
delete iter->second;
|
||||
vai_.erase(iter++);
|
||||
} else {
|
||||
++iter;
|
||||
delete vai;
|
||||
vai_.Remove(hash);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Enable if you want to see vertex decoders in the log output. Need a better way.
|
||||
#if 0
|
||||
@ -415,14 +412,10 @@ void DrawEngineDX9::DoFlush() {
|
||||
|
||||
if (useCache) {
|
||||
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
|
||||
auto iter = vai_.find(id);
|
||||
VertexArrayInfoDX9 *vai;
|
||||
if (iter != vai_.end()) {
|
||||
// We've seen this before. Could have been a cached draw.
|
||||
vai = iter->second;
|
||||
} else {
|
||||
VertexArrayInfoDX9 *vai = vai_.Get(id);
|
||||
if (!vai) {
|
||||
vai = new VertexArrayInfoDX9();
|
||||
vai_[id] = vai;
|
||||
vai_.Insert(id, vai);
|
||||
}
|
||||
|
||||
switch (vai->status) {
|
||||
|
@ -17,10 +17,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include <d3d9.h>
|
||||
|
||||
#include "Common/Hashmaps.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Common/GPUDebugInterface.h"
|
||||
#include "GPU/Common/IndexGenerator.h"
|
||||
@ -155,8 +154,8 @@ private:
|
||||
|
||||
LPDIRECT3DDEVICE9 device_ = nullptr;
|
||||
|
||||
std::unordered_map<u32, VertexArrayInfoDX9 *> vai_;
|
||||
std::unordered_map<u32, IDirect3DVertexDeclaration9 *> vertexDeclMap_;
|
||||
PrehashMap<VertexArrayInfoDX9 *> vai_;
|
||||
DenseHashMap<u32, IDirect3DVertexDeclaration9 *> vertexDeclMap_;
|
||||
|
||||
// SimpleVertex
|
||||
IDirect3DVertexDeclaration9* transformedVertexDecl_ = nullptr;
|
||||
|
@ -115,7 +115,7 @@ enum {
|
||||
|
||||
enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 };
|
||||
|
||||
DrawEngineGLES::DrawEngineGLES() {
|
||||
DrawEngineGLES::DrawEngineGLES() : vai_(256) {
|
||||
|
||||
decOptions_.expandAllWeightsToFloat = false;
|
||||
decOptions_.expand8BitNormalsToFloat = false;
|
||||
@ -344,11 +344,11 @@ void DrawEngineGLES::MarkUnreliable(VertexArrayInfo *vai) {
|
||||
}
|
||||
|
||||
void DrawEngineGLES::ClearTrackedVertexArrays() {
|
||||
for (auto vai = vai_.begin(); vai != vai_.end(); vai++) {
|
||||
FreeVertexArray(vai->second);
|
||||
delete vai->second;
|
||||
}
|
||||
vai_.clear();
|
||||
vai_.Iterate([&](uint32_t hash, VertexArrayInfo *vai){
|
||||
FreeVertexArray(vai);
|
||||
delete vai;
|
||||
});
|
||||
vai_.Clear();
|
||||
}
|
||||
|
||||
void DrawEngineGLES::DecimateTrackedVertexArrays() {
|
||||
@ -361,22 +361,20 @@ void DrawEngineGLES::DecimateTrackedVertexArrays() {
|
||||
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
|
||||
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
|
||||
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
|
||||
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
|
||||
vai_.Iterate([&](uint32_t hash, VertexArrayInfo *vai) {
|
||||
bool kill;
|
||||
if (iter->second->status == VertexArrayInfo::VAI_UNRELIABLE) {
|
||||
if (vai->status == VertexArrayInfo::VAI_UNRELIABLE) {
|
||||
// We limit killing unreliable so we don't rehash too often.
|
||||
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
|
||||
kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
|
||||
} else {
|
||||
kill = iter->second->lastFrame < threshold;
|
||||
kill = vai->lastFrame < threshold;
|
||||
}
|
||||
if (kill) {
|
||||
FreeVertexArray(iter->second);
|
||||
delete iter->second;
|
||||
vai_.erase(iter++);
|
||||
} else {
|
||||
++iter;
|
||||
FreeVertexArray(vai);
|
||||
delete vai;
|
||||
vai_.Remove(hash);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
GLuint DrawEngineGLES::AllocateBuffer(size_t sz) {
|
||||
@ -460,8 +458,6 @@ void DrawEngineGLES::DoFlush() {
|
||||
PROFILE_THIS_SCOPE("flush");
|
||||
CHECK_GL_ERROR_IF_DEBUG();
|
||||
|
||||
|
||||
|
||||
gpuStats.numFlushes++;
|
||||
gpuStats.numTrackedVertexArrays = (int)vai_.size();
|
||||
|
||||
@ -485,14 +481,10 @@ void DrawEngineGLES::DoFlush() {
|
||||
|
||||
if (useCache) {
|
||||
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
|
||||
auto iter = vai_.find(id);
|
||||
VertexArrayInfo *vai;
|
||||
if (iter != vai_.end()) {
|
||||
// We've seen this before. Could have been a cached draw.
|
||||
vai = iter->second;
|
||||
} else {
|
||||
VertexArrayInfo *vai = vai_.Get(id);
|
||||
if (!vai) {
|
||||
vai = new VertexArrayInfo();
|
||||
vai_[id] = vai;
|
||||
vai_.Insert(id, vai);
|
||||
}
|
||||
|
||||
switch (vai->status) {
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <Common/Hashmaps.h>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "GPU/GPUState.h"
|
||||
@ -165,7 +166,7 @@ private:
|
||||
|
||||
void MarkUnreliable(VertexArrayInfo *vai);
|
||||
|
||||
std::unordered_map<u32, VertexArrayInfo *> vai_;
|
||||
PrehashMap<VertexArrayInfo *> vai_;
|
||||
|
||||
// Vertex buffer objects
|
||||
// Element buffer objects
|
||||
|
@ -78,7 +78,8 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra
|
||||
: vulkan_(vulkan),
|
||||
draw_(draw),
|
||||
curFrame_(0),
|
||||
stats_{} {
|
||||
stats_{},
|
||||
vai_(1024) {
|
||||
decOptions_.expandAllWeightsToFloat = false;
|
||||
decOptions_.expand8BitNormalsToFloat = false;
|
||||
|
||||
@ -316,34 +317,33 @@ void DrawEngineVulkan::BeginFrame() {
|
||||
vertexCache_->Destroy(vulkan_);
|
||||
delete vertexCache_; // orphans the buffers, they'll get deleted once no longer used by an in-flight frame.
|
||||
vertexCache_ = new VulkanPushBuffer(vulkan_, VERTEX_CACHE_SIZE);
|
||||
vai_.clear();
|
||||
vai_.Clear();
|
||||
}
|
||||
|
||||
vertexCache_->BeginNoReset();
|
||||
|
||||
if (--decimationCounter_ <= 0) {
|
||||
vkResetDescriptorPool(vulkan_->GetDevice(), frame->descPool, 0);
|
||||
frame->descSets.clear();
|
||||
frame->descSets.Clear();
|
||||
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
|
||||
|
||||
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
|
||||
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
|
||||
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
|
||||
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
|
||||
vai_.Iterate([&](uint32_t hash, VertexArrayInfoVulkan *vai) {
|
||||
bool kill;
|
||||
if (iter->second->status == VertexArrayInfoVulkan::VAI_UNRELIABLE) {
|
||||
if (vai->status == VertexArrayInfoVulkan::VAI_UNRELIABLE) {
|
||||
// We limit killing unreliable so we don't rehash too often.
|
||||
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
|
||||
kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
|
||||
} else {
|
||||
kill = iter->second->lastFrame < threshold;
|
||||
kill = vai->lastFrame < threshold;
|
||||
}
|
||||
if (kill) {
|
||||
delete iter->second;
|
||||
vai_.erase(iter++);
|
||||
} else {
|
||||
++iter;
|
||||
// This is actually quite safe.
|
||||
vai_.Remove(hash);
|
||||
delete vai;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@ -513,10 +513,9 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
|
||||
|
||||
FrameData *frame = &frame_[curFrame_];
|
||||
if (!gstate_c.bezier && !gstate_c.spline) { // Has no cache when HW tessellation.
|
||||
auto iter = frame->descSets.find(key);
|
||||
if (iter != frame->descSets.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
VkDescriptorSet d = frame->descSets.Get(key);
|
||||
if (d != VK_NULL_HANDLE)
|
||||
return d;
|
||||
}
|
||||
|
||||
// Didn't find one in the frame descriptor set cache, let's make a new one.
|
||||
@ -607,7 +606,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
|
||||
vkUpdateDescriptorSets(vulkan_->GetDevice(), n, writes, 0, nullptr);
|
||||
|
||||
if (!(gstate_c.bezier || gstate_c.spline)) // Avoid caching when HW tessellation.
|
||||
frame->descSets[key] = desc;
|
||||
frame->descSets.Insert(key, desc);
|
||||
return desc;
|
||||
}
|
||||
|
||||
@ -692,14 +691,10 @@ void DrawEngineVulkan::DoFlush() {
|
||||
if (useCache) {
|
||||
PROFILE_THIS_SCOPE("vcache");
|
||||
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
|
||||
auto iter = vai_.find(id);
|
||||
VertexArrayInfoVulkan *vai;
|
||||
if (iter != vai_.end()) {
|
||||
// We've seen this before. Could have been a cached draw.
|
||||
vai = iter->second;
|
||||
} else {
|
||||
VertexArrayInfoVulkan *vai = vai_.Get(id);
|
||||
if (!vai) {
|
||||
vai = new VertexArrayInfoVulkan();
|
||||
vai_[id] = vai;
|
||||
vai_.Insert(id, vai);
|
||||
}
|
||||
|
||||
switch (vai->status) {
|
||||
|
@ -32,6 +32,8 @@
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "Common/Hashmaps.h"
|
||||
|
||||
#include "GPU/Vulkan/VulkanUtil.h"
|
||||
|
||||
#include "GPU/GPUState.h"
|
||||
@ -195,7 +197,7 @@ private:
|
||||
VulkanPipeline *lastPipeline_;
|
||||
VkDescriptorSet lastDs_ = VK_NULL_HANDLE;
|
||||
|
||||
std::unordered_map<u32, VertexArrayInfoVulkan *> vai_;
|
||||
PrehashMap<VertexArrayInfoVulkan *> vai_;
|
||||
VulkanPushBuffer *vertexCache_;
|
||||
int decimationCounter_ = 0;
|
||||
|
||||
@ -219,12 +221,14 @@ private:
|
||||
|
||||
// We alternate between these.
|
||||
struct FrameData {
|
||||
FrameData() : descSets(1024) {}
|
||||
|
||||
VkDescriptorPool descPool;
|
||||
VulkanPushBuffer *pushUBO;
|
||||
VulkanPushBuffer *pushVertex;
|
||||
VulkanPushBuffer *pushIndex;
|
||||
// We do rolling allocation and reset instead of caching across frames. That we might do later.
|
||||
std::map<DescriptorSetKey, VkDescriptorSet> descSets;
|
||||
DenseHashMap<DescriptorSetKey, VkDescriptorSet> descSets;
|
||||
|
||||
void Destroy(VulkanContext *vulkan);
|
||||
};
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include "GPU/Vulkan/PipelineManagerVulkan.h"
|
||||
#include "GPU/Vulkan/ShaderManagerVulkan.h"
|
||||
|
||||
PipelineManagerVulkan::PipelineManagerVulkan(VulkanContext *vulkan) : vulkan_(vulkan) {
|
||||
PipelineManagerVulkan::PipelineManagerVulkan(VulkanContext *vulkan) : vulkan_(vulkan), pipelines_(256) {
|
||||
pipelineCache_ = vulkan->CreatePipelineCache();
|
||||
}
|
||||
|
||||
@ -23,11 +23,13 @@ void PipelineManagerVulkan::Clear() {
|
||||
// This should kill off all the shaders at once.
|
||||
// This could also be an opportunity to store the whole cache to disk. Will need to also
|
||||
// store the keys.
|
||||
for (auto &iter : pipelines_) {
|
||||
vulkan_->Delete().QueueDeletePipeline(iter.second->pipeline);
|
||||
delete iter.second;
|
||||
}
|
||||
pipelines_.clear();
|
||||
|
||||
pipelines_.Iterate([&](const VulkanPipelineKey &key, VulkanPipeline *value) {
|
||||
vulkan_->Delete().QueueDeletePipeline(value->pipeline);
|
||||
delete value;
|
||||
});
|
||||
|
||||
pipelines_.Clear();
|
||||
}
|
||||
|
||||
void PipelineManagerVulkan::DeviceLost() {
|
||||
@ -305,30 +307,30 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VkPipelineLayout layo
|
||||
key.vShader = vs->GetModule();
|
||||
key.fShader = fs->GetModule();
|
||||
key.vtxDec = useHwTransform ? vtxDec : nullptr;
|
||||
auto iter = pipelines_.find(key);
|
||||
if (iter != pipelines_.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
|
||||
auto iter = pipelines_.Get(key);
|
||||
if (iter)
|
||||
return iter;
|
||||
|
||||
PROFILE_THIS_SCOPE("pipelinebuild");
|
||||
|
||||
VulkanPipeline *pipeline = CreateVulkanPipeline(
|
||||
vulkan_->GetDevice(), pipelineCache_, layout, renderPass,
|
||||
rasterKey, vtxDec, vs, fs, useHwTransform);
|
||||
pipelines_[key] = pipeline;
|
||||
pipelines_.Insert(key, pipeline);
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
std::vector<std::string> PipelineManagerVulkan::DebugGetObjectIDs(DebugShaderType type) {
|
||||
std::string id;
|
||||
std::vector<std::string> ids;
|
||||
switch (type) {
|
||||
case SHADER_TYPE_PIPELINE:
|
||||
{
|
||||
for (auto iter : pipelines_) {
|
||||
iter.first.ToString(&id);
|
||||
pipelines_.Iterate([&](const VulkanPipelineKey &key, VulkanPipeline *value) {
|
||||
std::string id;
|
||||
key.ToString(&id);
|
||||
ids.push_back(id);
|
||||
}
|
||||
});
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@ -344,15 +346,15 @@ std::string PipelineManagerVulkan::DebugGetObjectString(std::string id, DebugSha
|
||||
VulkanPipelineKey shaderId;
|
||||
shaderId.FromString(id);
|
||||
|
||||
auto iter = pipelines_.find(shaderId);
|
||||
if (iter == pipelines_.end()) {
|
||||
VulkanPipeline *iter = pipelines_.Get(shaderId);
|
||||
if (!iter) {
|
||||
return "";
|
||||
}
|
||||
|
||||
switch (stringType) {
|
||||
case SHADER_STRING_SHORT_DESC:
|
||||
{
|
||||
return StringFromFormat("%p", &iter->second);
|
||||
return StringFromFormat("%p", iter);
|
||||
}
|
||||
|
||||
case SHADER_STRING_SOURCE_CODE:
|
||||
|
@ -17,7 +17,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include "Common/Hashmaps.h"
|
||||
|
||||
#include "GPU/Common/VertexDecoderCommon.h"
|
||||
#include "GPU/Common/ShaderId.h"
|
||||
@ -97,7 +97,7 @@ public:
|
||||
std::vector<std::string> DebugGetObjectIDs(DebugShaderType type);
|
||||
|
||||
private:
|
||||
std::map<VulkanPipelineKey, VulkanPipeline *> pipelines_;
|
||||
DenseHashMap<VulkanPipelineKey, VulkanPipeline *> pipelines_;
|
||||
VkPipelineCache pipelineCache_;
|
||||
VulkanContext *vulkan_;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user