Replace the most critical unordered_maps and maps with two new maps: DenseHashMap (called FastHashMap in the original message) and PrehashMap.

This commit is contained in:
Henrik Rydgård 2017-08-20 11:30:19 +02:00
parent cbd107b03d
commit e0e13e191f
13 changed files with 403 additions and 153 deletions

View File

@ -22,7 +22,8 @@
<ProjectGuid>{3FCDBAE2-5103-4350-9A8E-848CE9C73195}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>Common</RootNamespace>
<WindowsTargetPlatformVersion></WindowsTargetPlatformVersion>
<WindowsTargetPlatformVersion>
</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@ -226,6 +227,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GraphicsContext.h" />
<ClInclude Include="Hashmaps.h" />
<ClInclude Include="KeyMap.h" />
<ClInclude Include="Log.h" />
<ClInclude Include="LogManager.h" />

View File

@ -74,6 +74,7 @@
<Filter>Vulkan</Filter>
</ClInclude>
<ClInclude Include="OSVersion.h" />
<ClInclude Include="Hashmaps.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="stdafx.cpp" />

271
Common/Hashmaps.h Normal file
View File

@ -0,0 +1,271 @@
#pragma once
#include "ext/xxhash.h"
#include <cassert>
#include <cstdint>
#include <cstring>
#include <functional>
#include <vector>
// Whatever random value.
const uint32_t hashmapSeed = 0x23B58532;
// TODO: Try hardware CRC. Unfortunately not available on older Intels or ARM32.
// Seems to be ubiquitous on ARM64 though.
template<class K>
inline uint32_t HashKey(const K &k) {
return XXH32(&k, sizeof(k), hashmapSeed);
}
// Bytewise key comparison: true when a and b have identical object representations
// over sizeof(K) bytes.
template<class K>
inline bool KeyEquals(const K &a, const K &b) {
	const int diff = memcmp(&a, &b, sizeof(K));
	return diff == 0;
}
// Occupancy state of a single slot in the open-addressing tables below.
// FREE has to remain the zero value: the maps create empty slots with
// std::vector::resize(), which value-initializes the state field.
enum class BucketState {
	FREE = 0,     // never used since the last (re)build; terminates a probe chain
	TAKEN = 1,    // holds a live entry
	REMOVED = 2,  // tombstone: for linear probing to work, lookups must walk past it
};
// Open-addressing hash map that uses linear probing for cache-friendliness. Values are
// not segregated from keys because we always use very small values, so it's probably
// better to have them in the same cache line as the corresponding key.
//
// Contract:
//  - Keys are hashed and compared bytewise through HashKey() / KeyEquals().
//  - Value must be constructible from nullptr, because that is what Get() returns on a
//    miss. In practice that means pointers or pointer-like handles.
//  - The capacity must be a power of two, since slots are selected with a bit mask.
//  - There is no internal synchronization.
template <class Key, class Value>
class DenseHashMap {
public:
	// initialCapacity is the starting number of slots; it must be a power of two.
	DenseHashMap(int initialCapacity) : capacity_(initialCapacity) {
		assert(initialCapacity > 0 && (initialCapacity & (initialCapacity - 1)) == 0);
		map.resize(initialCapacity);
	}

	// Returns the value stored under key, or nullptr if no entry was found.
	Value Get(const Key &key) {
		uint32_t mask = capacity_ - 1;
		uint32_t pos = HashKey(key) & mask;
		uint32_t p = pos;
		while (true) {
			if (map[p].state == BucketState::TAKEN && KeyEquals(key, map[p].key))
				return map[p].value;
			else if (map[p].state == BucketState::FREE)
				return nullptr;
			p = (p + 1) & mask;  // If the state is REMOVED, we just keep on walking.
			if (p == pos) {
				// Walked every slot without meeting the key or a FREE slot (possible
				// when a tiny table is completely full): the key is not present.
				return nullptr;
			}
		}
	}

	// Stores value under key. Returns false if we already had the key! Which is a bit
	// different. Callers are expected to Get() first; inserting a duplicate is treated
	// as a programming error in debug builds.
	// Note: a REMOVED slot is reused as soon as one is met, without scanning the rest of
	// the probe chain, so the duplicate check only covers entries in front of it.
	bool Insert(const Key &key, Value value) {
		// Check load factor, resize if necessary. We never shrink. Tombstones lengthen
		// probe chains exactly like live entries, so they count towards the load;
		// otherwise insert/remove churn could use up every FREE slot.
		if (count_ + removed_ > capacity_ / 2) {
			// Double when live entries fill a good share of the table; if it is mostly
			// tombstones, rebuilding at the same size is enough to flush them.
			Grow(count_ > capacity_ / 4 ? 2 : 1);
		}
		uint32_t mask = capacity_ - 1;
		uint32_t pos = HashKey(key) & mask;
		uint32_t p = pos;
		while (map[p].state == BucketState::TAKEN) {
			if (KeyEquals(key, map[p].key)) {
				// Bad! We already got this one. Let's avoid this case.
				assert(false && "DenseHashMap: key inserted twice");
				return false;
			}
			p = (p + 1) & mask;
			if (p == pos) {
				// FULL! Should not happen thanks to the load check above.
				assert(false && "DenseHashMap: table full");
				return false;
			}
		}
		// Got a place, either REMOVED or FREE.
		if (map[p].state == BucketState::REMOVED)
			removed_--;
		map[p].state = BucketState::TAKEN;
		map[p].key = key;
		map[p].value = value;
		count_++;
		return true;
	}

	// Removes the entry for key, if present, by turning its slot into a tombstone.
	// Never reallocates, so it is safe to call from inside an Iterate() callback.
	void Remove(const Key &key) {
		uint32_t mask = capacity_ - 1;
		uint32_t pos = HashKey(key) & mask;
		uint32_t p = pos;
		while (map[p].state != BucketState::FREE) {
			if (map[p].state == BucketState::TAKEN && KeyEquals(key, map[p].key)) {
				// Got it! Mark it as removed.
				map[p].state = BucketState::REMOVED;
				removed_++;
				count_--;
				return;
			}
			p = (p + 1) & mask;
			if (p == pos) {
				// Full lap without a match: nothing to remove.
				return;
			}
		}
	}

	// Number of live entries.
	size_t size() const {
		return count_;
	}

	// Calls func(key, value) for every live entry, in slot order.
	// The callback may Remove() entries but must not Insert(): an insert can rebuild
	// the table while it is being walked.
	// TODO: Find a way to avoid std::function. I tried using a templated argument
	// but couldn't get it to pass the compiler.
	inline void Iterate(std::function<void(const Key &key, Value value)> func) {
		for (auto &iter : map) {
			if (iter.state == BucketState::TAKEN) {
				func(iter.key, iter.value);
			}
		}
	}

	// Drops every entry (values are not destroyed or freed in any way) while keeping
	// the current capacity.
	void Clear() {
		// TODO: Speedup?
		map.clear();
		map.resize(capacity_);
		count_ = 0;
		removed_ = 0;
	}

private:
	// Rebuilds the table with capacity_ * factor slots (factor 1 = same size, which
	// just gets rid of tombstones) and re-inserts the live entries.
	void Grow(int factor) {
		// We simply move out the existing data, then we re-insert the old.
		// This is extremely non-atomic and will need synchronization.
		std::vector<Pair> old = std::move(map);
		capacity_ *= factor;
		map.clear();
		map.resize(capacity_);
		count_ = 0;  // Insert will update it.
		removed_ = 0;
		for (auto &iter : old) {
			if (iter.state == BucketState::TAKEN) {
				Insert(iter.key, iter.value);
			}
		}
	}

	struct Pair {
		BucketState state;
		Key key;
		Value value;
	};

	std::vector<Pair> map;
	int capacity_;
	int count_ = 0;    // slots in state TAKEN
	int removed_ = 0;  // slots in state REMOVED (tombstones)
};
// Like DenseHashMap, uses linear probing for cache-friendliness.
// Does not perform hashing at all so expects well-distributed keys: the caller's 32-bit
// hash IS the key, and two entries with the same hash are considered the same entry.
//
// Contract:
//  - Value must be constructible from nullptr, because that is what Get() returns on a
//    miss.
//  - The capacity must be a power of two, since slots are selected with a bit mask.
//  - There is no internal synchronization.
template <class Value>
class PrehashMap {
public:
	// initialCapacity is the starting number of slots; it must be a power of two.
	PrehashMap(int initialCapacity) : capacity_(initialCapacity) {
		assert(initialCapacity > 0 && (initialCapacity & (initialCapacity - 1)) == 0);
		map.resize(initialCapacity);
	}

	// Returns the value stored under hash, or nullptr if no entry was found.
	Value Get(uint32_t hash) {
		uint32_t mask = capacity_ - 1;
		uint32_t pos = hash & mask;
		uint32_t p = pos;
		while (true) {
			if (map[p].state == BucketState::TAKEN && hash == map[p].hash)
				return map[p].value;
			else if (map[p].state == BucketState::FREE)
				return nullptr;
			p = (p + 1) & mask;  // If the state is REMOVED, we just keep on walking.
			if (p == pos) {
				// Walked every slot without meeting the hash or a FREE slot (possible
				// when a tiny table is completely full): the entry is not present.
				return nullptr;
			}
		}
	}

	// Stores value under hash. Returns false if we already had the key! Which is a bit
	// different.
	// Note: a REMOVED slot is reused as soon as one is met, without scanning the rest of
	// the probe chain, so the duplicate check only covers entries in front of it.
	bool Insert(uint32_t hash, Value value) {
		// Check load factor, resize if necessary. We never shrink. Tombstones lengthen
		// probe chains exactly like live entries, so they count towards the load;
		// otherwise insert/remove churn could use up every FREE slot.
		if (count_ + removed_ > capacity_ / 2) {
			// Double when live entries fill a good share of the table; if it is mostly
			// tombstones, rebuilding at the same size is enough to flush them.
			Grow(count_ > capacity_ / 4 ? 2 : 1);
		}
		uint32_t mask = capacity_ - 1;
		uint32_t pos = hash & mask;
		uint32_t p = pos;
		while (map[p].state == BucketState::TAKEN) {
			if (hash == map[p].hash)
				return false;  // Bad!
			p = (p + 1) & mask;
			if (p == pos) {
				// FULL! Should not happen thanks to the load check above.
				assert(false && "PrehashMap: table full");
				return false;
			}
		}
		// Got a place, either REMOVED or FREE.
		if (map[p].state == BucketState::REMOVED)
			removed_--;
		map[p].state = BucketState::TAKEN;
		map[p].hash = hash;
		map[p].value = value;
		count_++;
		return true;
	}

	// Removes the entry for hash, if present, by turning its slot into a tombstone.
	// Never reallocates, so it is safe to call from inside an Iterate() callback.
	void Remove(uint32_t hash) {
		uint32_t mask = capacity_ - 1;
		uint32_t pos = hash & mask;
		uint32_t p = pos;
		while (map[p].state != BucketState::FREE) {
			if (map[p].state == BucketState::TAKEN && hash == map[p].hash) {
				// Got it!
				map[p].state = BucketState::REMOVED;
				removed_++;
				count_--;
				return;
			}
			p = (p + 1) & mask;
			if (p == pos) {
				// Full lap without a match: nothing to remove.
				return;
			}
		}
	}

	// Number of live entries.
	size_t size() const {
		return count_;
	}

	// Calls func(hash, value) for every live entry, in slot order.
	// The callback may Remove() entries but must not Insert(): an insert can rebuild
	// the table while it is being walked.
	// TODO: Find a way to avoid std::function. I tried using a templated argument
	// but couldn't get it to pass the compiler.
	void Iterate(std::function<void(uint32_t hash, Value value)> func) {
		for (auto &iter : map) {
			if (iter.state == BucketState::TAKEN) {
				func(iter.hash, iter.value);
			}
		}
	}

	// Drops every entry (values are not destroyed or freed in any way) while keeping
	// the current capacity.
	void Clear() {
		// TODO: Speedup?
		map.clear();
		map.resize(capacity_);
		count_ = 0;
		removed_ = 0;
	}

private:
	// Rebuilds the table with capacity_ * factor slots (factor 1 = same size, which
	// just gets rid of tombstones) and re-inserts the live entries.
	void Grow(int factor) {
		// We simply move out the existing data, then we re-insert the old.
		// This is extremely non-atomic and will need synchronization.
		std::vector<Pair> old = std::move(map);
		capacity_ *= factor;
		map.clear();
		map.resize(capacity_);
		count_ = 0;  // Insert will update it; without this every entry is counted twice.
		removed_ = 0;
		for (auto &iter : old) {
			if (iter.state == BucketState::TAKEN) {
				Insert(iter.hash, iter.value);
			}
		}
	}

	struct Pair {
		BucketState state;
		uint32_t hash;
		Value value;
	};

	std::vector<Pair> map;
	int capacity_;
	int count_ = 0;    // slots in state TAKEN
	int removed_ = 0;  // slots in state REMOVED (tombstones)
};

View File

@ -72,8 +72,9 @@ static const D3D11_INPUT_ELEMENT_DESC TransformedVertexElements[] = {
DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device, ID3D11DeviceContext *context)
: draw_(draw),
device_(device),
context_(context)
{
context_(context),
vai_(256),
inputLayoutMap_(32) {
device1_ = (ID3D11Device1 *)draw->GetNativeObject(Draw::NativeObject::DEVICE_EX);
context1_ = (ID3D11DeviceContext1 *)draw->GetNativeObject(Draw::NativeObject::CONTEXT_EX);
decOptions_.expandAllWeightsToFloat = true;
@ -111,18 +112,18 @@ void DrawEngineD3D11::InitDeviceObjects() {
}
void DrawEngineD3D11::ClearTrackedVertexArrays() {
for (auto &vai : vai_) {
delete vai.second;
}
vai_.clear();
vai_.Iterate([&](uint32_t hash, VertexArrayInfoD3D11 *vai){
delete vai;
});
vai_.Clear();
}
void DrawEngineD3D11::ClearInputLayoutMap() {
for (auto &decl : inputLayoutMap_) {
if (decl.second)
decl.second->Release();
}
inputLayoutMap_.clear();
inputLayoutMap_.Iterate([&](const InputLayoutKey &key, ID3D11InputLayout *il) {
if (il)
il->Release();
});
inputLayoutMap_.Clear();
}
void DrawEngineD3D11::Resized() {
@ -190,8 +191,10 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade
// TODO: Instead of one for each vshader, we can reduce it to one for each type of shader
// that reads TEXCOORD or not, etc. Not sure if worth it.
InputLayoutKey key{ vshader, pspFmt };
auto vertexDeclCached = inputLayoutMap_.find(key);
if (vertexDeclCached == inputLayoutMap_.end()) {
ID3D11InputLayout *inputLayout = inputLayoutMap_.Get(key);
if (inputLayout) {
return inputLayout;
} else {
D3D11_INPUT_ELEMENT_DESC VertexElements[8];
D3D11_INPUT_ELEMENT_DESC *VertexElement = &VertexElements[0];
@ -236,7 +239,6 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade
VertexElement++;
// Create declaration
ID3D11InputLayout *inputLayout = nullptr;
HRESULT hr = device_->CreateInputLayout(VertexElements, VertexElement - VertexElements, vshader->bytecode().data(), vshader->bytecode().size(), &inputLayout);
if (FAILED(hr)) {
ERROR_LOG(G3D, "Failed to create input layout!");
@ -244,11 +246,8 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade
}
// Add it to map
inputLayoutMap_[key] = inputLayout;
inputLayoutMap_.Insert(key, inputLayout);
return inputLayout;
} else {
// Set it from map
return vertexDeclCached->second;
}
}
@ -375,21 +374,19 @@ void DrawEngineD3D11::BeginFrame() {
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
vai_.Iterate([&](uint32_t hash, VertexArrayInfoD3D11 *vai){
bool kill;
if (iter->second->status == VertexArrayInfoD3D11::VAI_UNRELIABLE) {
if (vai->status == VertexArrayInfoD3D11::VAI_UNRELIABLE) {
// We limit killing unreliable so we don't rehash too often.
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
} else {
kill = iter->second->lastFrame < threshold;
kill = vai->lastFrame < threshold;
}
if (kill) {
delete iter->second;
vai_.erase(iter++);
} else {
++iter;
delete vai;
vai_.Remove(hash);
}
}
});
// Enable if you want to see vertex decoders in the log output. Need a better way.
#if 0
@ -443,14 +440,11 @@ void DrawEngineD3D11::DoFlush() {
if (useCache) {
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
auto iter = vai_.find(id);
VertexArrayInfoD3D11 *vai;
if (iter != vai_.end()) {
// We've seen this before. Could have been a cached draw.
vai = iter->second;
} else {
VertexArrayInfoD3D11 *vai = vai_.Get(id);
if (!vai) {
vai = new VertexArrayInfoD3D11();
vai_[id] = vai;
vai_.Insert(id, vai);
}
switch (vai->status) {
@ -709,13 +703,10 @@ rotateVBO:
// We really do need a vertex layout for each vertex shader (or at least check its ID bits for what inputs it uses)!
// Some vertex shaders ignore one of the inputs, and then the layout created from it will lack it, which will be a problem for others.
InputLayoutKey key{ vshader, 0xFFFFFFFF }; // Let's use 0xFFFFFFFF to signify TransformedVertex
auto iter = inputLayoutMap_.find(key);
ID3D11InputLayout *layout;
if (iter == inputLayoutMap_.end()) {
ID3D11InputLayout *layout = inputLayoutMap_.Get(key);
if (!layout) {
ASSERT_SUCCESS(device_->CreateInputLayout(TransformedVertexElements, ARRAY_SIZE(TransformedVertexElements), vshader->bytecode().data(), vshader->bytecode().size(), &layout));
inputLayoutMap_[key] = layout;
} else {
layout = iter->second;
inputLayoutMap_.Insert(key, layout);
}
context_->IASetInputLayout(layout);
context_->IASetPrimitiveTopology(d3d11prim[prim]);

View File

@ -17,11 +17,10 @@
#pragma once
#include <unordered_map>
#include <d3d11.h>
#include <d3d11_1.h>
#include "Common/Hashmaps.h"
#include "GPU/GPUState.h"
#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/Common/IndexGenerator.h"
@ -168,7 +167,7 @@ private:
ID3D11DeviceContext *context_;
ID3D11DeviceContext1 *context1_;
std::unordered_map<u32, VertexArrayInfoD3D11 *> vai_;
PrehashMap<VertexArrayInfoD3D11 *> vai_;
struct InputLayoutKey {
D3D11VertexShader *vshader;
@ -182,7 +181,7 @@ private:
}
};
std::map<InputLayoutKey, ID3D11InputLayout *> inputLayoutMap_;
DenseHashMap<InputLayoutKey, ID3D11InputLayout *> inputLayoutMap_;
// Other
ShaderManagerD3D11 *shaderManager_ = nullptr;

View File

@ -84,7 +84,7 @@ static const D3DVERTEXELEMENT9 TransformedVertexElements[] = {
D3DDECL_END()
};
DrawEngineDX9::DrawEngineDX9(Draw::DrawContext *draw) {
DrawEngineDX9::DrawEngineDX9(Draw::DrawContext *draw) : vai_(256), vertexDeclMap_(64) {
device_ = (LPDIRECT3DDEVICE9)draw->GetNativeObject(Draw::NativeObject::DEVICE);
decOptions_.expandAllWeightsToFloat = true;
decOptions_.expand8BitNormalsToFloat = true;
@ -115,12 +115,12 @@ DrawEngineDX9::~DrawEngineDX9() {
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
for (auto decl = vertexDeclMap_.begin(); decl != vertexDeclMap_.end(); ++decl) {
if (decl->second) {
decl->second->Release();
vertexDeclMap_.Iterate([&](const uint32_t &key, IDirect3DVertexDeclaration9 *decl) {
if (decl) {
decl->Release();
}
}
});
vertexDeclMap_.Clear();
delete tessDataTransfer;
}
@ -169,9 +169,11 @@ static void VertexAttribSetup(D3DVERTEXELEMENT9 * VertexElement, u8 fmt, u8 offs
}
IDirect3DVertexDeclaration9 *DrawEngineDX9::SetupDecFmtForDraw(VSShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt) {
auto vertexDeclCached = vertexDeclMap_.find(pspFmt);
IDirect3DVertexDeclaration9 *vertexDeclCached = vertexDeclMap_.Get(pspFmt);
if (vertexDeclCached == vertexDeclMap_.end()) {
if (vertexDeclCached) {
return vertexDeclCached;
} else {
D3DVERTEXELEMENT9 VertexElements[8];
D3DVERTEXELEMENT9 *VertexElement = &VertexElements[0];
@ -228,11 +230,8 @@ IDirect3DVertexDeclaration9 *DrawEngineDX9::SetupDecFmtForDraw(VSShader *vshader
}
// Add it to map
vertexDeclMap_[pspFmt] = pHardwareVertexDecl;
vertexDeclMap_.Insert(pspFmt, pHardwareVertexDecl);
return pHardwareVertexDecl;
} else {
// Set it from map
return vertexDeclCached->second;
}
}
@ -329,10 +328,10 @@ void DrawEngineDX9::MarkUnreliable(VertexArrayInfoDX9 *vai) {
}
void DrawEngineDX9::ClearTrackedVertexArrays() {
for (auto vai = vai_.begin(); vai != vai_.end(); vai++) {
delete vai->second;
}
vai_.clear();
vai_.Iterate([&](uint32_t hash, DX9::VertexArrayInfoDX9 *vai) {
delete vai;
});
vai_.Clear();
}
void DrawEngineDX9::DecimateTrackedVertexArrays() {
@ -345,21 +344,19 @@ void DrawEngineDX9::DecimateTrackedVertexArrays() {
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
vai_.Iterate([&](uint32_t hash, DX9::VertexArrayInfoDX9 *vai) {
bool kill;
if (iter->second->status == VertexArrayInfoDX9::VAI_UNRELIABLE) {
if (vai->status == VertexArrayInfoDX9::VAI_UNRELIABLE) {
// We limit killing unreliable so we don't rehash too often.
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
} else {
kill = iter->second->lastFrame < threshold;
kill = vai->lastFrame < threshold;
}
if (kill) {
delete iter->second;
vai_.erase(iter++);
} else {
++iter;
delete vai;
vai_.Remove(hash);
}
}
});
// Enable if you want to see vertex decoders in the log output. Need a better way.
#if 0
@ -415,14 +412,10 @@ void DrawEngineDX9::DoFlush() {
if (useCache) {
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
auto iter = vai_.find(id);
VertexArrayInfoDX9 *vai;
if (iter != vai_.end()) {
// We've seen this before. Could have been a cached draw.
vai = iter->second;
} else {
VertexArrayInfoDX9 *vai = vai_.Get(id);
if (!vai) {
vai = new VertexArrayInfoDX9();
vai_[id] = vai;
vai_.Insert(id, vai);
}
switch (vai->status) {

View File

@ -17,10 +17,9 @@
#pragma once
#include <unordered_map>
#include <d3d9.h>
#include "Common/Hashmaps.h"
#include "GPU/GPUState.h"
#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/Common/IndexGenerator.h"
@ -155,8 +154,8 @@ private:
LPDIRECT3DDEVICE9 device_ = nullptr;
std::unordered_map<u32, VertexArrayInfoDX9 *> vai_;
std::unordered_map<u32, IDirect3DVertexDeclaration9 *> vertexDeclMap_;
PrehashMap<VertexArrayInfoDX9 *> vai_;
DenseHashMap<u32, IDirect3DVertexDeclaration9 *> vertexDeclMap_;
// SimpleVertex
IDirect3DVertexDeclaration9* transformedVertexDecl_ = nullptr;

View File

@ -115,7 +115,7 @@ enum {
enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 };
DrawEngineGLES::DrawEngineGLES() {
DrawEngineGLES::DrawEngineGLES() : vai_(256) {
decOptions_.expandAllWeightsToFloat = false;
decOptions_.expand8BitNormalsToFloat = false;
@ -344,11 +344,11 @@ void DrawEngineGLES::MarkUnreliable(VertexArrayInfo *vai) {
}
void DrawEngineGLES::ClearTrackedVertexArrays() {
for (auto vai = vai_.begin(); vai != vai_.end(); vai++) {
FreeVertexArray(vai->second);
delete vai->second;
}
vai_.clear();
vai_.Iterate([&](uint32_t hash, VertexArrayInfo *vai){
FreeVertexArray(vai);
delete vai;
});
vai_.Clear();
}
void DrawEngineGLES::DecimateTrackedVertexArrays() {
@ -361,22 +361,20 @@ void DrawEngineGLES::DecimateTrackedVertexArrays() {
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
vai_.Iterate([&](uint32_t hash, VertexArrayInfo *vai) {
bool kill;
if (iter->second->status == VertexArrayInfo::VAI_UNRELIABLE) {
if (vai->status == VertexArrayInfo::VAI_UNRELIABLE) {
// We limit killing unreliable so we don't rehash too often.
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
} else {
kill = iter->second->lastFrame < threshold;
kill = vai->lastFrame < threshold;
}
if (kill) {
FreeVertexArray(iter->second);
delete iter->second;
vai_.erase(iter++);
} else {
++iter;
FreeVertexArray(vai);
delete vai;
vai_.Remove(hash);
}
}
});
}
GLuint DrawEngineGLES::AllocateBuffer(size_t sz) {
@ -460,8 +458,6 @@ void DrawEngineGLES::DoFlush() {
PROFILE_THIS_SCOPE("flush");
CHECK_GL_ERROR_IF_DEBUG();
gpuStats.numFlushes++;
gpuStats.numTrackedVertexArrays = (int)vai_.size();
@ -485,14 +481,10 @@ void DrawEngineGLES::DoFlush() {
if (useCache) {
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
auto iter = vai_.find(id);
VertexArrayInfo *vai;
if (iter != vai_.end()) {
// We've seen this before. Could have been a cached draw.
vai = iter->second;
} else {
VertexArrayInfo *vai = vai_.Get(id);
if (!vai) {
vai = new VertexArrayInfo();
vai_[id] = vai;
vai_.Insert(id, vai);
}
switch (vai->status) {

View File

@ -17,6 +17,7 @@
#pragma once
#include <Common/Hashmaps.h>
#include <unordered_map>
#include "GPU/GPUState.h"
@ -165,7 +166,7 @@ private:
void MarkUnreliable(VertexArrayInfo *vai);
std::unordered_map<u32, VertexArrayInfo *> vai_;
PrehashMap<VertexArrayInfo *> vai_;
// Vertex buffer objects
// Element buffer objects

View File

@ -78,7 +78,8 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra
: vulkan_(vulkan),
draw_(draw),
curFrame_(0),
stats_{} {
stats_{},
vai_(1024) {
decOptions_.expandAllWeightsToFloat = false;
decOptions_.expand8BitNormalsToFloat = false;
@ -316,34 +317,33 @@ void DrawEngineVulkan::BeginFrame() {
vertexCache_->Destroy(vulkan_);
delete vertexCache_; // orphans the buffers, they'll get deleted once no longer used by an in-flight frame.
vertexCache_ = new VulkanPushBuffer(vulkan_, VERTEX_CACHE_SIZE);
vai_.clear();
vai_.Clear();
}
vertexCache_->BeginNoReset();
if (--decimationCounter_ <= 0) {
vkResetDescriptorPool(vulkan_->GetDevice(), frame->descPool, 0);
frame->descSets.clear();
frame->descSets.Clear();
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
vai_.Iterate([&](uint32_t hash, VertexArrayInfoVulkan *vai) {
bool kill;
if (iter->second->status == VertexArrayInfoVulkan::VAI_UNRELIABLE) {
if (vai->status == VertexArrayInfoVulkan::VAI_UNRELIABLE) {
// We limit killing unreliable so we don't rehash too often.
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
} else {
kill = iter->second->lastFrame < threshold;
kill = vai->lastFrame < threshold;
}
if (kill) {
delete iter->second;
vai_.erase(iter++);
} else {
++iter;
// This is actually quite safe.
vai_.Remove(hash);
delete vai;
}
}
});
}
}
@ -513,10 +513,9 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
FrameData *frame = &frame_[curFrame_];
if (!gstate_c.bezier && !gstate_c.spline) { // Has no cache when HW tessellation.
auto iter = frame->descSets.find(key);
if (iter != frame->descSets.end()) {
return iter->second;
}
VkDescriptorSet d = frame->descSets.Get(key);
if (d != VK_NULL_HANDLE)
return d;
}
// Didn't find one in the frame descriptor set cache, let's make a new one.
@ -607,7 +606,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
vkUpdateDescriptorSets(vulkan_->GetDevice(), n, writes, 0, nullptr);
if (!(gstate_c.bezier || gstate_c.spline)) // Avoid caching when HW tessellation.
frame->descSets[key] = desc;
frame->descSets.Insert(key, desc);
return desc;
}
@ -692,14 +691,10 @@ void DrawEngineVulkan::DoFlush() {
if (useCache) {
PROFILE_THIS_SCOPE("vcache");
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
auto iter = vai_.find(id);
VertexArrayInfoVulkan *vai;
if (iter != vai_.end()) {
// We've seen this before. Could have been a cached draw.
vai = iter->second;
} else {
VertexArrayInfoVulkan *vai = vai_.Get(id);
if (!vai) {
vai = new VertexArrayInfoVulkan();
vai_[id] = vai;
vai_.Insert(id, vai);
}
switch (vai->status) {

View File

@ -32,6 +32,8 @@
#include <map>
#include <unordered_map>
#include "Common/Hashmaps.h"
#include "GPU/Vulkan/VulkanUtil.h"
#include "GPU/GPUState.h"
@ -195,7 +197,7 @@ private:
VulkanPipeline *lastPipeline_;
VkDescriptorSet lastDs_ = VK_NULL_HANDLE;
std::unordered_map<u32, VertexArrayInfoVulkan *> vai_;
PrehashMap<VertexArrayInfoVulkan *> vai_;
VulkanPushBuffer *vertexCache_;
int decimationCounter_ = 0;
@ -219,12 +221,14 @@ private:
// We alternate between these.
struct FrameData {
FrameData() : descSets(1024) {}
VkDescriptorPool descPool;
VulkanPushBuffer *pushUBO;
VulkanPushBuffer *pushVertex;
VulkanPushBuffer *pushIndex;
// We do rolling allocation and reset instead of caching across frames. That we might do later.
std::map<DescriptorSetKey, VkDescriptorSet> descSets;
DenseHashMap<DescriptorSetKey, VkDescriptorSet> descSets;
void Destroy(VulkanContext *vulkan);
};

View File

@ -9,7 +9,7 @@
#include "GPU/Vulkan/PipelineManagerVulkan.h"
#include "GPU/Vulkan/ShaderManagerVulkan.h"
PipelineManagerVulkan::PipelineManagerVulkan(VulkanContext *vulkan) : vulkan_(vulkan) {
PipelineManagerVulkan::PipelineManagerVulkan(VulkanContext *vulkan) : vulkan_(vulkan), pipelines_(256) {
pipelineCache_ = vulkan->CreatePipelineCache();
}
@ -23,11 +23,13 @@ void PipelineManagerVulkan::Clear() {
// This should kill off all the shaders at once.
// This could also be an opportunity to store the whole cache to disk. Will need to also
// store the keys.
for (auto &iter : pipelines_) {
vulkan_->Delete().QueueDeletePipeline(iter.second->pipeline);
delete iter.second;
}
pipelines_.clear();
pipelines_.Iterate([&](const VulkanPipelineKey &key, VulkanPipeline *value) {
vulkan_->Delete().QueueDeletePipeline(value->pipeline);
delete value;
});
pipelines_.Clear();
}
void PipelineManagerVulkan::DeviceLost() {
@ -305,30 +307,30 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VkPipelineLayout layo
key.vShader = vs->GetModule();
key.fShader = fs->GetModule();
key.vtxDec = useHwTransform ? vtxDec : nullptr;
auto iter = pipelines_.find(key);
if (iter != pipelines_.end()) {
return iter->second;
}
auto iter = pipelines_.Get(key);
if (iter)
return iter;
PROFILE_THIS_SCOPE("pipelinebuild");
VulkanPipeline *pipeline = CreateVulkanPipeline(
vulkan_->GetDevice(), pipelineCache_, layout, renderPass,
rasterKey, vtxDec, vs, fs, useHwTransform);
pipelines_[key] = pipeline;
pipelines_.Insert(key, pipeline);
return pipeline;
}
std::vector<std::string> PipelineManagerVulkan::DebugGetObjectIDs(DebugShaderType type) {
std::string id;
std::vector<std::string> ids;
switch (type) {
case SHADER_TYPE_PIPELINE:
{
for (auto iter : pipelines_) {
iter.first.ToString(&id);
pipelines_.Iterate([&](const VulkanPipelineKey &key, VulkanPipeline *value) {
std::string id;
key.ToString(&id);
ids.push_back(id);
}
});
}
break;
default:
@ -344,15 +346,15 @@ std::string PipelineManagerVulkan::DebugGetObjectString(std::string id, DebugSha
VulkanPipelineKey shaderId;
shaderId.FromString(id);
auto iter = pipelines_.find(shaderId);
if (iter == pipelines_.end()) {
VulkanPipeline *iter = pipelines_.Get(shaderId);
if (!iter) {
return "";
}
switch (stringType) {
case SHADER_STRING_SHORT_DESC:
{
return StringFromFormat("%p", &iter->second);
return StringFromFormat("%p", iter);
}
case SHADER_STRING_SOURCE_CODE:

View File

@ -17,7 +17,7 @@
#pragma once
#include <map>
#include "Common/Hashmaps.h"
#include "GPU/Common/VertexDecoderCommon.h"
#include "GPU/Common/ShaderId.h"
@ -97,7 +97,7 @@ public:
std::vector<std::string> DebugGetObjectIDs(DebugShaderType type);
private:
std::map<VulkanPipelineKey, VulkanPipeline *> pipelines_;
DenseHashMap<VulkanPipelineKey, VulkanPipeline *> pipelines_;
VkPipelineCache pipelineCache_;
VulkanContext *vulkan_;
};