Merge pull request #16486 from unknownbrackets/softgpu-opt

softgpu: Apply optimizations to states generically
This commit is contained in:
Henrik Rydgård 2022-12-03 11:08:10 +01:00 committed by GitHub
commit 4589473231
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 317 additions and 96 deletions

View File

@ -169,12 +169,14 @@ void BinManager::UpdateState() {
if (HasDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL)) {
if (states_.Full())
Flush("states");
creatingState_ = true;
stateIndex_ = (uint16_t)states_.Push(RasterizerState());
// When new funcs are compiled, we need to flush if WX exclusive.
ComputeRasterizerState(&states_[stateIndex_], [&]() {
Flush("compile");
});
states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;
creatingState_ = false;
ClearDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL);
}
@ -388,6 +390,7 @@ void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const V
if (queue_.Full())
Drain();
queue_.Push(BinItem{ BinItemType::TRIANGLE, stateIndex_, range, v0, v1, v2 });
CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, v2);
Expand(range);
}
@ -399,6 +402,7 @@ void BinManager::AddClearRect(const VertexData &v0, const VertexData &v1) {
if (queue_.Full())
Drain();
queue_.Push(BinItem{ BinItemType::CLEAR_RECT, stateIndex_, range, v0, v1 });
CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
Expand(range);
}
@ -410,6 +414,7 @@ void BinManager::AddRect(const VertexData &v0, const VertexData &v1) {
if (queue_.Full())
Drain();
queue_.Push(BinItem{ BinItemType::RECT, stateIndex_, range, v0, v1 });
CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
Expand(range);
}
@ -421,6 +426,7 @@ void BinManager::AddSprite(const VertexData &v0, const VertexData &v1) {
if (queue_.Full())
Drain();
queue_.Push(BinItem{ BinItemType::SPRITE, stateIndex_, range, v0, v1 });
CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
Expand(range);
}
@ -432,6 +438,7 @@ void BinManager::AddLine(const VertexData &v0, const VertexData &v1) {
if (queue_.Full())
Drain();
queue_.Push(BinItem{ BinItemType::LINE, stateIndex_, range, v0, v1 });
CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, false);
Expand(range);
}
@ -443,6 +450,7 @@ void BinManager::AddPoint(const VertexData &v0) {
if (queue_.Full())
Drain();
queue_.Push(BinItem{ BinItemType::POINT, stateIndex_, range, v0 });
CalculateRasterStateFlags(&states_[stateIndex_], v0);
Expand(range);
}
@ -486,6 +494,10 @@ void BinManager::Drain(bool flushing) {
tasksSplit_ = true;
}
// Let's try to optimize states, if we can.
OptimizePendingStates(pendingStateIndex_, stateIndex_);
pendingStateIndex_ = stateIndex_;
if (taskRanges_.size() <= 1) {
PROFILE_THIS_SCOPE("bin_drain_single");
while (!queue_.Empty()) {
@ -584,6 +596,22 @@ void BinManager::Flush(const char *reason) {
}
}
// Runs OptimizeRasterState() over the queued states from `first` through `last`
// (inclusive), wrapping around at QUEUED_STATES.
void BinManager::OptimizePendingStates(uint16_t first, uint16_t last) {
	// A drain can be triggered by a func compile in the middle of building a state.
	// That newest state is only partially filled in, so leave it out of the range.
	const bool newestIsIncomplete = creatingState_ && last == stateIndex_;
	if (newestIsIncomplete) {
		// Nothing but the incomplete state was pending.
		if (first == last)
			return;
		last--;
	}

	// Inclusive distance between the two indices, accounting for wrap-around.
	const int total = (QUEUED_STATES + last - first) % QUEUED_STATES + 1;
	int offset = 0;
	while (offset < total) {
		const size_t slot = (first + offset) % QUEUED_STATES;
		OptimizeRasterState(&states_[slot]);
		++offset;
	}
}
bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
// We can only write to VRAM.
if (!Memory::IsVRAMAddress(start))

View File

@ -261,6 +261,8 @@ private:
std::unordered_map<uint32_t, BinDirtyRange> pendingReads_;
bool pendingOverlap_ = false;
bool creatingState_ = false;
uint16_t pendingStateIndex_ = 0;
std::unordered_map<const char *, double> flushReasonTimes_;
std::unordered_map<const char *, double> lastFlushReasonTimes_;
@ -274,6 +276,7 @@ private:
void MarkPendingWrites(const Rasterizer::RasterizerState &state);
bool HasTextureWrite(const Rasterizer::RasterizerState &state);
bool IsExactSelfRender(const Rasterizer::RasterizerState &state, const BinItem &item);
void OptimizePendingStates(uint16_t first, uint16_t last);
BinCoords Scissor(BinCoords range);
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);
BinCoords Range(const VertexData &v0, const VertexData &v1);

View File

@ -744,12 +744,12 @@ SingleFunc PixelJitCache::GenericSingle(const PixelFuncID &id) {
}
// 256k should be plenty of space for plenty of variations.
PixelJitCache::PixelJitCache() : CodeBlock(1024 * 64 * 4) {
PixelJitCache::PixelJitCache() : CodeBlock(1024 * 64 * 4), cache_(64) {
}
void PixelJitCache::Clear() {
CodeBlock::Clear();
cache_.clear();
cache_.Clear();
addresses_.clear();
constBlendHalf_11_4s_ = nullptr;
@ -777,8 +777,12 @@ std::string PixelJitCache::DescribeCodePtr(const u8 *ptr) {
void PixelJitCache::Flush() {
std::unique_lock<std::mutex> guard(jitCacheLock);
for (const auto &queued : compileQueue_)
Compile(queued);
for (const auto &queued : compileQueue_) {
// Might've been compiled after enqueue, but before now.
size_t queuedKey = std::hash<PixelFuncID>()(queued);
if (!cache_.Get(queuedKey))
Compile(queued);
}
compileQueue_.clear();
}
@ -787,10 +791,11 @@ SingleFunc PixelJitCache::GetSingle(const PixelFuncID &id, std::function<void()>
return nullptr;
std::unique_lock<std::mutex> guard(jitCacheLock);
const size_t key = std::hash<PixelFuncID>()(id);
auto it = cache_.find(id);
if (it != cache_.end()) {
return it->second;
auto it = cache_.Get(key);
if (it != nullptr) {
return it;
}
if (!flushForCompile) {
@ -803,16 +808,17 @@ SingleFunc PixelJitCache::GetSingle(const PixelFuncID &id, std::function<void()>
flushForCompile();
guard.lock();
for (const auto &queued : compileQueue_)
Compile(queued);
for (const auto &queued : compileQueue_) {
// Might've been compiled after enqueue, but before now.
size_t queuedKey = std::hash<PixelFuncID>()(queued);
if (!cache_.Get(queuedKey))
Compile(queued);
}
compileQueue_.clear();
Compile(id);
it = cache_.find(id);
if (it != cache_.end())
return it->second;
return nullptr;
return cache_.Get(key);
}
void PixelJitCache::Compile(const PixelFuncID &id) {
@ -824,7 +830,7 @@ void PixelJitCache::Compile(const PixelFuncID &id) {
#if PPSSPP_ARCH(AMD64) && !PPSSPP_PLATFORM(UWP)
addresses_[id] = GetCodePointer();
SingleFunc func = CompileSingle(id);
cache_[id] = func;
cache_.Insert(std::hash<PixelFuncID>()(id), func);
#endif
}

View File

@ -24,6 +24,7 @@
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include "Common/Data/Collections/Hashmaps.h"
#include "GPU/Math3D.h"
#include "GPU/Software/FuncId.h"
#include "GPU/Software/RasterizerRegCache.h"
@ -107,7 +108,7 @@ private:
bool Jit_ConvertFrom5551(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ConvertFrom4444(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha);
std::unordered_map<PixelFuncID, SingleFunc> cache_;
DenseHashMap<size_t, SingleFunc, nullptr> cache_;
std::unordered_map<PixelFuncID, const u8 *> addresses_;
std::unordered_set<PixelFuncID> compileQueue_;

View File

@ -143,7 +143,7 @@ void ComputeRasterizerState(RasterizerState *state, std::function<void()> flushF
}
}
state->shadeGouraud = gstate.getShadeMode() == GE_SHADE_GOURAUD;
state->shadeGouraud = !gstate.isModeClear() && gstate.getShadeMode() == GE_SHADE_GOURAUD;
state->throughMode = gstate.isModeThrough();
state->antialiasLines = gstate.isAntiAliasEnabled();
@ -155,74 +155,220 @@ void ComputeRasterizerState(RasterizerState *state, std::function<void()> flushF
#endif
}
// Records what a single vertex contributes to the state's VERTEX_* flags.
// Color/alpha are only inspected when useColor is set; fog depth is always inspected.
// Flags are only ever added here, never removed.
static inline void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0, bool useColor) {
	RasterizerStateFlags seen = RasterizerStateFlags::NONE;

	if (useColor) {
		const uint8_t alpha = v0.color0 >> 24;
		const bool rgbIsWhite = (v0.color0 & 0x00FFFFFF) == 0x00FFFFFF;

		if (!rgbIsWhite)
			seen |= RasterizerStateFlags::VERTEX_NON_FULL_WHITE;
		if (alpha != 0)
			seen |= RasterizerStateFlags::VERTEX_ALPHA_NON_ZERO;
		if (alpha != 0xFF)
			seen |= RasterizerStateFlags::VERTEX_ALPHA_NON_FULL;
	}

	// Written as a negated >= so that anything not comparing >= 1.0f counts as having fog.
	if (!(v0.fogdepth >= 1.0f))
		seen |= RasterizerStateFlags::VERTEX_HAS_FOG;

	state->flags |= seen;
}
// Single-vertex variant: the vertex's color is always taken into account.
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0) {
	const bool useColor = true;
	CalculateRasterStateFlags(state, v0, useColor);
}
// Two-vertex variant. The second vertex's color always counts; the first vertex's
// color only counts when gouraud shading is on and the caller did not force flat.
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0, const VertexData &v1, bool forceFlat) {
	const bool firstColorCounts = state->shadeGouraud && !forceFlat;

	CalculateRasterStateFlags(state, v0, firstColorCounts);
	CalculateRasterStateFlags(state, v1, true);
}
// Triangle variant. The last vertex's color always counts; the first two only
// count when gouraud shading is enabled for this state.
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0, const VertexData &v1, const VertexData &v2) {
	const bool gouraud = state->shadeGouraud;

	CalculateRasterStateFlags(state, v0, gouraud);
	CalculateRasterStateFlags(state, v1, gouraud);
	CalculateRasterStateFlags(state, v2, true);
}
// Extracts only the bits that describe pixel func optimizations, as a plain int.
static inline int OptimizePixelIDFlags(const RasterizerStateFlags &flags) {
	const int pixelMask = static_cast<int>(RasterizerStateFlags::OPTIMIZED_PIXELID);
	return static_cast<int>(flags) & pixelMask;
}
// Extracts only the bits that describe sampler func optimizations, as a plain int.
static inline int OptimizeSamplerIDFlags(const RasterizerStateFlags &flags) {
	const int samplerMask = static_cast<int>(RasterizerStateFlags::OPTIMIZED_SAMPLERID);
	return static_cast<int>(flags) & samplerMask;
}
// Extracts every bit that affects either the pixel func or the sampler func.
static inline int OptimizeAllFlags(const RasterizerStateFlags &flags) {
	const int pixelBits = OptimizePixelIDFlags(flags);
	const int samplerBits = OptimizeSamplerIDFlags(flags);
	return pixelBits | samplerBits;
}
// Returns `flags` with every bit of `mask` removed.  Done on the underlying ints,
// using explicit casts rather than the enum's own operators.
static inline RasterizerStateFlags ClearFlags(const RasterizerStateFlags &flags, const RasterizerStateFlags &mask) {
	const int current = static_cast<int>(flags);
	const int keep = ~static_cast<int>(mask);
	return static_cast<RasterizerStateFlags>(current & keep);
}
// Swaps the pixel func optimization bits of `flags` for those found in `replace`,
// leaving every other bit of `flags` alone.
static inline RasterizerStateFlags ReplacePixelIDFlags(const RasterizerStateFlags &flags, const RasterizerStateFlags &replace) {
	const RasterizerStateFlags incoming = static_cast<RasterizerStateFlags>(OptimizePixelIDFlags(replace));
	return ClearFlags(flags, RasterizerStateFlags::OPTIMIZED_PIXELID) | incoming;
}
// Swaps the sampler func optimization bits of `flags` for those found in `replace`,
// leaving every other bit of `flags` alone.
static inline RasterizerStateFlags ReplaceSamplerIDFlags(const RasterizerStateFlags &flags, const RasterizerStateFlags &replace) {
	const RasterizerStateFlags incoming = static_cast<RasterizerStateFlags>(OptimizeSamplerIDFlags(replace));
	return ClearFlags(flags, RasterizerStateFlags::OPTIMIZED_SAMPLERID) | incoming;
}
// Works out which OPTIMIZED_* flags are currently valid for `state`, based on the
// VERTEX_* flags recorded so far.  Does not modify the state; the result is meant
// to be compared against state->flags and handed to ApplyStateOptimizations().
//
// Where an optimization is already applied (its OPTIMIZED_* bit is set in
// state->flags), the IDs hold the replaced values, so the original values are
// reconstructed first and the decision is re-made from those.
static RasterizerStateFlags DetectStateOptimizations(RasterizerState *state) {
	// Note: all optimizations must be undoable.
	RasterizerStateFlags optimize = RasterizerStateFlags::NONE;
	if (!state->pixelID.clearMode) {
		auto &pixelID = state->pixelID;

		bool useTextureAlpha = state->enableTextures && state->samplerID.useTextureAlpha;
		// Blending counts as enabled if we were the ones who turned it off.
		bool alphaBlend = pixelID.alphaBlend || (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_OFF);
		if (alphaBlend && !useTextureAlpha) {
			PixelBlendFactor src = pixelID.AlphaBlendSrc();
			PixelBlendFactor dst = pixelID.AlphaBlendDst();
			// Undo our own factor replacements to see the factors as originally requested.
			if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_SRC)
				src = PixelBlendFactor::SRCALPHA;
			if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_DST)
				dst = PixelBlendFactor::INVSRCALPHA;

			bool canZero = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_ZERO);
			bool canFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL);

			// Okay, we may be able to convert this to a fixed value.
			if (canZero || canFull) {
				// If it was already set and we still can, set it again.
				if (src == PixelBlendFactor::SRCALPHA)
					optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_SRC;
				if (dst == PixelBlendFactor::INVSRCALPHA)
					optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_DST;
			}

			// With src * ONE and dst * ZERO, skipping the blend is only the same thing
			// for the additive equation.  Other equations (reverse subtract, min, max,
			// abs diff) still depend on dst, so blending must stay on for those.
			bool blendIsAdd = pixelID.AlphaBlendEq() == GE_BLENDMODE_MUL_AND_ADD;
			bool srcIsOne = src == PixelBlendFactor::SRCALPHA || src == PixelBlendFactor::ONE;
			bool dstIsZero = dst == PixelBlendFactor::INVSRCALPHA || dst == PixelBlendFactor::ZERO;
			if (blendIsAdd && canFull && srcIsOne && dstIsZero) {
				optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_OFF;
			}
		}

		// Fog counts as enabled if we were the ones who turned it off.
		bool applyFog = pixelID.applyFog || (state->flags & RasterizerStateFlags::OPTIMIZED_FOG_OFF);
		if (applyFog) {
			bool hasFog = state->flags & RasterizerStateFlags::VERTEX_HAS_FOG;
			if (!hasFog)
				optimize |= RasterizerStateFlags::OPTIMIZED_FOG_OFF;
		}
	}

	if (state->enableTextures) {
		bool useTextureAlpha = state->samplerID.useTextureAlpha;
		bool alphaFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL);
		bool colorFull = !(state->flags & RasterizerStateFlags::VERTEX_NON_FULL_WHITE);
		if (colorFull && (!useTextureAlpha || alphaFull)) {
			// Modulate is common, sometimes even with a fixed color.  Replace is cheaper.
			GETexFunc texFunc = state->samplerID.TexFunc();
			// If we already swapped it to replace, it was modulate originally.
			if (state->flags & RasterizerStateFlags::OPTIMIZED_TEXREPLACE)
				texFunc = GE_TEXFUNC_MODULATE;

			if (texFunc == GE_TEXFUNC_MODULATE)
				optimize |= RasterizerStateFlags::OPTIMIZED_TEXREPLACE;
		}
	}

	return optimize;
}
// Brings `state` in line with the optimization set `optimize`.
//
// The pixel ID group and the sampler ID group are handled separately, and each is
// only touched when its OPTIMIZED_* bits differ between state->flags and optimize.
// Within a group, a bit present in optimize is applied, and a bit present only in
// state->flags is undone by writing back the value it originally replaced.
//
// The funcs are looked up with nullptr as the second argument.  If a lookup comes
// back null, that group's ID, funcs and flags are left exactly as they were.
//
// state->lastFlags is always set to state->flags before returning.
// Returns true if at least one of the two groups was replaced.
static bool ApplyStateOptimizations(RasterizerState *state, const RasterizerStateFlags &optimize) {
bool changed = false;
// Check if we can compile the new funcs before replacing.
if (OptimizePixelIDFlags(state->flags) != OptimizePixelIDFlags(optimize)) {
// Chooses the constant factor pair below: ONE/ZERO when no non-full alpha was recorded, ZERO/ONE otherwise.
bool canFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL);
// Work on a copy, so the state is untouched if the func isn't available.
PixelFuncID pixelID = state->pixelID;
if (optimize & RasterizerStateFlags::OPTIMIZED_BLEND_OFF)
pixelID.alphaBlend = false;
else if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_OFF)
pixelID.alphaBlend = true;
if (optimize & RasterizerStateFlags::OPTIMIZED_BLEND_SRC)
pixelID.alphaBlendSrc = (uint8_t)(canFull ? PixelBlendFactor::ONE : PixelBlendFactor::ZERO);
else if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_SRC)
pixelID.alphaBlendSrc = (uint8_t)PixelBlendFactor::SRCALPHA;
if (optimize & RasterizerStateFlags::OPTIMIZED_BLEND_DST)
pixelID.alphaBlendDst = (uint8_t)(canFull ? PixelBlendFactor::ZERO : PixelBlendFactor::ONE);
else if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_DST)
pixelID.alphaBlendDst = (uint8_t)PixelBlendFactor::INVSRCALPHA;
if (optimize & RasterizerStateFlags::OPTIMIZED_FOG_OFF)
pixelID.applyFog = false;
else if (state->flags & RasterizerStateFlags::OPTIMIZED_FOG_OFF)
pixelID.applyFog = true;
SingleFunc drawPixel = Rasterizer::GetSingleFunc(pixelID, nullptr);
// Can't compile during runtime. This failing is a bit of a problem when undoing...
if (drawPixel) {
state->drawPixel = drawPixel;
memcpy(&state->pixelID, &pixelID, sizeof(PixelFuncID));
// Only the pixel ID group of flags is swapped; OPTIMIZED is set even when undoing.
state->flags = ReplacePixelIDFlags(state->flags, optimize) | RasterizerStateFlags::OPTIMIZED;
changed = true;
}
}
if (OptimizeSamplerIDFlags(state->flags) != OptimizeSamplerIDFlags(optimize)) {
// Work on a copy, so the state is untouched if the funcs aren't available.
SamplerID samplerID = state->samplerID;
if (optimize & RasterizerStateFlags::OPTIMIZED_TEXREPLACE)
samplerID.texFunc = (uint8_t)GE_TEXFUNC_REPLACE;
else if (state->flags & RasterizerStateFlags::OPTIMIZED_TEXREPLACE)
samplerID.texFunc = (uint8_t)GE_TEXFUNC_MODULATE;
Sampler::LinearFunc linear = Sampler::GetLinearFunc(samplerID, nullptr);
Sampler::LinearFunc nearest = Sampler::GetNearestFunc(samplerID, nullptr);
// Can't compile during runtime. This failing is a bit of a problem when undoing...
if (linear && nearest) {
// Since the definitions are the same, just force this setting using the func pointer.
if (g_Config.iTexFiltering == TEX_FILTER_FORCE_LINEAR) {
state->nearest = linear;
state->linear = linear;
} else if (g_Config.iTexFiltering == TEX_FILTER_FORCE_NEAREST) {
state->nearest = nearest;
state->linear = nearest;
} else {
state->nearest = nearest;
state->linear = linear;
}
memcpy(&state->samplerID, &samplerID, sizeof(SamplerID));
// Only the sampler ID group of flags is swapped; OPTIMIZED is set even when undoing.
state->flags = ReplaceSamplerIDFlags(state->flags, optimize) | RasterizerStateFlags::OPTIMIZED;
changed = true;
}
}
// Remember what we decided on, so OptimizeRasterState() can skip when nothing has changed.
state->lastFlags = state->flags;
return changed;
}
// Re-evaluates which optimizations fit `state` and applies them if needed.
// Returns true only when the state's funcs were actually replaced.
bool OptimizeRasterState(RasterizerState *state) {
	// No new flags were recorded since the last pass, so there's nothing to decide.
	if (state->lastFlags == state->flags)
		return false;

	const bool wasOptimized = state->flags & RasterizerStateFlags::OPTIMIZED;
	RasterizerStateFlags wanted = DetectStateOptimizations(state);

	if (wasOptimized) {
		// If it was optimized before, just revert and don't churn.
		if (OptimizeAllFlags(state->flags) != OptimizeAllFlags(wanted))
			wanted = RasterizerStateFlags::NONE;
	} else if (wanted == RasterizerStateFlags::NONE) {
		// Never optimized and nothing to gain: just note that these flags were seen.
		state->lastFlags = state->flags;
		return false;
	}

	return ApplyStateOptimizations(state, wanted);
}
RasterizerState OptimizeFlatRasterizerState(const RasterizerState &origState, const VertexData &v1) {
uint8_t alpha = v1.color0 >> 24;
RasterizerState state = origState;
bool changedPixelID = false;
bool changedSamplerID = false;
if (!state.pixelID.clearMode) {
auto &pixelID = state.pixelID;
auto &cached = pixelID.cached;
// Sometimes, a particular draw can do better than the overall state.
state.flags = ClearFlags(state.flags, RasterizerStateFlags::VERTEX_FLAT_RESET);
CalculateRasterStateFlags(&state, v1, true);
bool useTextureAlpha = state.enableTextures && state.samplerID.useTextureAlpha;
if (pixelID.alphaBlend && pixelID.AlphaBlendSrc() == PixelBlendFactor::SRCALPHA && !useTextureAlpha) {
// Okay, we may be able to convert this to a fixed value.
if (alpha == 0) {
pixelID.alphaBlendSrc = (uint8_t)PixelBlendFactor::ZERO;
changedPixelID = true;
} else if (alpha == 0xFF) {
pixelID.alphaBlendSrc = (uint8_t)PixelBlendFactor::ONE;
changedPixelID = true;
}
}
if (pixelID.alphaBlend && pixelID.AlphaBlendDst() == PixelBlendFactor::INVSRCALPHA && !useTextureAlpha) {
if (alpha == 0) {
pixelID.alphaBlendDst = (uint8_t)PixelBlendFactor::ONE;
changedPixelID = true;
} else if (alpha == 0xFF) {
pixelID.alphaBlendDst = (uint8_t)PixelBlendFactor::ZERO;
changedPixelID = true;
}
}
if (pixelID.alphaBlend && pixelID.AlphaBlendSrc() == PixelBlendFactor::ONE && pixelID.AlphaBlendDst() == PixelBlendFactor::ZERO) {
if (pixelID.AlphaBlendEq() == GE_BLENDMODE_MUL_AND_ADD) {
pixelID.alphaBlend = false;
changedPixelID = true;
}
}
}
if (state.enableTextures) {
if (v1.color0 == 0xFFFFFFFF) {
// Modulate is common, sometimes even with a fixed color. Replace is cheaper.
if (state.samplerID.TexFunc() == GE_TEXFUNC_MODULATE) {
state.samplerID.texFunc = (uint8_t)GE_TEXFUNC_REPLACE;
changedSamplerID = true;
}
}
RasterizerStateFlags optimize = DetectStateOptimizations(&state);
if (OptimizeAllFlags(state.flags) != OptimizeAllFlags(optimize)) {
ApplyStateOptimizations(&state, optimize);
return state;
}
if (changedPixelID) {
state.drawPixel = Rasterizer::GetSingleFunc(state.pixelID, nullptr);
// Can't compile during runtime.
if (!state.drawPixel)
return origState;
}
if (changedSamplerID) {
state.linear = Sampler::GetLinearFunc(state.samplerID, nullptr);
state.nearest = Sampler::GetNearestFunc(state.samplerID, nullptr);
// Can't compile during runtime.
if (!state.linear || !state.nearest)
return origState;
// Since the definitions are the same, just force this setting using the func pointer.
if (g_Config.iTexFiltering == TEX_FILTER_FORCE_LINEAR)
state.nearest = state.linear;
else if (g_Config.iTexFiltering == TEX_FILTER_FORCE_NEAREST)
state.linear = state.nearest;
}
return state;
return origState;
}
static inline u8 ClampFogDepth(float fogdepth) {
@ -602,7 +748,7 @@ void DrawTriangleSlice(
// All the z values are the same, no interpolation required.
// This is common, and when we interpolate, we lose accuracy.
const bool flatZ = v0.screenpos.z == v1.screenpos.z && v0.screenpos.z == v2.screenpos.z;
const bool flatColorAll = clearMode || !state.shadeGouraud;
const bool flatColorAll = !state.shadeGouraud;
const bool flatColor0 = flatColorAll || (v0.color0 == v1.color0 && v0.color0 == v2.color0);
const bool flatColor1 = flatColorAll || (v0.color1 == v1.color1 && v0.color1 == v2.color1);
const bool noFog = clearMode || !pixelID.applyFog || (v0.fogdepth >= 1.0f && v1.fogdepth >= 1.0f && v2.fogdepth >= 1.0f);

View File

@ -33,6 +33,30 @@ struct BinCoords;
namespace Rasterizer {
// Bit flags tracked per RasterizerState.
// The low bits (VERTEX_*) record what has been seen in the vertices drawn with
// the state, and the high bits (OPTIMIZED*) record which replacements have been
// made to the state's pixel/sampler IDs as a result.
enum class RasterizerStateFlags {
NONE = 0,
// Some vertex had a color0 whose low 24 bits were not 0xFFFFFF.
VERTEX_NON_FULL_WHITE = 0x0001,
// Some vertex had an alpha (top 8 bits of color0) other than 0.
VERTEX_ALPHA_NON_ZERO = 0x0002,
// Some vertex had an alpha (top 8 bits of color0) other than 0xFF.
VERTEX_ALPHA_NON_FULL = 0x0004,
// Some vertex had a fogdepth that was not >= 1.0f.
VERTEX_HAS_FOG = 0x0008,
// Every VERTEX_* bit together.
VERTEX_FLAT_RESET = VERTEX_NON_FULL_WHITE | VERTEX_ALPHA_NON_FULL | VERTEX_ALPHA_NON_ZERO | VERTEX_HAS_FOG,
// Set once the state's funcs have been replaced by an optimization pass.
OPTIMIZED = 0x0001'0000,
// A SRCALPHA source blend factor was replaced by a constant factor.
OPTIMIZED_BLEND_SRC = 0x0002'0000,
// An INVSRCALPHA dest blend factor was replaced by a constant factor.
OPTIMIZED_BLEND_DST = 0x0004'0000,
// Alpha blending was switched off in the pixel ID.
OPTIMIZED_BLEND_OFF = 0x0008'0000,
// A MODULATE texture func was replaced by REPLACE in the sampler ID.
OPTIMIZED_TEXREPLACE = 0x0010'0000,
// Fog was switched off in the pixel ID.
OPTIMIZED_FOG_OFF = 0x0020'0000,
// Anything that changes the actual pixel or sampler func.
OPTIMIZED_PIXELID = OPTIMIZED_BLEND_SRC | OPTIMIZED_BLEND_DST | OPTIMIZED_BLEND_OFF | OPTIMIZED_FOG_OFF,
OPTIMIZED_SAMPLERID = OPTIMIZED_TEXREPLACE,
// Used as the initial value of RasterizerState::lastFlags.
INVALID = 0x7FFFFFFF,
};
ENUM_CLASS_BITOPS(RasterizerStateFlags);
struct RasterizerState {
PixelFuncID pixelID;
SamplerID samplerID;
@ -43,6 +67,8 @@ struct RasterizerState {
uint16_t texbufw[8]{};
const u8 *texptr[8]{};
float textureLodSlope;
RasterizerStateFlags flags = RasterizerStateFlags::NONE;
RasterizerStateFlags lastFlags = RasterizerStateFlags::INVALID;
struct {
uint8_t maxTexLevel : 3;
@ -68,6 +94,10 @@ struct RasterizerState {
};
void ComputeRasterizerState(RasterizerState *state, std::function<void()> flushForCompile);
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0);
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0, const VertexData &v1, bool forceFlat);
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0, const VertexData &v1, const VertexData &v2);
bool OptimizeRasterState(RasterizerState *state);
// Draws a triangle if its vertices are specified in counter-clockwise order
void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const BinCoords &range, const RasterizerState &state);

View File

@ -98,12 +98,12 @@ FetchFunc GetFetchFunc(SamplerID id, std::function<void()> flushForCompile) {
}
// 256k should be enough.
SamplerJitCache::SamplerJitCache() : Rasterizer::CodeBlock(1024 * 64 * 4) {
SamplerJitCache::SamplerJitCache() : Rasterizer::CodeBlock(1024 * 64 * 4), cache_(64) {
}
void SamplerJitCache::Clear() {
CodeBlock::Clear();
cache_.clear();
cache_.Clear();
addresses_.clear();
const10All16_ = nullptr;
@ -144,8 +144,12 @@ std::string SamplerJitCache::DescribeCodePtr(const u8 *ptr) {
void SamplerJitCache::Flush() {
std::unique_lock<std::mutex> guard(jitCacheLock);
for (const auto &queued : compileQueue_)
Compile(queued);
for (const auto &queued : compileQueue_) {
// Might've been compiled after enqueue, but before now.
size_t queuedKey = std::hash<SamplerID>()(queued);
if (!cache_.Get(queuedKey))
Compile(queued);
}
compileQueue_.clear();
}
@ -154,10 +158,11 @@ NearestFunc SamplerJitCache::GetByID(const SamplerID &id, std::function<void()>
return nullptr;
std::unique_lock<std::mutex> guard(jitCacheLock);
const size_t key = std::hash<SamplerID>()(id);
auto it = cache_.find(id);
if (it != cache_.end())
return it->second;
auto it = cache_.Get(key);
if (it != nullptr)
return it;
if (!flushForCompile) {
// Can't compile, let's try to do it later when there's an opportunity.
@ -169,17 +174,18 @@ NearestFunc SamplerJitCache::GetByID(const SamplerID &id, std::function<void()>
flushForCompile();
guard.lock();
for (const auto &queued : compileQueue_)
Compile(queued);
for (const auto &queued : compileQueue_) {
// Might've been compiled after enqueue, but before now.
size_t queuedKey = std::hash<SamplerID>()(queued);
if (!cache_.Get(queuedKey))
Compile(queued);
}
compileQueue_.clear();
Compile(id);
// Okay, should be there now.
it = cache_.find(id);
if (it != cache_.end())
return it->second;
return nullptr;
return cache_.Get(key);
}
NearestFunc SamplerJitCache::GetNearest(const SamplerID &id, std::function<void()> flushForCompile) {
@ -207,19 +213,19 @@ void SamplerJitCache::Compile(const SamplerID &id) {
fetchID.linear = false;
fetchID.fetch = true;
addresses_[fetchID] = GetCodePointer();
cache_[fetchID] = (NearestFunc)CompileFetch(fetchID);
cache_.Insert(std::hash<SamplerID>()(fetchID), (NearestFunc)CompileFetch(fetchID));
SamplerID nearestID = id;
nearestID.linear = false;
nearestID.fetch = false;
addresses_[nearestID] = GetCodePointer();
cache_[nearestID] = (NearestFunc)CompileNearest(nearestID);
cache_.Insert(std::hash<SamplerID>()(nearestID), (NearestFunc)CompileNearest(nearestID));
SamplerID linearID = id;
linearID.linear = true;
linearID.fetch = false;
addresses_[linearID] = GetCodePointer();
cache_[linearID] = (NearestFunc)CompileLinear(linearID);
cache_.Insert(std::hash<SamplerID>()(linearID), (NearestFunc)CompileLinear(linearID));
#endif
}

View File

@ -22,6 +22,7 @@
#include <functional>
#include <unordered_map>
#include <unordered_set>
#include "Common/Data/Collections/Hashmaps.h"
#include "GPU/Math3D.h"
#include "GPU/Software/FuncId.h"
#include "GPU/Software/RasterizerRegCache.h"
@ -126,7 +127,7 @@ private:
const u8 *const5551Swizzle_ = nullptr;
const u8 *const5650Swizzle_ = nullptr;
std::unordered_map<SamplerID, NearestFunc> cache_;
DenseHashMap<size_t, NearestFunc, nullptr> cache_;
std::unordered_map<SamplerID, const u8 *> addresses_;
std::unordered_set<SamplerID> compileQueue_;
};

View File

@ -107,7 +107,7 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
{ GE_CMD_FOG1, 0, SoftDirty::TRANSFORM_FOG },
{ GE_CMD_FOG2, 0, SoftDirty::TRANSFORM_FOG },
{ GE_CMD_CLEARMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_TEX | SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_STENCIL | SoftDirty::PIXEL_CACHED | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_CLEARMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_BASIC | SoftDirty::RAST_TEX | SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_STENCIL | SoftDirty::PIXEL_CACHED | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_TEXTUREMAPENABLE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX | SoftDirty::TRANSFORM_BASIC | SoftDirty::BINNER_OVERLAP },
{ GE_CMD_FOGENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED | SoftDirty::TRANSFORM_BASIC | SoftDirty::TRANSFORM_FOG | SoftDirty::TRANSFORM_MATRIX },
{ GE_CMD_TEXMODE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX },