mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-12-01 01:11:46 +00:00
Merge pull request #16486 from unknownbrackets/softgpu-opt
softgpu: Apply optimizations to states generically
This commit is contained in:
commit
4589473231
@ -169,12 +169,14 @@ void BinManager::UpdateState() {
|
||||
if (HasDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL)) {
|
||||
if (states_.Full())
|
||||
Flush("states");
|
||||
creatingState_ = true;
|
||||
stateIndex_ = (uint16_t)states_.Push(RasterizerState());
|
||||
// When new funcs are compiled, we need to flush if WX exclusive.
|
||||
ComputeRasterizerState(&states_[stateIndex_], [&]() {
|
||||
Flush("compile");
|
||||
});
|
||||
states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;
|
||||
creatingState_ = false;
|
||||
|
||||
ClearDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL);
|
||||
}
|
||||
@ -388,6 +390,7 @@ void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const V
|
||||
if (queue_.Full())
|
||||
Drain();
|
||||
queue_.Push(BinItem{ BinItemType::TRIANGLE, stateIndex_, range, v0, v1, v2 });
|
||||
CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, v2);
|
||||
Expand(range);
|
||||
}
|
||||
|
||||
@ -399,6 +402,7 @@ void BinManager::AddClearRect(const VertexData &v0, const VertexData &v1) {
|
||||
if (queue_.Full())
|
||||
Drain();
|
||||
queue_.Push(BinItem{ BinItemType::CLEAR_RECT, stateIndex_, range, v0, v1 });
|
||||
CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
|
||||
Expand(range);
|
||||
}
|
||||
|
||||
@ -410,6 +414,7 @@ void BinManager::AddRect(const VertexData &v0, const VertexData &v1) {
|
||||
if (queue_.Full())
|
||||
Drain();
|
||||
queue_.Push(BinItem{ BinItemType::RECT, stateIndex_, range, v0, v1 });
|
||||
CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
|
||||
Expand(range);
|
||||
}
|
||||
|
||||
@ -421,6 +426,7 @@ void BinManager::AddSprite(const VertexData &v0, const VertexData &v1) {
|
||||
if (queue_.Full())
|
||||
Drain();
|
||||
queue_.Push(BinItem{ BinItemType::SPRITE, stateIndex_, range, v0, v1 });
|
||||
CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
|
||||
Expand(range);
|
||||
}
|
||||
|
||||
@ -432,6 +438,7 @@ void BinManager::AddLine(const VertexData &v0, const VertexData &v1) {
|
||||
if (queue_.Full())
|
||||
Drain();
|
||||
queue_.Push(BinItem{ BinItemType::LINE, stateIndex_, range, v0, v1 });
|
||||
CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, false);
|
||||
Expand(range);
|
||||
}
|
||||
|
||||
@ -443,6 +450,7 @@ void BinManager::AddPoint(const VertexData &v0) {
|
||||
if (queue_.Full())
|
||||
Drain();
|
||||
queue_.Push(BinItem{ BinItemType::POINT, stateIndex_, range, v0 });
|
||||
CalculateRasterStateFlags(&states_[stateIndex_], v0);
|
||||
Expand(range);
|
||||
}
|
||||
|
||||
@ -486,6 +494,10 @@ void BinManager::Drain(bool flushing) {
|
||||
tasksSplit_ = true;
|
||||
}
|
||||
|
||||
// Let's try to optimize states, if we can.
|
||||
OptimizePendingStates(pendingStateIndex_, stateIndex_);
|
||||
pendingStateIndex_ = stateIndex_;
|
||||
|
||||
if (taskRanges_.size() <= 1) {
|
||||
PROFILE_THIS_SCOPE("bin_drain_single");
|
||||
while (!queue_.Empty()) {
|
||||
@ -584,6 +596,22 @@ void BinManager::Flush(const char *reason) {
|
||||
}
|
||||
}
|
||||
|
||||
void BinManager::OptimizePendingStates(uint16_t first, uint16_t last) {
|
||||
// We can sometimes hit this when compiling new funcs while creating a state.
|
||||
// At that point, the state isn't loaded fully yet, so don't touch it.
|
||||
if (creatingState_ && last == stateIndex_) {
|
||||
if (first == last)
|
||||
return;
|
||||
last--;
|
||||
}
|
||||
|
||||
int count = (QUEUED_STATES + last - first) % QUEUED_STATES + 1;
|
||||
for (int i = 0; i < count; ++i) {
|
||||
size_t pos = (first + i) % QUEUED_STATES;
|
||||
OptimizeRasterState(&states_[pos]);
|
||||
}
|
||||
}
|
||||
|
||||
bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
|
||||
// We can only write to VRAM.
|
||||
if (!Memory::IsVRAMAddress(start))
|
||||
|
@ -261,6 +261,8 @@ private:
|
||||
std::unordered_map<uint32_t, BinDirtyRange> pendingReads_;
|
||||
|
||||
bool pendingOverlap_ = false;
|
||||
bool creatingState_ = false;
|
||||
uint16_t pendingStateIndex_ = 0;
|
||||
|
||||
std::unordered_map<const char *, double> flushReasonTimes_;
|
||||
std::unordered_map<const char *, double> lastFlushReasonTimes_;
|
||||
@ -274,6 +276,7 @@ private:
|
||||
void MarkPendingWrites(const Rasterizer::RasterizerState &state);
|
||||
bool HasTextureWrite(const Rasterizer::RasterizerState &state);
|
||||
bool IsExactSelfRender(const Rasterizer::RasterizerState &state, const BinItem &item);
|
||||
void OptimizePendingStates(uint16_t first, uint16_t last);
|
||||
BinCoords Scissor(BinCoords range);
|
||||
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);
|
||||
BinCoords Range(const VertexData &v0, const VertexData &v1);
|
||||
|
@ -744,12 +744,12 @@ SingleFunc PixelJitCache::GenericSingle(const PixelFuncID &id) {
|
||||
}
|
||||
|
||||
// 256k should be plenty of space for plenty of variations.
|
||||
PixelJitCache::PixelJitCache() : CodeBlock(1024 * 64 * 4) {
|
||||
PixelJitCache::PixelJitCache() : CodeBlock(1024 * 64 * 4), cache_(64) {
|
||||
}
|
||||
|
||||
void PixelJitCache::Clear() {
|
||||
CodeBlock::Clear();
|
||||
cache_.clear();
|
||||
cache_.Clear();
|
||||
addresses_.clear();
|
||||
|
||||
constBlendHalf_11_4s_ = nullptr;
|
||||
@ -777,8 +777,12 @@ std::string PixelJitCache::DescribeCodePtr(const u8 *ptr) {
|
||||
|
||||
void PixelJitCache::Flush() {
|
||||
std::unique_lock<std::mutex> guard(jitCacheLock);
|
||||
for (const auto &queued : compileQueue_)
|
||||
Compile(queued);
|
||||
for (const auto &queued : compileQueue_) {
|
||||
// Might've been compiled after enqueue, but before now.
|
||||
size_t queuedKey = std::hash<PixelFuncID>()(queued);
|
||||
if (!cache_.Get(queuedKey))
|
||||
Compile(queued);
|
||||
}
|
||||
compileQueue_.clear();
|
||||
}
|
||||
|
||||
@ -787,10 +791,11 @@ SingleFunc PixelJitCache::GetSingle(const PixelFuncID &id, std::function<void()>
|
||||
return nullptr;
|
||||
|
||||
std::unique_lock<std::mutex> guard(jitCacheLock);
|
||||
const size_t key = std::hash<PixelFuncID>()(id);
|
||||
|
||||
auto it = cache_.find(id);
|
||||
if (it != cache_.end()) {
|
||||
return it->second;
|
||||
auto it = cache_.Get(key);
|
||||
if (it != nullptr) {
|
||||
return it;
|
||||
}
|
||||
|
||||
if (!flushForCompile) {
|
||||
@ -803,16 +808,17 @@ SingleFunc PixelJitCache::GetSingle(const PixelFuncID &id, std::function<void()>
|
||||
flushForCompile();
|
||||
guard.lock();
|
||||
|
||||
for (const auto &queued : compileQueue_)
|
||||
Compile(queued);
|
||||
for (const auto &queued : compileQueue_) {
|
||||
// Might've been compiled after enqueue, but before now.
|
||||
size_t queuedKey = std::hash<PixelFuncID>()(queued);
|
||||
if (!cache_.Get(queuedKey))
|
||||
Compile(queued);
|
||||
}
|
||||
compileQueue_.clear();
|
||||
|
||||
Compile(id);
|
||||
|
||||
it = cache_.find(id);
|
||||
if (it != cache_.end())
|
||||
return it->second;
|
||||
return nullptr;
|
||||
return cache_.Get(key);
|
||||
}
|
||||
|
||||
void PixelJitCache::Compile(const PixelFuncID &id) {
|
||||
@ -824,7 +830,7 @@ void PixelJitCache::Compile(const PixelFuncID &id) {
|
||||
#if PPSSPP_ARCH(AMD64) && !PPSSPP_PLATFORM(UWP)
|
||||
addresses_[id] = GetCodePointer();
|
||||
SingleFunc func = CompileSingle(id);
|
||||
cache_[id] = func;
|
||||
cache_.Insert(std::hash<PixelFuncID>()(id), func);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include "Common/Data/Collections/Hashmaps.h"
|
||||
#include "GPU/Math3D.h"
|
||||
#include "GPU/Software/FuncId.h"
|
||||
#include "GPU/Software/RasterizerRegCache.h"
|
||||
@ -107,7 +108,7 @@ private:
|
||||
bool Jit_ConvertFrom5551(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha);
|
||||
bool Jit_ConvertFrom4444(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha);
|
||||
|
||||
std::unordered_map<PixelFuncID, SingleFunc> cache_;
|
||||
DenseHashMap<size_t, SingleFunc, nullptr> cache_;
|
||||
std::unordered_map<PixelFuncID, const u8 *> addresses_;
|
||||
std::unordered_set<PixelFuncID> compileQueue_;
|
||||
|
||||
|
@ -143,7 +143,7 @@ void ComputeRasterizerState(RasterizerState *state, std::function<void()> flushF
|
||||
}
|
||||
}
|
||||
|
||||
state->shadeGouraud = gstate.getShadeMode() == GE_SHADE_GOURAUD;
|
||||
state->shadeGouraud = !gstate.isModeClear() && gstate.getShadeMode() == GE_SHADE_GOURAUD;
|
||||
state->throughMode = gstate.isModeThrough();
|
||||
state->antialiasLines = gstate.isAntiAliasEnabled();
|
||||
|
||||
@ -155,74 +155,220 @@ void ComputeRasterizerState(RasterizerState *state, std::function<void()> flushF
|
||||
#endif
|
||||
}
|
||||
|
||||
// Folds a single vertex into state->flags.  The color/alpha bits are only updated when
// useColor is true; the fog bit is evaluated for every vertex.  Bits are only ever added.
static inline void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0, bool useColor) {
	if (useColor) {
		const uint8_t alpha = v0.color0 >> 24;
		const bool fullWhite = (v0.color0 & 0x00FFFFFF) == 0x00FFFFFF;

		if (!fullWhite)
			state->flags |= RasterizerStateFlags::VERTEX_NON_FULL_WHITE;
		if (alpha != 0)
			state->flags |= RasterizerStateFlags::VERTEX_ALPHA_NON_ZERO;
		if (alpha != 0xFF)
			state->flags |= RasterizerStateFlags::VERTEX_ALPHA_NON_FULL;
	}

	// Phrased as a negated >=, so a NaN fogdepth also sets the flag.
	const bool hasFog = !(v0.fogdepth >= 1.0f);
	if (hasFog)
		state->flags |= RasterizerStateFlags::VERTEX_HAS_FOG;
}
|
||||
|
||||
// Single-vertex form: the vertex's color always contributes to the flags.
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0) {
	const bool useColor = true;
	CalculateRasterStateFlags(state, v0, useColor);
}
|
||||
|
||||
// Two-vertex form.  v1's color always counts; v0's color only counts when the state
// uses gouraud shading and the caller has not forced flat shading.
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0, const VertexData &v1, bool forceFlat) {
	const bool firstColorCounts = !forceFlat && state->shadeGouraud;

	CalculateRasterStateFlags(state, v0, firstColorCounts);
	CalculateRasterStateFlags(state, v1, true);
}
|
||||
|
||||
// Three-vertex form.  v2's color always counts; v0 and v1 only contribute color
// when the state uses gouraud shading.
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0, const VertexData &v1, const VertexData &v2) {
	const VertexData *const leading[] = { &v0, &v1 };
	for (const VertexData *vert : leading) {
		// Re-read shadeGouraud each time, exactly as separate calls would.
		CalculateRasterStateFlags(state, *vert, state->shadeGouraud);
	}
	CalculateRasterStateFlags(state, v2, true);
}
|
||||
|
||||
static inline int OptimizePixelIDFlags(const RasterizerStateFlags &flags) {
|
||||
return (int)flags & (int)RasterizerStateFlags::OPTIMIZED_PIXELID;
|
||||
}
|
||||
|
||||
static inline int OptimizeSamplerIDFlags(const RasterizerStateFlags &flags) {
|
||||
return (int)flags & (int)RasterizerStateFlags::OPTIMIZED_SAMPLERID;
|
||||
}
|
||||
|
||||
static inline int OptimizeAllFlags(const RasterizerStateFlags &flags) {
|
||||
return OptimizePixelIDFlags(flags) | OptimizeSamplerIDFlags(flags);
|
||||
}
|
||||
|
||||
// Returns flags with every bit that is set in mask removed.
static inline RasterizerStateFlags ClearFlags(const RasterizerStateFlags &flags, const RasterizerStateFlags &mask) {
	const int keep = ~(int)mask;
	return (RasterizerStateFlags)((int)flags & keep);
}
|
||||
|
||||
// Returns flags with its OPTIMIZED_PIXELID bits swapped for those found in replace.
// All other bits of flags are carried over unchanged.
static inline RasterizerStateFlags ReplacePixelIDFlags(const RasterizerStateFlags &flags, const RasterizerStateFlags &replace) {
	const RasterizerStateFlags incoming = (RasterizerStateFlags)OptimizePixelIDFlags(replace);
	const RasterizerStateFlags base = ClearFlags(flags, RasterizerStateFlags::OPTIMIZED_PIXELID);
	return base | incoming;
}
|
||||
|
||||
// Returns flags with its OPTIMIZED_SAMPLERID bits swapped for those found in replace.
// All other bits of flags are carried over unchanged.
static inline RasterizerStateFlags ReplaceSamplerIDFlags(const RasterizerStateFlags &flags, const RasterizerStateFlags &replace) {
	const RasterizerStateFlags incoming = (RasterizerStateFlags)OptimizeSamplerIDFlags(replace);
	const RasterizerStateFlags base = ClearFlags(flags, RasterizerStateFlags::OPTIMIZED_SAMPLERID);
	return base | incoming;
}
|
||||
|
||||
// Works out which OPTIMIZED_* flags the state currently qualifies for, based on the
// VERTEX_* bits accumulated in state->flags.  Nothing is modified here; the returned
// set is the complete desired set, including optimizations that are already applied.
static RasterizerStateFlags DetectStateOptimizations(RasterizerState *state) {
	// Note: all optimizations must be undoable.
	RasterizerStateFlags optimize = RasterizerStateFlags::NONE;

	if (!state->pixelID.clearMode) {
		auto &pixelID = state->pixelID;

		bool useTextureAlpha = state->enableTextures && state->samplerID.useTextureAlpha;
		// Blending still counts as on if only a previous optimization switched it off.
		bool alphaBlend = pixelID.alphaBlend || (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_OFF);
		if (alphaBlend && !useTextureAlpha) {
			// Judge the factors as they were before an earlier optimization replaced them.
			PixelBlendFactor src = pixelID.AlphaBlendSrc();
			PixelBlendFactor dst = pixelID.AlphaBlendDst();
			if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_SRC)
				src = PixelBlendFactor::SRCALPHA;
			if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_DST)
				dst = PixelBlendFactor::INVSRCALPHA;

			bool canZero = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_ZERO);
			bool canFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL);
			// Okay, we may be able to convert this to a fixed value.
			if (canZero || canFull) {
				// If it was already set and we still can, set it again.
				if (src == PixelBlendFactor::SRCALPHA)
					optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_SRC;
				if (dst == PixelBlendFactor::INVSRCALPHA)
					optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_DST;
			}

			bool srcIsOne = src == PixelBlendFactor::SRCALPHA || src == PixelBlendFactor::ONE;
			bool dstIsZero = dst == PixelBlendFactor::INVSRCALPHA || dst == PixelBlendFactor::ZERO;
			if (canFull && srcIsOne && dstIsZero) {
				// src * 1 + dst * 0 is only a plain copy for the add equation.  Other
				// equations still depend on dst (or negate src), so blending must stay on.
				if (pixelID.AlphaBlendEq() == GE_BLENDMODE_MUL_AND_ADD)
					optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_OFF;
			}
		}

		// Fog still counts as on if only a previous optimization switched it off.
		bool applyFog = pixelID.applyFog || (state->flags & RasterizerStateFlags::OPTIMIZED_FOG_OFF);
		if (applyFog) {
			bool hasFog = state->flags & RasterizerStateFlags::VERTEX_HAS_FOG;
			if (!hasFog)
				optimize |= RasterizerStateFlags::OPTIMIZED_FOG_OFF;
		}
	}

	if (state->enableTextures) {
		bool useTextureAlpha = state->samplerID.useTextureAlpha;
		bool alphaFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL);
		bool colorFull = !(state->flags & RasterizerStateFlags::VERTEX_NON_FULL_WHITE);
		if (colorFull && (!useTextureAlpha || alphaFull)) {
			// Modulate is common, sometimes even with a fixed color.  Replace is cheaper.
			GETexFunc texFunc = state->samplerID.TexFunc();
			// If we already swapped in replace, the func to judge is modulate.
			if (state->flags & RasterizerStateFlags::OPTIMIZED_TEXREPLACE)
				texFunc = GE_TEXFUNC_MODULATE;

			if (texFunc == GE_TEXFUNC_MODULATE)
				optimize |= RasterizerStateFlags::OPTIMIZED_TEXREPLACE;
		}
	}

	return optimize;
}
|
||||
|
||||
// Brings the state's pixel and sampler IDs (and their func pointers) in line with the
// requested optimize set, applying newly requested optimizations and undoing ones that
// are no longer requested.  A group is only switched over when its replacement funcs are
// already available.  Always records state->flags into state->lastFlags before returning.
// Returns true if either the pixel func or the sampler funcs were replaced.
static bool ApplyStateOptimizations(RasterizerState *state, const RasterizerStateFlags &optimize) {
	bool changed = false;

	// Check if we can compile the new funcs before replacing.
	const bool pixelDiffers = OptimizePixelIDFlags(state->flags) != OptimizePixelIDFlags(optimize);
	if (pixelDiffers) {
		const bool canFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL);

		// Work on a copy, so the state is untouched if the func isn't available.
		PixelFuncID pixelID = state->pixelID;

		if (optimize & RasterizerStateFlags::OPTIMIZED_BLEND_OFF) {
			pixelID.alphaBlend = false;
		} else if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_OFF) {
			pixelID.alphaBlend = true;
		}

		if (optimize & RasterizerStateFlags::OPTIMIZED_BLEND_SRC) {
			pixelID.alphaBlendSrc = (uint8_t)(canFull ? PixelBlendFactor::ONE : PixelBlendFactor::ZERO);
		} else if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_SRC) {
			pixelID.alphaBlendSrc = (uint8_t)PixelBlendFactor::SRCALPHA;
		}

		if (optimize & RasterizerStateFlags::OPTIMIZED_BLEND_DST) {
			pixelID.alphaBlendDst = (uint8_t)(canFull ? PixelBlendFactor::ZERO : PixelBlendFactor::ONE);
		} else if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_DST) {
			pixelID.alphaBlendDst = (uint8_t)PixelBlendFactor::INVSRCALPHA;
		}

		if (optimize & RasterizerStateFlags::OPTIMIZED_FOG_OFF) {
			pixelID.applyFog = false;
		} else if (state->flags & RasterizerStateFlags::OPTIMIZED_FOG_OFF) {
			pixelID.applyFog = true;
		}

		SingleFunc drawPixel = Rasterizer::GetSingleFunc(pixelID, nullptr);
		// Can't compile during runtime.  This failing is a bit of a problem when undoing...
		if (drawPixel) {
			state->drawPixel = drawPixel;
			memcpy(&state->pixelID, &pixelID, sizeof(PixelFuncID));
			state->flags = ReplacePixelIDFlags(state->flags, optimize) | RasterizerStateFlags::OPTIMIZED;
			changed = true;
		}
	}

	const bool samplerDiffers = OptimizeSamplerIDFlags(state->flags) != OptimizeSamplerIDFlags(optimize);
	if (samplerDiffers) {
		// Same approach: edit a copy, commit only if both funcs exist.
		SamplerID samplerID = state->samplerID;

		if (optimize & RasterizerStateFlags::OPTIMIZED_TEXREPLACE) {
			samplerID.texFunc = (uint8_t)GE_TEXFUNC_REPLACE;
		} else if (state->flags & RasterizerStateFlags::OPTIMIZED_TEXREPLACE) {
			samplerID.texFunc = (uint8_t)GE_TEXFUNC_MODULATE;
		}

		Sampler::LinearFunc linear = Sampler::GetLinearFunc(samplerID, nullptr);
		Sampler::LinearFunc nearest = Sampler::GetNearestFunc(samplerID, nullptr);
		// Can't compile during runtime.  This failing is a bit of a problem when undoing...
		if (linear && nearest) {
			// Since the definitions are the same, just force this setting using the func pointer.
			const bool forceLinear = g_Config.iTexFiltering == TEX_FILTER_FORCE_LINEAR;
			const bool forceNearest = !forceLinear && g_Config.iTexFiltering == TEX_FILTER_FORCE_NEAREST;
			state->nearest = forceLinear ? linear : nearest;
			state->linear = forceNearest ? nearest : linear;

			memcpy(&state->samplerID, &samplerID, sizeof(SamplerID));
			state->flags = ReplaceSamplerIDFlags(state->flags, optimize) | RasterizerStateFlags::OPTIMIZED;
			changed = true;
		}
	}

	// Remember what we processed, whether or not anything could be applied.
	state->lastFlags = state->flags;
	return changed;
}
|
||||
|
||||
// Re-evaluates the optimizations for a state whose flags changed since the last check.
// Returns true if the state's pixel or sampler funcs ended up being replaced.
bool OptimizeRasterState(RasterizerState *state) {
	// Nothing new has been recorded since the last pass.
	if (state->flags == state->lastFlags)
		return false;

	RasterizerStateFlags optimize = DetectStateOptimizations(state);
	const bool alreadyOptimized = state->flags & RasterizerStateFlags::OPTIMIZED;

	if (alreadyOptimized) {
		// If it was optimized before, just revert and don't churn.
		if (OptimizeAllFlags(state->flags) != OptimizeAllFlags(optimize))
			optimize = RasterizerStateFlags::NONE;
	} else if (optimize == RasterizerStateFlags::NONE) {
		// Never optimized and nothing to apply: just note that these flags were seen.
		state->lastFlags = state->flags;
		return false;
	}

	return ApplyStateOptimizations(state, optimize);
}
|
||||
|
||||
RasterizerState OptimizeFlatRasterizerState(const RasterizerState &origState, const VertexData &v1) {
|
||||
uint8_t alpha = v1.color0 >> 24;
|
||||
RasterizerState state = origState;
|
||||
|
||||
bool changedPixelID = false;
|
||||
bool changedSamplerID = false;
|
||||
if (!state.pixelID.clearMode) {
|
||||
auto &pixelID = state.pixelID;
|
||||
auto &cached = pixelID.cached;
|
||||
// Sometimes, a particular draw can do better than the overall state.
|
||||
state.flags = ClearFlags(state.flags, RasterizerStateFlags::VERTEX_FLAT_RESET);
|
||||
CalculateRasterStateFlags(&state, v1, true);
|
||||
|
||||
bool useTextureAlpha = state.enableTextures && state.samplerID.useTextureAlpha;
|
||||
if (pixelID.alphaBlend && pixelID.AlphaBlendSrc() == PixelBlendFactor::SRCALPHA && !useTextureAlpha) {
|
||||
// Okay, we may be able to convert this to a fixed value.
|
||||
if (alpha == 0) {
|
||||
pixelID.alphaBlendSrc = (uint8_t)PixelBlendFactor::ZERO;
|
||||
changedPixelID = true;
|
||||
} else if (alpha == 0xFF) {
|
||||
pixelID.alphaBlendSrc = (uint8_t)PixelBlendFactor::ONE;
|
||||
changedPixelID = true;
|
||||
}
|
||||
}
|
||||
if (pixelID.alphaBlend && pixelID.AlphaBlendDst() == PixelBlendFactor::INVSRCALPHA && !useTextureAlpha) {
|
||||
if (alpha == 0) {
|
||||
pixelID.alphaBlendDst = (uint8_t)PixelBlendFactor::ONE;
|
||||
changedPixelID = true;
|
||||
} else if (alpha == 0xFF) {
|
||||
pixelID.alphaBlendDst = (uint8_t)PixelBlendFactor::ZERO;
|
||||
changedPixelID = true;
|
||||
}
|
||||
}
|
||||
if (pixelID.alphaBlend && pixelID.AlphaBlendSrc() == PixelBlendFactor::ONE && pixelID.AlphaBlendDst() == PixelBlendFactor::ZERO) {
|
||||
if (pixelID.AlphaBlendEq() == GE_BLENDMODE_MUL_AND_ADD) {
|
||||
pixelID.alphaBlend = false;
|
||||
changedPixelID = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (state.enableTextures) {
|
||||
if (v1.color0 == 0xFFFFFFFF) {
|
||||
// Modulate is common, sometimes even with a fixed color. Replace is cheaper.
|
||||
if (state.samplerID.TexFunc() == GE_TEXFUNC_MODULATE) {
|
||||
state.samplerID.texFunc = (uint8_t)GE_TEXFUNC_REPLACE;
|
||||
changedSamplerID = true;
|
||||
}
|
||||
}
|
||||
RasterizerStateFlags optimize = DetectStateOptimizations(&state);
|
||||
if (OptimizeAllFlags(state.flags) != OptimizeAllFlags(optimize)) {
|
||||
ApplyStateOptimizations(&state, optimize);
|
||||
return state;
|
||||
}
|
||||
|
||||
if (changedPixelID) {
|
||||
state.drawPixel = Rasterizer::GetSingleFunc(state.pixelID, nullptr);
|
||||
// Can't compile during runtime.
|
||||
if (!state.drawPixel)
|
||||
return origState;
|
||||
}
|
||||
if (changedSamplerID) {
|
||||
state.linear = Sampler::GetLinearFunc(state.samplerID, nullptr);
|
||||
state.nearest = Sampler::GetNearestFunc(state.samplerID, nullptr);
|
||||
// Can't compile during runtime.
|
||||
if (!state.linear || !state.nearest)
|
||||
return origState;
|
||||
|
||||
// Since the definitions are the same, just force this setting using the func pointer.
|
||||
if (g_Config.iTexFiltering == TEX_FILTER_FORCE_LINEAR)
|
||||
state.nearest = state.linear;
|
||||
else if (g_Config.iTexFiltering == TEX_FILTER_FORCE_NEAREST)
|
||||
state.linear = state.nearest;
|
||||
}
|
||||
|
||||
return state;
|
||||
return origState;
|
||||
}
|
||||
|
||||
static inline u8 ClampFogDepth(float fogdepth) {
|
||||
@ -602,7 +748,7 @@ void DrawTriangleSlice(
|
||||
// All the z values are the same, no interpolation required.
|
||||
// This is common, and when we interpolate, we lose accuracy.
|
||||
const bool flatZ = v0.screenpos.z == v1.screenpos.z && v0.screenpos.z == v2.screenpos.z;
|
||||
const bool flatColorAll = clearMode || !state.shadeGouraud;
|
||||
const bool flatColorAll = !state.shadeGouraud;
|
||||
const bool flatColor0 = flatColorAll || (v0.color0 == v1.color0 && v0.color0 == v2.color0);
|
||||
const bool flatColor1 = flatColorAll || (v0.color1 == v1.color1 && v0.color1 == v2.color1);
|
||||
const bool noFog = clearMode || !pixelID.applyFog || (v0.fogdepth >= 1.0f && v1.fogdepth >= 1.0f && v2.fogdepth >= 1.0f);
|
||||
|
@ -33,6 +33,30 @@ struct BinCoords;
|
||||
|
||||
namespace Rasterizer {
|
||||
|
||||
// Bit flags tracked per rasterizer state.  The low bits record what the vertices drawn
// with the state looked like; the high bits record which optimizations are in effect.
enum class RasterizerStateFlags {
	NONE = 0,

	// A counted vertex had an RGB other than 0xFFFFFF.
	VERTEX_NON_FULL_WHITE = 1 << 0,
	// A counted vertex had an alpha other than 0x00.
	VERTEX_ALPHA_NON_ZERO = 1 << 1,
	// A counted vertex had an alpha other than 0xFF.
	VERTEX_ALPHA_NON_FULL = 1 << 2,
	// A vertex had a fogdepth that was not >= 1.0.
	VERTEX_HAS_FOG = 1 << 3,

	// Every per-vertex bit above.
	VERTEX_FLAT_RESET = VERTEX_NON_FULL_WHITE | VERTEX_ALPHA_NON_ZERO | VERTEX_ALPHA_NON_FULL | VERTEX_HAS_FOG,

	// Set once any optimization has been applied to the state.
	OPTIMIZED = 1 << 16,
	OPTIMIZED_BLEND_SRC = 1 << 17,
	OPTIMIZED_BLEND_DST = 1 << 18,
	OPTIMIZED_BLEND_OFF = 1 << 19,
	OPTIMIZED_TEXREPLACE = 1 << 20,
	OPTIMIZED_FOG_OFF = 1 << 21,

	// Anything that changes the actual pixel or sampler func.
	OPTIMIZED_PIXELID = OPTIMIZED_BLEND_SRC | OPTIMIZED_BLEND_DST | OPTIMIZED_BLEND_OFF | OPTIMIZED_FOG_OFF,
	OPTIMIZED_SAMPLERID = OPTIMIZED_TEXREPLACE,

	INVALID = 0x7FFFFFFF,
};
|
||||
ENUM_CLASS_BITOPS(RasterizerStateFlags);
|
||||
|
||||
struct RasterizerState {
|
||||
PixelFuncID pixelID;
|
||||
SamplerID samplerID;
|
||||
@ -43,6 +67,8 @@ struct RasterizerState {
|
||||
uint16_t texbufw[8]{};
|
||||
const u8 *texptr[8]{};
|
||||
float textureLodSlope;
|
||||
RasterizerStateFlags flags = RasterizerStateFlags::NONE;
|
||||
RasterizerStateFlags lastFlags = RasterizerStateFlags::INVALID;
|
||||
|
||||
struct {
|
||||
uint8_t maxTexLevel : 3;
|
||||
@ -68,6 +94,10 @@ struct RasterizerState {
|
||||
};
|
||||
|
||||
void ComputeRasterizerState(RasterizerState *state, std::function<void()> flushForCompile);
|
||||
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0);
|
||||
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0, const VertexData &v1, bool forceFlat);
|
||||
void CalculateRasterStateFlags(RasterizerState *state, const VertexData &v0, const VertexData &v1, const VertexData &v2);
|
||||
bool OptimizeRasterState(RasterizerState *state);
|
||||
|
||||
// Draws a triangle if its vertices are specified in counter-clockwise order
|
||||
void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const BinCoords &range, const RasterizerState &state);
|
||||
|
@ -98,12 +98,12 @@ FetchFunc GetFetchFunc(SamplerID id, std::function<void()> flushForCompile) {
|
||||
}
|
||||
|
||||
// 256k should be enough.
|
||||
SamplerJitCache::SamplerJitCache() : Rasterizer::CodeBlock(1024 * 64 * 4) {
|
||||
SamplerJitCache::SamplerJitCache() : Rasterizer::CodeBlock(1024 * 64 * 4), cache_(64) {
|
||||
}
|
||||
|
||||
void SamplerJitCache::Clear() {
|
||||
CodeBlock::Clear();
|
||||
cache_.clear();
|
||||
cache_.Clear();
|
||||
addresses_.clear();
|
||||
|
||||
const10All16_ = nullptr;
|
||||
@ -144,8 +144,12 @@ std::string SamplerJitCache::DescribeCodePtr(const u8 *ptr) {
|
||||
|
||||
void SamplerJitCache::Flush() {
|
||||
std::unique_lock<std::mutex> guard(jitCacheLock);
|
||||
for (const auto &queued : compileQueue_)
|
||||
Compile(queued);
|
||||
for (const auto &queued : compileQueue_) {
|
||||
// Might've been compiled after enqueue, but before now.
|
||||
size_t queuedKey = std::hash<SamplerID>()(queued);
|
||||
if (!cache_.Get(queuedKey))
|
||||
Compile(queued);
|
||||
}
|
||||
compileQueue_.clear();
|
||||
}
|
||||
|
||||
@ -154,10 +158,11 @@ NearestFunc SamplerJitCache::GetByID(const SamplerID &id, std::function<void()>
|
||||
return nullptr;
|
||||
|
||||
std::unique_lock<std::mutex> guard(jitCacheLock);
|
||||
const size_t key = std::hash<SamplerID>()(id);
|
||||
|
||||
auto it = cache_.find(id);
|
||||
if (it != cache_.end())
|
||||
return it->second;
|
||||
auto it = cache_.Get(key);
|
||||
if (it != nullptr)
|
||||
return it;
|
||||
|
||||
if (!flushForCompile) {
|
||||
// Can't compile, let's try to do it later when there's an opportunity.
|
||||
@ -169,17 +174,18 @@ NearestFunc SamplerJitCache::GetByID(const SamplerID &id, std::function<void()>
|
||||
flushForCompile();
|
||||
guard.lock();
|
||||
|
||||
for (const auto &queued : compileQueue_)
|
||||
Compile(queued);
|
||||
for (const auto &queued : compileQueue_) {
|
||||
// Might've been compiled after enqueue, but before now.
|
||||
size_t queuedKey = std::hash<SamplerID>()(queued);
|
||||
if (!cache_.Get(queuedKey))
|
||||
Compile(queued);
|
||||
}
|
||||
compileQueue_.clear();
|
||||
|
||||
Compile(id);
|
||||
|
||||
// Okay, should be there now.
|
||||
it = cache_.find(id);
|
||||
if (it != cache_.end())
|
||||
return it->second;
|
||||
return nullptr;
|
||||
return cache_.Get(key);
|
||||
}
|
||||
|
||||
NearestFunc SamplerJitCache::GetNearest(const SamplerID &id, std::function<void()> flushForCompile) {
|
||||
@ -207,19 +213,19 @@ void SamplerJitCache::Compile(const SamplerID &id) {
|
||||
fetchID.linear = false;
|
||||
fetchID.fetch = true;
|
||||
addresses_[fetchID] = GetCodePointer();
|
||||
cache_[fetchID] = (NearestFunc)CompileFetch(fetchID);
|
||||
cache_.Insert(std::hash<SamplerID>()(fetchID), (NearestFunc)CompileFetch(fetchID));
|
||||
|
||||
SamplerID nearestID = id;
|
||||
nearestID.linear = false;
|
||||
nearestID.fetch = false;
|
||||
addresses_[nearestID] = GetCodePointer();
|
||||
cache_[nearestID] = (NearestFunc)CompileNearest(nearestID);
|
||||
cache_.Insert(std::hash<SamplerID>()(nearestID), (NearestFunc)CompileNearest(nearestID));
|
||||
|
||||
SamplerID linearID = id;
|
||||
linearID.linear = true;
|
||||
linearID.fetch = false;
|
||||
addresses_[linearID] = GetCodePointer();
|
||||
cache_[linearID] = (NearestFunc)CompileLinear(linearID);
|
||||
cache_.Insert(std::hash<SamplerID>()(linearID), (NearestFunc)CompileLinear(linearID));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include "Common/Data/Collections/Hashmaps.h"
|
||||
#include "GPU/Math3D.h"
|
||||
#include "GPU/Software/FuncId.h"
|
||||
#include "GPU/Software/RasterizerRegCache.h"
|
||||
@ -126,7 +127,7 @@ private:
|
||||
const u8 *const5551Swizzle_ = nullptr;
|
||||
const u8 *const5650Swizzle_ = nullptr;
|
||||
|
||||
std::unordered_map<SamplerID, NearestFunc> cache_;
|
||||
DenseHashMap<size_t, NearestFunc, nullptr> cache_;
|
||||
std::unordered_map<SamplerID, const u8 *> addresses_;
|
||||
std::unordered_set<SamplerID> compileQueue_;
|
||||
};
|
||||
|
@ -107,7 +107,7 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
|
||||
{ GE_CMD_FOG1, 0, SoftDirty::TRANSFORM_FOG },
|
||||
{ GE_CMD_FOG2, 0, SoftDirty::TRANSFORM_FOG },
|
||||
|
||||
{ GE_CMD_CLEARMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_TEX | SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_STENCIL | SoftDirty::PIXEL_CACHED | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP },
|
||||
{ GE_CMD_CLEARMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_BASIC | SoftDirty::RAST_TEX | SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_STENCIL | SoftDirty::PIXEL_CACHED | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP },
|
||||
{ GE_CMD_TEXTUREMAPENABLE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX | SoftDirty::TRANSFORM_BASIC | SoftDirty::BINNER_OVERLAP },
|
||||
{ GE_CMD_FOGENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED | SoftDirty::TRANSFORM_BASIC | SoftDirty::TRANSFORM_FOG | SoftDirty::TRANSFORM_MATRIX },
|
||||
{ GE_CMD_TEXMODE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX },
|
||||
|
Loading…
Reference in New Issue
Block a user