Merge pull request #16493 from unknownbrackets/softgpu-opt

softgpu: Check CLUT alpha to optimize out blend/alpha test
This commit is contained in:
Henrik Rydgård 2022-12-03 22:55:53 +01:00 committed by GitHub
commit c310d1471e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 91 additions and 22 deletions

View File

@ -641,16 +641,12 @@ void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int height) {
#ifdef _M_SSE
// Reduces the four 32-bit lanes of value to a single u32 by ANDing them together.
//
// Note: this must use lane shuffles rather than _mm_srli_si128(). That intrinsic
// shifts by BYTES, so counts such as 64 or 32 shift every bit out and the AND
// would always produce zero.
inline u32 SSEReduce32And(__m128i value) {
	// _MM_SHUFFLE(1, 0, 3, 2) swaps the upper and lower 64-bit halves, so after
	// this AND lane 0 holds (lane0 & lane2) and lane 1 holds (lane1 & lane3).
	value = _mm_and_si128(value, _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 0, 3, 2)));
	// _MM_SHUFFLE(1, 1, 1, 1) broadcasts lane 1, folding it into lane 0.
	value = _mm_and_si128(value, _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 1, 1, 1)));
	// Lane 0 now contains the AND of all four original lanes.
	return _mm_cvtsi128_si32(value);
}
// Reduces the eight 16-bit lanes of value by ANDing them together.
// The combined result is in the low 16 bits of the return value; the upper
// 16 bits are always zero because they are ANDed with the zero-filled shift.
inline u32 SSEReduce16And(__m128i value) {
	// AND the four 32-bit lanes first, then fold the high half-word onto the low one.
	u32 mask = SSEReduce32And(value);
	return mask & (mask >> 16);
}
#endif

View File

@ -816,7 +816,9 @@ SingleFunc PixelJitCache::GetSingle(const PixelFuncID &id, std::function<void()>
}
compileQueue_.clear();
Compile(id);
// Might've been in the queue.
if (!cache_.Get(key))
Compile(id);
return cache_.Get(key);
}

View File

@ -211,17 +211,65 @@ static inline RasterizerStateFlags ReplaceSamplerIDFlags(const RasterizerStateFl
return updated | (RasterizerStateFlags)OptimizeSamplerIDFlags(replace);
}
// Returns true when the CLUT scan reports CHECKALPHA_FULL for the entries in range
// (or the CLUT format is BGR5650, which is not scanned at all).
//
// The outcome is remembered in state->flags (CLUT_ALPHA_CHECKED / CLUT_ALPHA_NON_FULL),
// so later calls on the same state answer from the flags instead of scanning again.
// Samplers with a CLUT offset or without a shared CLUT return false and record nothing.
static bool CheckClutAlphaFull(RasterizerState *state) {
	if (state->flags & RasterizerStateFlags::CLUT_ALPHA_CHECKED) {
		// Already scanned for this state: reuse the recorded verdict.
		if (state->flags & RasterizerStateFlags::CLUT_ALPHA_NON_FULL)
			return false;
		return true;
	}

	const SamplerID &id = state->samplerID;
	// Keep it simple for now: only a shared CLUT with no offset is handled.
	if (!id.useSharedClut || id.hasClutOffset)
		return false;

	// Number of CLUT entries to scan: 16 for CLUT4, otherwise 256,
	// further limited by the mask field (bits 8-15 of clutFormat) when one is in use.
	uint32_t entries = 256;
	if (id.TexFmt() == GE_TFMT_CLUT4)
		entries = 16;
	if (id.hasClutMask)
		entries = std::min(entries, ((id.cached.clutFormat >> 8) & 0xFF) + 1);

	bool foundNonFull = false;
	switch (id.ClutFmt()) {
	case GE_CMODE_16BIT_BGR5650:
		// Not scanned; treated as full.
		break;

	case GE_CMODE_16BIT_ABGR5551:
		foundNonFull = CheckAlpha16((const uint16_t *)id.cached.clut, entries, 0x8000) != CHECKALPHA_FULL;
		break;

	case GE_CMODE_16BIT_ABGR4444:
		foundNonFull = CheckAlpha16((const uint16_t *)id.cached.clut, entries, 0xF000) != CHECKALPHA_FULL;
		break;

	case GE_CMODE_32BIT_ABGR8888:
		foundNonFull = CheckAlpha32((const uint32_t *)id.cached.clut, entries, 0xFF000000) != CHECKALPHA_FULL;
		break;
	}

	// Record the verdict so the scan happens only once.
	state->flags |= RasterizerStateFlags::CLUT_ALPHA_CHECKED;
	if (foundNonFull)
		state->flags |= RasterizerStateFlags::CLUT_ALPHA_NON_FULL;
	return !foundNonFull;
}
static RasterizerStateFlags DetectStateOptimizations(RasterizerState *state) {
// Note: all optimizations must be undoable.
RasterizerStateFlags optimize = RasterizerStateFlags::NONE;
auto &pixelID = state->pixelID;
auto &samplerID = state->samplerID;
if (!state->pixelID.clearMode) {
auto &pixelID = state->pixelID;
bool alphaZero = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_ZERO);
bool alphaFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL);
bool needTextureAlpha = state->enableTextures && samplerID.useTextureAlpha;
if (!pixelID.clearMode) {
auto &cached = pixelID.cached;
bool useTextureAlpha = state->enableTextures && state->samplerID.useTextureAlpha;
bool alphaBlend = pixelID.alphaBlend || (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_OFF);
if (alphaBlend && !useTextureAlpha) {
if (needTextureAlpha && alphaBlend && alphaFull) {
bool usesClut = (samplerID.texfmt & 4) != 0;
if (usesClut && CheckClutAlphaFull(state))
needTextureAlpha = false;
}
if (alphaBlend && !needTextureAlpha) {
PixelBlendFactor src = pixelID.AlphaBlendSrc();
PixelBlendFactor dst = pixelID.AlphaBlendDst();
if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_SRC)
@ -229,17 +277,15 @@ static RasterizerStateFlags DetectStateOptimizations(RasterizerState *state) {
if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_DST)
dst = PixelBlendFactor::INVSRCALPHA;
bool canZero = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_ZERO);
bool canFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL);
// Okay, we may be able to convert this to a fixed value.
if (canZero || canFull) {
if (alphaZero || alphaFull) {
// If it was already set and we still can, set it again.
if (src == PixelBlendFactor::SRCALPHA)
optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_SRC;
if (dst == PixelBlendFactor::INVSRCALPHA)
optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_DST;
}
if (canFull && (src == PixelBlendFactor::SRCALPHA || src == PixelBlendFactor::ONE) && (dst == PixelBlendFactor::INVSRCALPHA || dst == PixelBlendFactor::ZERO)) {
if (alphaFull && (src == PixelBlendFactor::SRCALPHA || src == PixelBlendFactor::ONE) && (dst == PixelBlendFactor::INVSRCALPHA || dst == PixelBlendFactor::ZERO)) {
optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_OFF;
}
}
@ -253,18 +299,31 @@ static RasterizerStateFlags DetectStateOptimizations(RasterizerState *state) {
}
if (state->enableTextures) {
bool useTextureAlpha = state->samplerID.useTextureAlpha;
bool alphaFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL);
bool colorFull = !(state->flags & RasterizerStateFlags::VERTEX_NON_FULL_WHITE);
if (colorFull && (!useTextureAlpha || alphaFull)) {
if (colorFull && (!needTextureAlpha || alphaFull)) {
// Modulate is common, sometimes even with a fixed color. Replace is cheaper.
GETexFunc texFunc = state->samplerID.TexFunc();
GETexFunc texFunc = samplerID.TexFunc();
if (state->flags & RasterizerStateFlags::OPTIMIZED_TEXREPLACE)
texFunc = GE_TEXFUNC_MODULATE;
if (texFunc == GE_TEXFUNC_MODULATE)
optimize |= RasterizerStateFlags::OPTIMIZED_TEXREPLACE;
}
bool usesClut = (samplerID.texfmt & 4) != 0;
if (usesClut && alphaFull && samplerID.useTextureAlpha) {
GEComparison alphaTestFunc = pixelID.AlphaTestFunc();
// We optimize > 0 to != 0, so this is especially common.
if (state->flags & RasterizerStateFlags::OPTIMIZED_ALPHATEST_OFF_NE)
alphaTestFunc = GE_COMP_NOTEQUAL;
// > 16, 8, or similar are also very common.
if (state->flags & RasterizerStateFlags::OPTIMIZED_ALPHATEST_OFF_GT)
alphaTestFunc = GE_COMP_GREATER;
bool alphaTest = (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) && pixelID.alphaTestRef < 0xFF && !state->pixelID.hasAlphaTestMask;
if (alphaTest && CheckClutAlphaFull(state))
optimize |= alphaTestFunc == GE_COMP_NOTEQUAL ? RasterizerStateFlags::OPTIMIZED_ALPHATEST_OFF_NE : RasterizerStateFlags::OPTIMIZED_ALPHATEST_OFF_GT;
}
}
return optimize;
@ -294,6 +353,12 @@ static bool ApplyStateOptimizations(RasterizerState *state, const RasterizerStat
pixelID.applyFog = false;
else if (state->flags & RasterizerStateFlags::OPTIMIZED_FOG_OFF)
pixelID.applyFog = true;
if (optimize & (RasterizerStateFlags::OPTIMIZED_ALPHATEST_OFF_NE | RasterizerStateFlags::OPTIMIZED_ALPHATEST_OFF_GT))
pixelID.alphaTestFunc = GE_COMP_ALWAYS;
else if (state->flags & RasterizerStateFlags::OPTIMIZED_ALPHATEST_OFF_NE)
pixelID.alphaTestFunc = GE_COMP_NOTEQUAL;
else if (state->flags & RasterizerStateFlags::OPTIMIZED_ALPHATEST_OFF_GT)
pixelID.alphaTestFunc = GE_COMP_GREATER;
SingleFunc drawPixel = Rasterizer::GetSingleFunc(pixelID, nullptr);
// Can't compile during runtime. This failing is a bit of a problem when undoing...

View File

@ -40,6 +40,9 @@ enum class RasterizerStateFlags {
VERTEX_ALPHA_NON_FULL = 0x0004,
VERTEX_HAS_FOG = 0x0008,
CLUT_ALPHA_CHECKED = 0x0010,
CLUT_ALPHA_NON_FULL = 0x0020,
VERTEX_FLAT_RESET = VERTEX_NON_FULL_WHITE | VERTEX_ALPHA_NON_FULL | VERTEX_ALPHA_NON_ZERO | VERTEX_HAS_FOG,
OPTIMIZED = 0x0001'0000,
@ -48,9 +51,11 @@ enum class RasterizerStateFlags {
OPTIMIZED_BLEND_OFF = 0x0008'0000,
OPTIMIZED_TEXREPLACE = 0x0010'0000,
OPTIMIZED_FOG_OFF = 0x0020'0000,
OPTIMIZED_ALPHATEST_OFF_NE = 0x0040'0000,
OPTIMIZED_ALPHATEST_OFF_GT = 0x0080'0000,
// Anything that changes the actual pixel or sampler func.
OPTIMIZED_PIXELID = OPTIMIZED_BLEND_SRC | OPTIMIZED_BLEND_DST | OPTIMIZED_BLEND_OFF | OPTIMIZED_FOG_OFF,
OPTIMIZED_PIXELID = OPTIMIZED_BLEND_SRC | OPTIMIZED_BLEND_DST | OPTIMIZED_BLEND_OFF | OPTIMIZED_FOG_OFF | RasterizerStateFlags::OPTIMIZED_ALPHATEST_OFF_NE | RasterizerStateFlags::OPTIMIZED_ALPHATEST_OFF_GT,
OPTIMIZED_SAMPLERID = OPTIMIZED_TEXREPLACE,
INVALID = 0x7FFFFFFF,

View File

@ -182,7 +182,8 @@ NearestFunc SamplerJitCache::GetByID(const SamplerID &id, std::function<void()>
}
compileQueue_.clear();
Compile(id);
if (!cache_.Get(key))
Compile(id);
// Okay, should be there now.
return cache_.Get(key);