Merge pull request #15190 from unknownbrackets/softjit-cleanup

Minor cleanup to software renderer jit
This commit is contained in:
Henrik Rydgård 2021-12-02 09:02:55 +01:00 committed by GitHub
commit 676ed6c15d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 67 additions and 13 deletions

View File

@ -595,7 +595,7 @@ bool PixelJitCache::Jit_StencilAndDepthTest(const PixelFuncID &id) {
bool success = true;
success = success && Jit_StencilTest(id, stencilReg, maskedReg);
if (maskedReg != stencilReg)
regCache_.Unlock(maskedReg, RegCache::GEN_TEMP0);
regCache_.Release(maskedReg, RegCache::GEN_TEMP0);
// Next up, the depth test.
if (stencilReg == INVALID_REG) {
@ -1382,19 +1382,18 @@ bool PixelJitCache::Jit_Dither(const PixelFuncID &id) {
MOVSX(32, 8, valueReg, R(valueReg));
SAR(8, R(valueReg), Imm8(4));
#else
// Sum up (x + y * 4) * 2 + ditherMatrix offset to valueReg.
SHL(32, R(argXReg), Imm8(1));
LEA(32, valueReg, MComplex(argXReg, valueReg, 8, offsetof(PixelFuncID, cached.ditherMatrix)));
// Sum up (x + y * 4) + ditherMatrix offset to valueReg.
LEA(32, valueReg, MComplex(argXReg, valueReg, 4, offsetof(PixelFuncID, cached.ditherMatrix)));
// Okay, now abuse argXReg to read the PixelFuncID pointer on the stack.
if (regCache_.Has(RegCache::GEN_ARG_ID)) {
X64Reg idReg = regCache_.Find(RegCache::GEN_ARG_ID);
MOVSX(32, 16, valueReg, MRegSum(idReg, valueReg));
MOVSX(32, 8, valueReg, MRegSum(idReg, valueReg));
regCache_.Unlock(idReg, RegCache::GEN_ARG_ID);
} else {
_assert_(stackIDOffset_ != -1);
MOV(PTRBITS, R(argXReg), MDisp(RSP, stackIDOffset_));
MOVSX(32, 16, valueReg, MRegSum(argXReg, valueReg));
MOVSX(32, 8, valueReg, MRegSum(argXReg, valueReg));
}
#endif
if (argXReg != INVALID_REG) {
@ -1665,7 +1664,7 @@ bool PixelJitCache::Jit_WriteColor(const PixelFuncID &id) {
bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg maskReg) {
X64Reg gstateReg = GetGState();
X64Reg logicOpReg = regCache_.Alloc(RegCache::GEN_TEMP3);
X64Reg logicOpReg = regCache_.Alloc(RegCache::GEN_TEMP4);
MOVZX(32, 8, logicOpReg, MDisp(gstateReg, offsetof(GPUgstate, lop)));
AND(8, R(logicOpReg), Imm8(0x0F));
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
@ -1676,7 +1675,7 @@ bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorR
// Should already be allocated.
X64Reg colorOff = regCache_.Find(RegCache::GEN_COLOR_OFF);
X64Reg temp1Reg = regCache_.Find(RegCache::GEN_TEMP1);
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP5);
// We'll use these in several cases, so prepare.
int bits = id.fbFormat == GE_FORMAT_8888 ? 32 : 16;
@ -1996,8 +1995,8 @@ bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorR
SetJumpTarget(fixup);
regCache_.Unlock(colorOff, RegCache::GEN_COLOR_OFF);
regCache_.Unlock(temp1Reg, RegCache::GEN_TEMP1);
regCache_.Unlock(logicOpReg, RegCache::GEN_TEMP3);
regCache_.Release(logicOpReg, RegCache::GEN_TEMP4);
regCache_.Release(temp1Reg, RegCache::GEN_TEMP5);
if (stencilReg != INVALID_REG)
regCache_.Unlock(stencilReg, RegCache::GEN_STENCIL);

View File

@ -27,6 +27,21 @@ static_assert(sizeof(PixelFuncID) == sizeof(PixelFuncID::fullKey) + sizeof(Pixel
static_assert(sizeof(PixelFuncID) == sizeof(PixelFuncID::fullKey), "Bad pixel func ID size");
#endif
// Maps a comparison function paired with a byte-sized reference value onto a
// simpler comparison function when the reference sits at either end of the
// byte range (0x00 or 0xFF).  Every other func/ref combination is returned
// unchanged.
//
// func: the comparison function as configured.
// ref:  the (already masked, if applicable) 8-bit reference value.
// Returns the function to actually use for the test.
static inline GEComparison OptimizeRefByteCompare(GEComparison func, u8 ref) {
	switch (ref) {
	case 0x00:
		// Not equal tests are easier.
		if (func == GE_COMP_GREATER) {
			return GE_COMP_NOTEQUAL;
		}
		// Sometimes games pointlessly use tests like these.
		if (func == GE_COMP_GEQUAL) {
			return GE_COMP_ALWAYS;
		}
		break;

	case 0xFF:
		// Mirror image of the 0x00 cases above, at the top of the byte range.
		if (func == GE_COMP_LESS) {
			return GE_COMP_NOTEQUAL;
		}
		if (func == GE_COMP_LEQUAL) {
			return GE_COMP_ALWAYS;
		}
		break;

	default:
		break;
	}

	// Nothing to simplify for this combination.
	return func;
}
void ComputePixelFuncID(PixelFuncID *id) {
id->fullKey = 0;
@ -60,8 +75,8 @@ void ComputePixelFuncID(PixelFuncID *id) {
id->depthWrite = gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled();
if (id->stencilTest) {
id->stencilTestFunc = gstate.getStencilTestFunction();
id->stencilTestRef = gstate.getStencilTestRef() & gstate.getStencilTestMask();
id->stencilTestFunc = OptimizeRefByteCompare(gstate.getStencilTestFunction(), id->stencilTestRef);
id->hasStencilTestMask = gstate.getStencilTestMask() != 0xFF && gstate.FrameBufFormat() != GE_FORMAT_565;
// Stencil can't be written on 565, and any invalid op acts like KEEP, which is 0.
@ -71,6 +86,39 @@ void ComputePixelFuncID(PixelFuncID *id) {
id->zFail = gstate.isDepthTestEnabled() ? gstate.getStencilOpZFail() : GE_STENCILOP_KEEP;
if (gstate.FrameBufFormat() != GE_FORMAT_565 && gstate.getStencilOpZPass() <= GE_STENCILOP_DECR)
id->zPass = gstate.getStencilOpZPass();
// Always treat zPass/zFail the same if there's no depth test.
if (!gstate.isDepthTestEnabled() || gstate.getDepthTestFunction() == GE_COMP_ALWAYS)
id->zFail = id->zPass;
// And same for sFail if there's no stencil test.
if (id->StencilTestFunc() == GE_COMP_ALWAYS)
id->sFail = id->zPass;
// Normalize REPLACE 00 to ZERO, especially if using a mask.
if (gstate.getStencilTestRef() == 0) {
if (id->SFail() == GE_STENCILOP_REPLACE)
id->sFail = GE_STENCILOP_ZERO;
if (id->ZFail() == GE_STENCILOP_REPLACE)
id->zFail = GE_STENCILOP_ZERO;
if (id->ZPass() == GE_STENCILOP_REPLACE)
id->zPass = GE_STENCILOP_ZERO;
}
// For 5551, DECR is also the same as ZERO.
if (id->FBFormat() == GE_FORMAT_5551) {
if (id->SFail() == GE_STENCILOP_DECR)
id->sFail = GE_STENCILOP_ZERO;
if (id->ZFail() == GE_STENCILOP_DECR)
id->zFail = GE_STENCILOP_ZERO;
if (id->ZPass() == GE_STENCILOP_DECR)
id->zPass = GE_STENCILOP_ZERO;
}
// Turn off stencil testing if it's doing nothing.
if (id->SFail() == GE_STENCILOP_KEEP && id->ZFail() == GE_STENCILOP_KEEP && id->ZPass() == GE_STENCILOP_KEEP) {
if (id->StencilTestFunc() == GE_COMP_ALWAYS)
id->stencilTest = false;
}
}
id->depthTestFunc = gstate.isDepthTestEnabled() ? gstate.getDepthTestFunction() : GE_COMP_ALWAYS;
@ -78,6 +126,12 @@ void ComputePixelFuncID(PixelFuncID *id) {
if (id->AlphaTestFunc() != GE_COMP_ALWAYS) {
id->alphaTestRef = gstate.getAlphaTestRef() & gstate.getAlphaTestMask();
id->hasAlphaTestMask = gstate.getAlphaTestMask() != 0xFF;
// Try to pick a more optimal variant.
id->alphaTestFunc = OptimizeRefByteCompare(id->AlphaTestFunc(), id->alphaTestRef);
if (id->alphaTestFunc == GE_COMP_ALWAYS) {
id->alphaTestRef = 0;
id->hasAlphaTestMask = false;
}
}
// If invalid (6 or 7), doesn't do any blending, so force off.

View File

@ -35,7 +35,7 @@ struct PixelFuncID {
struct {
// Warning: these are not hashed or compared for equal. Just cached values.
uint32_t colorWriteMask{};
int16_t ditherMatrix[16]{};
int8_t ditherMatrix[16]{};
} cached;
#endif

View File

@ -56,7 +56,7 @@ void RegCache::SetupABI(const std::vector<Purpose> &args, bool forceRetain) {
Add(vecArgs[i], VEC_INVALID);
// Add all other caller saved regs without purposes yet.
static const Reg genTemps[] = { X8, X9, X10, X11, X12, X13, X14, X15, X16, X17 };
static const Reg genTemps[] = { X8, X9, X10, X11, X12, X13, X14, X15 };
for (Reg r : genTemps)
Add(r, GEN_INVALID);
static const Reg vecTemps[] = { Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23 };
@ -205,6 +205,7 @@ void RegCache::Release(Reg &r, Purpose p) {
}
void RegCache::Unlock(Reg &r, Purpose p) {
_assert_msg_((p & FLAG_TEMP) == 0, "softjit Unlock() temp reg (%04X)", p);
RegStatus *status = FindReg(r, p);
if (status) {
_assert_msg_(status->locked > 0, "softjit Unlock() reg that isn't locked (%04X)", p);