mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-27 07:20:49 +00:00
Merge pull request #15190 from unknownbrackets/softjit-cleanup
Minor cleanup to software renderer jit
This commit is contained in:
commit
676ed6c15d
@ -595,7 +595,7 @@ bool PixelJitCache::Jit_StencilAndDepthTest(const PixelFuncID &id) {
|
||||
bool success = true;
|
||||
success = success && Jit_StencilTest(id, stencilReg, maskedReg);
|
||||
if (maskedReg != stencilReg)
|
||||
regCache_.Unlock(maskedReg, RegCache::GEN_TEMP0);
|
||||
regCache_.Release(maskedReg, RegCache::GEN_TEMP0);
|
||||
|
||||
// Next up, the depth test.
|
||||
if (stencilReg == INVALID_REG) {
|
||||
@ -1382,19 +1382,18 @@ bool PixelJitCache::Jit_Dither(const PixelFuncID &id) {
|
||||
MOVSX(32, 8, valueReg, R(valueReg));
|
||||
SAR(8, R(valueReg), Imm8(4));
|
||||
#else
|
||||
// Sum up (x + y * 4) * 2 + ditherMatrix offset to valueReg.
|
||||
SHL(32, R(argXReg), Imm8(1));
|
||||
LEA(32, valueReg, MComplex(argXReg, valueReg, 8, offsetof(PixelFuncID, cached.ditherMatrix)));
|
||||
// Sum up (x + y * 4) + ditherMatrix offset to valueReg.
|
||||
LEA(32, valueReg, MComplex(argXReg, valueReg, 4, offsetof(PixelFuncID, cached.ditherMatrix)));
|
||||
|
||||
// Okay, now abuse argXReg to read the PixelFuncID pointer on the stack.
|
||||
if (regCache_.Has(RegCache::GEN_ARG_ID)) {
|
||||
X64Reg idReg = regCache_.Find(RegCache::GEN_ARG_ID);
|
||||
MOVSX(32, 16, valueReg, MRegSum(idReg, valueReg));
|
||||
MOVSX(32, 8, valueReg, MRegSum(idReg, valueReg));
|
||||
regCache_.Unlock(idReg, RegCache::GEN_ARG_ID);
|
||||
} else {
|
||||
_assert_(stackIDOffset_ != -1);
|
||||
MOV(PTRBITS, R(argXReg), MDisp(RSP, stackIDOffset_));
|
||||
MOVSX(32, 16, valueReg, MRegSum(argXReg, valueReg));
|
||||
MOVSX(32, 8, valueReg, MRegSum(argXReg, valueReg));
|
||||
}
|
||||
#endif
|
||||
if (argXReg != INVALID_REG) {
|
||||
@ -1665,7 +1664,7 @@ bool PixelJitCache::Jit_WriteColor(const PixelFuncID &id) {
|
||||
|
||||
bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg maskReg) {
|
||||
X64Reg gstateReg = GetGState();
|
||||
X64Reg logicOpReg = regCache_.Alloc(RegCache::GEN_TEMP3);
|
||||
X64Reg logicOpReg = regCache_.Alloc(RegCache::GEN_TEMP4);
|
||||
MOVZX(32, 8, logicOpReg, MDisp(gstateReg, offsetof(GPUgstate, lop)));
|
||||
AND(8, R(logicOpReg), Imm8(0x0F));
|
||||
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
|
||||
@ -1676,7 +1675,7 @@ bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorR
|
||||
|
||||
// Should already be allocated.
|
||||
X64Reg colorOff = regCache_.Find(RegCache::GEN_COLOR_OFF);
|
||||
X64Reg temp1Reg = regCache_.Find(RegCache::GEN_TEMP1);
|
||||
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP5);
|
||||
|
||||
// We'll use these in several cases, so prepare.
|
||||
int bits = id.fbFormat == GE_FORMAT_8888 ? 32 : 16;
|
||||
@ -1996,8 +1995,8 @@ bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorR
|
||||
SetJumpTarget(fixup);
|
||||
|
||||
regCache_.Unlock(colorOff, RegCache::GEN_COLOR_OFF);
|
||||
regCache_.Unlock(temp1Reg, RegCache::GEN_TEMP1);
|
||||
regCache_.Unlock(logicOpReg, RegCache::GEN_TEMP3);
|
||||
regCache_.Release(logicOpReg, RegCache::GEN_TEMP4);
|
||||
regCache_.Release(temp1Reg, RegCache::GEN_TEMP5);
|
||||
if (stencilReg != INVALID_REG)
|
||||
regCache_.Unlock(stencilReg, RegCache::GEN_STENCIL);
|
||||
|
||||
|
@ -27,6 +27,21 @@ static_assert(sizeof(PixelFuncID) == sizeof(PixelFuncID::fullKey) + sizeof(Pixel
|
||||
static_assert(sizeof(PixelFuncID) == sizeof(PixelFuncID::fullKey), "Bad pixel func ID size");
|
||||
#endif
|
||||
|
||||
// Rewrites a comparison against a constant byte reference into a cheaper,
// equivalent comparison when the reference sits at either end of the byte range.
//
//   func     the comparison function originally requested.
//   ref      the 8-bit reference value the comparison is made against.
//   returns  GE_COMP_NOTEQUAL or GE_COMP_ALWAYS when one of the special cases
//            below matches, otherwise func unchanged.
//
// NOTE(review): the rewrites assume the tested value is an unsigned byte on the
// left side of the comparison (value FUNC ref) - confirm for each caller.
static inline GEComparison OptimizeRefByteCompare(GEComparison func, u8 ref) {
	const bool refIsMin = ref == 0;
	const bool refIsMax = ref == 0xFF;

	switch (func) {
	case GE_COMP_GREATER:
		// Greater than 0 becomes a not equal test, which is easier.
		if (refIsMin)
			return GE_COMP_NOTEQUAL;
		break;

	case GE_COMP_LESS:
		// Less than 0xFF likewise becomes a not equal test.
		if (refIsMax)
			return GE_COMP_NOTEQUAL;
		break;

	case GE_COMP_GEQUAL:
		// Sometimes games pointlessly use a test like this; treat it as always.
		if (refIsMin)
			return GE_COMP_ALWAYS;
		break;

	case GE_COMP_LEQUAL:
		// Same idea at the other end of the range.
		if (refIsMax)
			return GE_COMP_ALWAYS;
		break;

	default:
		break;
	}

	// Nothing to simplify, keep what was requested.
	return func;
}
|
||||
|
||||
void ComputePixelFuncID(PixelFuncID *id) {
|
||||
id->fullKey = 0;
|
||||
|
||||
@ -60,8 +75,8 @@ void ComputePixelFuncID(PixelFuncID *id) {
|
||||
id->depthWrite = gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled();
|
||||
|
||||
if (id->stencilTest) {
|
||||
id->stencilTestFunc = gstate.getStencilTestFunction();
|
||||
id->stencilTestRef = gstate.getStencilTestRef() & gstate.getStencilTestMask();
|
||||
id->stencilTestFunc = OptimizeRefByteCompare(gstate.getStencilTestFunction(), id->stencilTestRef);
|
||||
id->hasStencilTestMask = gstate.getStencilTestMask() != 0xFF && gstate.FrameBufFormat() != GE_FORMAT_565;
|
||||
|
||||
// Stencil can't be written on 565, and any invalid op acts like KEEP, which is 0.
|
||||
@ -71,6 +86,39 @@ void ComputePixelFuncID(PixelFuncID *id) {
|
||||
id->zFail = gstate.isDepthTestEnabled() ? gstate.getStencilOpZFail() : GE_STENCILOP_KEEP;
|
||||
if (gstate.FrameBufFormat() != GE_FORMAT_565 && gstate.getStencilOpZPass() <= GE_STENCILOP_DECR)
|
||||
id->zPass = gstate.getStencilOpZPass();
|
||||
|
||||
// Always treat zPass/zFail the same if there's no depth test.
|
||||
if (!gstate.isDepthTestEnabled() || gstate.getDepthTestFunction() == GE_COMP_ALWAYS)
|
||||
id->zFail = id->zPass;
|
||||
// And same for sFail if there's no stencil test.
|
||||
if (id->StencilTestFunc() == GE_COMP_ALWAYS)
|
||||
id->sFail = id->zPass;
|
||||
|
||||
// Normalize REPLACE 00 to ZERO, especially if using a mask.
|
||||
if (gstate.getStencilTestRef() == 0) {
|
||||
if (id->SFail() == GE_STENCILOP_REPLACE)
|
||||
id->sFail = GE_STENCILOP_ZERO;
|
||||
if (id->ZFail() == GE_STENCILOP_REPLACE)
|
||||
id->zFail = GE_STENCILOP_ZERO;
|
||||
if (id->ZPass() == GE_STENCILOP_REPLACE)
|
||||
id->zPass = GE_STENCILOP_ZERO;
|
||||
}
|
||||
|
||||
// For 5551, DECR is also the same as ZERO.
|
||||
if (id->FBFormat() == GE_FORMAT_5551) {
|
||||
if (id->SFail() == GE_STENCILOP_DECR)
|
||||
id->sFail = GE_STENCILOP_ZERO;
|
||||
if (id->ZFail() == GE_STENCILOP_DECR)
|
||||
id->zFail = GE_STENCILOP_ZERO;
|
||||
if (id->ZPass() == GE_STENCILOP_DECR)
|
||||
id->zPass = GE_STENCILOP_ZERO;
|
||||
}
|
||||
|
||||
// Turn off stencil testing if it's doing nothing.
|
||||
if (id->SFail() == GE_STENCILOP_KEEP && id->ZFail() == GE_STENCILOP_KEEP && id->ZPass() == GE_STENCILOP_KEEP) {
|
||||
if (id->StencilTestFunc() == GE_COMP_ALWAYS)
|
||||
id->stencilTest = false;
|
||||
}
|
||||
}
|
||||
|
||||
id->depthTestFunc = gstate.isDepthTestEnabled() ? gstate.getDepthTestFunction() : GE_COMP_ALWAYS;
|
||||
@ -78,6 +126,12 @@ void ComputePixelFuncID(PixelFuncID *id) {
|
||||
if (id->AlphaTestFunc() != GE_COMP_ALWAYS) {
|
||||
id->alphaTestRef = gstate.getAlphaTestRef() & gstate.getAlphaTestMask();
|
||||
id->hasAlphaTestMask = gstate.getAlphaTestMask() != 0xFF;
|
||||
// Try to pick a more optimal variant.
|
||||
id->alphaTestFunc = OptimizeRefByteCompare(id->AlphaTestFunc(), id->alphaTestRef);
|
||||
if (id->alphaTestFunc == GE_COMP_ALWAYS) {
|
||||
id->alphaTestRef = 0;
|
||||
id->hasAlphaTestMask = false;
|
||||
}
|
||||
}
|
||||
|
||||
// If invalid (6 or 7), doesn't do any blending, so force off.
|
||||
|
@ -35,7 +35,7 @@ struct PixelFuncID {
|
||||
struct {
|
||||
// Warning: these are not hashed or compared for equal. Just cached values.
|
||||
uint32_t colorWriteMask{};
|
||||
int16_t ditherMatrix[16]{};
|
||||
int8_t ditherMatrix[16]{};
|
||||
} cached;
|
||||
#endif
|
||||
|
||||
|
@ -56,7 +56,7 @@ void RegCache::SetupABI(const std::vector<Purpose> &args, bool forceRetain) {
|
||||
Add(vecArgs[i], VEC_INVALID);
|
||||
|
||||
// Add all other caller saved regs without purposes yet.
|
||||
static const Reg genTemps[] = { X8, X9, X10, X11, X12, X13, X14, X15, X16, X17 };
|
||||
static const Reg genTemps[] = { X8, X9, X10, X11, X12, X13, X14, X15 };
|
||||
for (Reg r : genTemps)
|
||||
Add(r, GEN_INVALID);
|
||||
static const Reg vecTemps[] = { Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23 };
|
||||
@ -205,6 +205,7 @@ void RegCache::Release(Reg &r, Purpose p) {
|
||||
}
|
||||
|
||||
void RegCache::Unlock(Reg &r, Purpose p) {
|
||||
_assert_msg_((p & FLAG_TEMP) == 0, "softjit Unlock() temp reg (%04X)", p);
|
||||
RegStatus *status = FindReg(r, p);
|
||||
if (status) {
|
||||
_assert_msg_(status->locked > 0, "softjit Unlock() reg that isn't locked (%04X)", p);
|
||||
|
Loading…
Reference in New Issue
Block a user