softjit: Use BMI1 (ANDN) to simplify some masking.

This commit is contained in:
Unknown W. Brackets 2022-01-31 19:50:48 -08:00
parent 16dca4f69b
commit ad43380ef6

View File

@ -1631,8 +1631,12 @@ bool PixelJitCache::Jit_WriteColor(const PixelFuncID &id) {
if (maskReg != INVALID_REG) {
// Zero all other bits, then flip maskReg to clear the bits we're keeping in colorReg.
AND(16, MatR(colorOff), R(maskReg));
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
if (cpu_info.bBMI1) {
ANDN(32, colorReg, maskReg, R(colorReg));
} else {
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
}
OR(16, MatR(colorOff), R(colorReg));
} else if (fixedKeepMask == 0) {
MOV(16, MatR(colorOff), R(colorReg));
@ -1647,8 +1651,12 @@ bool PixelJitCache::Jit_WriteColor(const PixelFuncID &id) {
if (maskReg != INVALID_REG) {
// Zero all other bits, then flip maskReg to clear the bits we're keeping in colorReg.
AND(32, MatR(colorOff), R(maskReg));
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
if (cpu_info.bBMI1) {
ANDN(32, colorReg, maskReg, R(colorReg));
} else {
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
}
OR(32, MatR(colorOff), R(colorReg));
} else if (fixedKeepMask == 0) {
MOV(32, MatR(colorOff), R(colorReg));
@ -1774,8 +1782,12 @@ bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorR
tableValues[GE_LOGIC_AND_REVERSE] = GetCodePointer();
// Reverse memory in a temp reg so we can apply the write mask easily.
MOV(bits, R(temp1Reg), MatR(colorOff));
NOT(32, R(temp1Reg));
AND(32, R(colorReg), R(temp1Reg));
if (cpu_info.bBMI1) {
ANDN(32, colorReg, temp1Reg, R(colorReg));
} else {
NOT(32, R(temp1Reg));
AND(32, R(colorReg), R(temp1Reg));
}
// Now add in the stencil bits (must be zero before, since we used AND.)
if (stencilReg != INVALID_REG) {
OR(32, R(colorReg), R(stencilReg));
@ -1825,9 +1837,13 @@ bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorR
tableValues[GE_LOGIC_NOOP] = GetCodePointer();
if (stencilReg != INVALID_REG && maskReg != INVALID_REG) {
// Start by clearing masked bits from stencilReg.
NOT(32, R(maskReg));
AND(32, R(stencilReg), R(maskReg));
NOT(32, R(maskReg));
if (cpu_info.bBMI1) {
ANDN(32, stencilReg, maskReg, R(stencilReg));
} else {
NOT(32, R(maskReg));
AND(32, R(stencilReg), R(maskReg));
NOT(32, R(maskReg));
}
// Now mask out the stencil bits we're writing from memory.
OR(bits, R(maskReg), notStencilMask);
@ -1862,9 +1878,13 @@ bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorR
OR(32, R(colorReg), R(stencilReg));
// Clear the bits we should be masking out.
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
NOT(32, R(maskReg));
if (cpu_info.bBMI1) {
ANDN(32, colorReg, maskReg, R(colorReg));
} else {
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
NOT(32, R(maskReg));
}
// Clear all the unmasked stencil bits, so we can set our own.
OR(bits, R(maskReg), notStencilMask);
@ -1875,8 +1895,12 @@ bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorR
AND(bits, MatR(colorOff), notStencilMask);
} else if (maskReg != INVALID_REG) {
// Clear the bits we should be masking out.
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
if (cpu_info.bBMI1) {
ANDN(32, colorReg, maskReg, R(colorReg));
} else {
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
}
} else if (id.FBFormat() == GE_FORMAT_8888) {
// We only need to do this for 8888, the others already have 0 stencil.
AND(bits, R(colorReg), notStencilMask);
@ -1954,9 +1978,13 @@ bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorR
OR(32, R(colorReg), R(stencilReg));
// Clear the bits we should be masking out.
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
NOT(32, R(maskReg));
if (cpu_info.bBMI1) {
ANDN(32, colorReg, maskReg, R(colorReg));
} else {
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
NOT(32, R(maskReg));
}
// Clear all the unmasked stencil bits, so we can set our own.
OR(bits, R(maskReg), notStencilMask);