softjit: Implement min/max/absdiff blending.

Alpha not yet implemented.
This commit is contained in:
Unknown W. Brackets 2021-11-25 20:22:41 -08:00
parent 771d459025
commit 7f167c3660
3 changed files with 273 additions and 9 deletions

View File

@ -574,6 +574,61 @@ SingleFunc PixelJitCache::GetSingle(const PixelFuncID &id) {
return nullptr;
}
// Works out which blend inputs a pixel func needs, from its blend equation and factors.
//
// Flags in state are only ever switched on here, never cleared, so callers should pass
// a freshly constructed PixelBlendState.  The src/dst factors are only inspected when
// state.usesFactors ends up set (MIN, MAX, and ABSDIFF never set it.)
void ComputePixelBlendState(PixelBlendState &state, const PixelFuncID &id) {
	const auto equation = id.AlphaBlendEq();
	if (equation == GE_BLENDMODE_MUL_AND_ADD ||
		equation == GE_BLENDMODE_MUL_AND_SUBTRACT ||
		equation == GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE) {
		state.usesFactors = true;
	}

	// Nothing more to figure out when the equation doesn't multiply by factors.
	if (!state.usesFactors)
		return;

	// Source factor: note whether it reads the source or the destination alpha.
	const auto srcFactor = id.AlphaBlendSrc();
	if (srcFactor == GE_SRCBLEND_SRCALPHA ||
		srcFactor == GE_SRCBLEND_INVSRCALPHA ||
		srcFactor == GE_SRCBLEND_DOUBLESRCALPHA ||
		srcFactor == GE_SRCBLEND_DOUBLEINVSRCALPHA) {
		state.srcFactorUsesSrcAlpha = true;
	} else if (srcFactor == GE_SRCBLEND_DSTALPHA ||
		srcFactor == GE_SRCBLEND_INVDSTALPHA ||
		srcFactor == GE_SRCBLEND_DOUBLEDSTALPHA ||
		srcFactor == GE_SRCBLEND_DOUBLEINVDSTALPHA) {
		state.srcFactorUsesDstAlpha = true;
	}

	// Destination factor: same classification as above.
	const auto dstFactor = id.AlphaBlendDst();
	if (dstFactor == GE_DSTBLEND_SRCALPHA ||
		dstFactor == GE_DSTBLEND_INVSRCALPHA ||
		dstFactor == GE_DSTBLEND_DOUBLESRCALPHA ||
		dstFactor == GE_DSTBLEND_DOUBLEINVSRCALPHA) {
		state.dstFactorUsesSrcAlpha = true;
	} else if (dstFactor == GE_DSTBLEND_DSTALPHA ||
		dstFactor == GE_DSTBLEND_INVDSTALPHA ||
		dstFactor == GE_DSTBLEND_DOUBLEDSTALPHA ||
		dstFactor == GE_DSTBLEND_DOUBLEINVDSTALPHA) {
		state.dstFactorUsesDstAlpha = true;
	}
}
// Forgets all tracked register statuses by emptying the regs list.
void PixelRegCache::Reset() {
regs.clear();
}

View File

@ -65,7 +65,8 @@ struct PixelRegCache {
INVALID,
GSTATE,
CONST_BASE,
ALPHA,
SRC_ALPHA,
DST_ALPHA,
STENCIL,
COLOR_OFF,
DEPTH_OFF,
@ -115,6 +116,15 @@ private:
std::vector<RegStatus> regs;
};
// Summary of which inputs alpha blending needs for a given pixel func.
// Every flag starts out false.
struct PixelBlendState {
	// Set when the blend equation multiplies by the src/dst factors.
	bool usesFactors{false};
	// Set when the source factor is derived from the source alpha.
	bool srcFactorUsesSrcAlpha{false};
	// Set when the source factor is derived from the destination alpha.
	bool srcFactorUsesDstAlpha{false};
	// Set when the destination factor is derived from the source alpha.
	bool dstFactorUsesSrcAlpha{false};
	// Set when the destination factor is derived from the destination alpha.
	bool dstFactorUsesDstAlpha{false};
};
// Sets the flags in state that apply to id's blend equation and src/dst factors.
// Flags are only ever set to true, so pass in a freshly constructed state.
void ComputePixelBlendState(PixelBlendState &state, const PixelFuncID &id);
#if PPSSPP_ARCH(ARM)
class PixelJitCache : public ArmGen::ARMXCodeBlock {
#elif PPSSPP_ARCH(ARM64)
@ -168,6 +178,9 @@ private:
bool Jit_ConvertTo565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg);
bool Jit_ConvertTo5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ConvertTo4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ConvertFrom565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg);
bool Jit_ConvertFrom5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ConvertFrom4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
std::unordered_map<PixelFuncID, SingleFunc> cache_;
std::unordered_map<PixelFuncID, const u8 *> addresses_;

View File

@ -344,10 +344,10 @@ bool PixelJitCache::Jit_AlphaTest(const PixelFuncID &id) {
// Load alpha into its own general reg.
X64Reg alphaReg;
if (regCache_.Has(PixelRegCache::ALPHA, PixelRegCache::T_GEN)) {
alphaReg = regCache_.Find(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
if (regCache_.Has(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN)) {
alphaReg = regCache_.Find(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
} else {
alphaReg = regCache_.Alloc(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
alphaReg = regCache_.Alloc(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
MOVD_xmm(R(alphaReg), argColorReg);
SHR(32, R(alphaReg), Imm8(24));
}
@ -499,10 +499,10 @@ bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {
// Save A so we can put it back, we don't "fog" A.
X64Reg alphaReg;
if (regCache_.Has(PixelRegCache::ALPHA, PixelRegCache::T_GEN)) {
alphaReg = regCache_.Find(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
if (regCache_.Has(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN)) {
alphaReg = regCache_.Find(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
} else {
alphaReg = regCache_.Alloc(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
alphaReg = regCache_.Alloc(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
PEXTRW(alphaReg, argColorReg, 3);
}
@ -951,8 +951,97 @@ bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {
if (!id.alphaBlend)
return true;
// TODO: Will need old color in some cases, too.
return false;
// Check if we need to load and prep factors.
PixelBlendState blendState;
ComputePixelBlendState(blendState, id);
bool success = true;
// Step 1: Load and expand dest color.
X64Reg dstReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_VEC);
X64Reg colorOff = GetColorOff(id);
if (id.FBFormat() == GE_FORMAT_8888) {
MOVD_xmm(dstReg, MatR(colorOff));
regCache_.Unlock(colorOff, PixelRegCache::T_GEN);
} else {
X64Reg dstGenReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_GEN);
MOVZX(32, 16, dstGenReg, MatR(colorOff));
regCache_.Unlock(colorOff, PixelRegCache::T_GEN);
bool keepAlpha = blendState.srcFactorUsesDstAlpha || blendState.dstFactorUsesDstAlpha;
X64Reg temp1Reg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_GEN);
X64Reg temp2Reg = regCache_.Alloc(PixelRegCache::TEMP2, PixelRegCache::T_GEN);
switch (id.fbFormat) {
case GE_FORMAT_565:
success = success && Jit_ConvertFrom565(id, dstGenReg, temp1Reg, temp2Reg);
break;
case GE_FORMAT_5551:
success = success && Jit_ConvertFrom5551(id, dstGenReg, temp1Reg, temp2Reg, keepAlpha);
break;
case GE_FORMAT_4444:
success = success && Jit_ConvertFrom4444(id, dstGenReg, temp1Reg, temp2Reg, keepAlpha);
break;
case GE_FORMAT_8888:
break;
}
MOVD_xmm(dstReg, R(dstGenReg));
regCache_.Release(temp1Reg, PixelRegCache::T_GEN);
regCache_.Release(temp2Reg, PixelRegCache::T_GEN);
regCache_.Release(dstGenReg, PixelRegCache::T_GEN);
}
// Step 2: Load and apply factors.
if (blendState.usesFactors) {
return false;
}
// Step 3: Apply equation.
// Note: below, we completely ignore what happens to the alpha bits.
// It won't matter, since we'll replace those with stencil anyway.
X64Reg tempReg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_VEC);
switch (id.AlphaBlendEq()) {
case GE_BLENDMODE_MUL_AND_ADD:
// TODO
break;
case GE_BLENDMODE_MUL_AND_SUBTRACT:
// TODO
break;
case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
// TODO
break;
case GE_BLENDMODE_MIN:
PMINUB(argColorReg, R(dstReg));
break;
case GE_BLENDMODE_MAX:
PMAXUB(argColorReg, R(dstReg));
break;
case GE_BLENDMODE_ABSDIFF:
// Calculate A=(dst-src < 0 ? 0 : dst-src) and B=(src-dst < 0 ? 0 : src-dst)...
MOVDQA(tempReg, R(dstReg));
PSUBUSB(tempReg, R(argColorReg));
PSUBUSB(argColorReg, R(dstReg));
// Now, one of those must be zero, and the other one is the result (could also be zero.)
POR(argColorReg, R(tempReg));
break;
}
regCache_.Release(tempReg, PixelRegCache::T_VEC);
regCache_.Release(dstReg, PixelRegCache::T_VEC);
return true;
}
bool PixelJitCache::Jit_Dither(const PixelFuncID &id) {
@ -1654,6 +1743,113 @@ bool PixelJitCache::Jit_ConvertTo4444(const PixelFuncID &id, PixelRegCache::Reg
return true;
}
// Emits code to expand a 16-bit 565 color in colorReg to 8888 layout, in place.
//
// On entry, colorReg has red in bits 0-4, green in bits 5-10, and blue in bits 11-15.
// On exit, red is in bits 0-7, green in bits 8-15, and blue in bits 16-23, with the top
// bits of each channel replicated into the low bits ("swizzle") to fill the full 8 bits.
// The 16-bit ANDs only mask the low half, so this assumes the upper 16 bits of colorReg
// are already zero.
//
// temp1Reg and temp2Reg are overwritten.  id is not used.  Always returns true.
bool PixelJitCache::Jit_ConvertFrom565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg) {
// Filter out red only into temp1.
MOV(32, R(temp1Reg), R(colorReg));
AND(16, R(temp1Reg), Imm16(0x1F << 0));
// Move it left to the top of the 8 bits.
SHL(32, R(temp1Reg), Imm8(3));
// Now we bring in blue, since it's also 5 like red.
MOV(32, R(temp2Reg), R(colorReg));
AND(16, R(temp2Reg), Imm16(0x1F << 11));
// Shift blue into place, 8 left (at 19), and merge back to temp1.
SHL(32, R(temp2Reg), Imm8(8));
OR(32, R(temp1Reg), R(temp2Reg));
// Make a copy back in temp2, and shift left 1 so we can swizzle together with G.
// (OR acts as a copy here: temp2 only holds the blue bits, which temp1 already has.)
OR(32, R(temp2Reg), R(temp1Reg));
SHL(32, R(temp2Reg), Imm8(1));
// We go to green last because it's the different one. Put it in place.
AND(16, R(colorReg), Imm16(0x3F << 5));
SHL(32, R(colorReg), Imm8(5));
// Combine with temp2 (for swizzling), then merge in temp1 (R+B pre-swizzle.)
OR(32, R(temp2Reg), R(colorReg));
OR(32, R(colorReg), R(temp1Reg));
// Now shift and mask temp2 for swizzle.
// Net shift is 5 right for R/B (after the 1 left above) and 6 right for G, so the mask
// keeps the top 3 bits of R and B, and the top 2 bits of G, as each channel's low bits.
SHR(32, R(temp2Reg), Imm8(6));
AND(32, R(temp2Reg), Imm32(0x00070307));
// And then OR that in too. We're done.
OR(32, R(colorReg), R(temp2Reg));
return true;
}
// Emits code to expand a 16-bit 5551 color in colorReg to 8888 layout, in place.
//
// On entry, colorReg has red in bits 0-4, green in bits 5-9, blue in bits 10-14, and
// alpha in bit 15.  On exit, red is in bits 0-7, green in bits 8-15, and blue in bits
// 16-23, with the top 3 bits of each channel replicated into its low 3 bits.
// The 16-bit ANDs only mask the low half, so this assumes the upper 16 bits of colorReg
// are already zero.
//
// keepAlpha: when true, the single alpha bit is expanded to fill bits 24-31.  When false,
// alpha is masked out of the low 16 bits and not carried over.
//
// temp1Reg and temp2Reg are overwritten.  id is not used.  Always returns true.
bool PixelJitCache::Jit_ConvertFrom5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha) {
// Filter out red only into temp1.
MOV(32, R(temp1Reg), R(colorReg));
AND(16, R(temp1Reg), Imm16(0x1F << 0));
// Move it left to the top of the 8 bits.
SHL(32, R(temp1Reg), Imm8(3));
// Add in green and shift into place (top bits.)
MOV(32, R(temp2Reg), R(colorReg));
AND(16, R(temp2Reg), Imm16(0x1F << 5));
SHL(32, R(temp2Reg), Imm8(6));
OR(32, R(temp1Reg), R(temp2Reg));
if (keepAlpha) {
// Now take blue and alpha together.
AND(16, R(colorReg), Imm16(0x8000 | (0x1F << 10)));
// We move all the way left, then sign extend right to expand alpha.
// Alpha lands in the sign bit (31), so SAR by 7 copies it across bits 24-31,
// while blue ends up at bits 19-23.
SHL(32, R(colorReg), Imm8(16));
SAR(32, R(colorReg), Imm8(7));
} else {
// Blue only: shift it straight to bits 19-23.
AND(16, R(colorReg), Imm16(0x1F << 10));
SHL(32, R(colorReg), Imm8(9));
}
// Combine both together, we still need to swizzle.
// (The second OR acts as a copy: temp1's bits are already all in colorReg.)
OR(32, R(colorReg), R(temp1Reg));
OR(32, R(temp1Reg), R(colorReg));
// Now for swizzle, we'll mask carefully to avoid overflow.
// The mask keeps just 3 bits per color channel, and drops anything from alpha.
SHR(32, R(temp1Reg), Imm8(5));
AND(32, R(temp1Reg), Imm32(0x00070707));
// Then finally merge in the swizzle bits.
OR(32, R(colorReg), R(temp1Reg));
return true;
}
// Emits code to expand a 16-bit 4444 color in colorReg to 8888 layout, in place.
//
// On entry, colorReg has red in bits 0-3, green in bits 4-7, blue in bits 8-11, and
// alpha in bits 12-15.  On exit, each channel fills its own byte (red lowest), with
// the 4-bit value duplicated into both halves of the byte.
// The 16-bit ANDs only mask the low half, so this assumes the upper 16 bits of colorReg
// are already zero.
//
// keepAlpha: when true, alpha is expanded into bits 24-31.  When false, alpha is dropped
// and only the three color channels are produced.
//
// temp1Reg and temp2Reg are overwritten.  id is not used.  Always returns true.
bool PixelJitCache::Jit_ConvertFrom4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha) {
// Move red into position within temp1.
MOV(32, R(temp1Reg), R(colorReg));
AND(16, R(temp1Reg), Imm16(0xF << 0));
SHL(32, R(temp1Reg), Imm8(4));
// Green is just as simple.
MOV(32, R(temp2Reg), R(colorReg));
AND(16, R(temp2Reg), Imm16(0xF << 4));
SHL(32, R(temp2Reg), Imm8(8));
OR(32, R(temp1Reg), R(temp2Reg));
// Blue isn't last this time, but it's next.
MOV(32, R(temp2Reg), R(colorReg));
AND(16, R(temp2Reg), Imm16(0xF << 8));
SHL(32, R(temp2Reg), Imm8(12));
OR(32, R(temp1Reg), R(temp2Reg));
if (keepAlpha) {
// Last but not least, alpha.
AND(16, R(colorReg), Imm16(0xF << 12));
SHL(32, R(colorReg), Imm8(16));
OR(32, R(colorReg), R(temp1Reg));
// Copy to temp1 again for swizzling.
// (OR acts as a copy: temp1's bits are already all in colorReg.)
OR(32, R(temp1Reg), R(colorReg));
} else {
// Overwrite colorReg (we need temp1 as a copy anyway.)
MOV(32, R(colorReg), R(temp1Reg));
}
// Masking isn't necessary here since everything is 4 wide.
// Each value sits in the high half of its byte, so shifting right 4 fills the empty low half.
SHR(32, R(temp1Reg), Imm8(4));
OR(32, R(colorReg), R(temp1Reg));
return true;
}
};
#endif