mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-26 23:10:38 +00:00
softjit: Implement min/max/absdiff blending.
Alpha not yet implemented.
This commit is contained in:
parent
771d459025
commit
7f167c3660
@ -574,6 +574,61 @@ SingleFunc PixelJitCache::GetSingle(const PixelFuncID &id) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Fills in `state` with which blending inputs the pixel function `id` depends on.
//
// Flags are only ever set to true here, never cleared, so callers are expected
// to hand in a freshly constructed PixelBlendState.
//
// state: receives the flags (usesFactors plus the four "factor uses alpha" flags.)
// id:    supplies the blend equation and the source/destination factor selectors.
void ComputePixelBlendState(PixelBlendState &state, const PixelFuncID &id) {
	// Only the multiply-based equations scale by factors; min, max, and absdiff
	// combine the two colors directly.
	switch (id.AlphaBlendEq()) {
	case GE_BLENDMODE_MIN:
	case GE_BLENDMODE_MAX:
	case GE_BLENDMODE_ABSDIFF:
		break;

	case GE_BLENDMODE_MUL_AND_ADD:
	case GE_BLENDMODE_MUL_AND_SUBTRACT:
	case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
		state.usesFactors = true;
		break;
	}

	// Without factors there is nothing more to classify.
	if (!state.usesFactors)
		return;

	// Source factor: record whether it is derived from either alpha channel.
	const auto srcFactor = id.AlphaBlendSrc();
	switch (srcFactor) {
	case GE_SRCBLEND_DSTALPHA:
	case GE_SRCBLEND_INVDSTALPHA:
	case GE_SRCBLEND_DOUBLEDSTALPHA:
	case GE_SRCBLEND_DOUBLEINVDSTALPHA:
		state.srcFactorUsesDstAlpha = true;
		break;

	case GE_SRCBLEND_SRCALPHA:
	case GE_SRCBLEND_INVSRCALPHA:
	case GE_SRCBLEND_DOUBLESRCALPHA:
	case GE_SRCBLEND_DOUBLEINVSRCALPHA:
		state.srcFactorUsesSrcAlpha = true;
		break;

	default:
		// Any other factor leaves both source flags untouched.
		break;
	}

	// Destination factor: same classification as above.
	const auto dstFactor = id.AlphaBlendDst();
	switch (dstFactor) {
	case GE_DSTBLEND_DSTALPHA:
	case GE_DSTBLEND_INVDSTALPHA:
	case GE_DSTBLEND_DOUBLEDSTALPHA:
	case GE_DSTBLEND_DOUBLEINVDSTALPHA:
		state.dstFactorUsesDstAlpha = true;
		break;

	case GE_DSTBLEND_SRCALPHA:
	case GE_DSTBLEND_INVSRCALPHA:
	case GE_DSTBLEND_DOUBLESRCALPHA:
	case GE_DSTBLEND_DOUBLEINVSRCALPHA:
		state.dstFactorUsesSrcAlpha = true;
		break;

	default:
		// Any other factor leaves both destination flags untouched.
		break;
	}
}
|
||||
|
||||
void PixelRegCache::Reset() {
|
||||
regs.clear();
|
||||
}
|
||||
|
@ -65,7 +65,8 @@ struct PixelRegCache {
|
||||
INVALID,
|
||||
GSTATE,
|
||||
CONST_BASE,
|
||||
ALPHA,
|
||||
SRC_ALPHA,
|
||||
DST_ALPHA,
|
||||
STENCIL,
|
||||
COLOR_OFF,
|
||||
DEPTH_OFF,
|
||||
@ -115,6 +116,15 @@ private:
|
||||
std::vector<RegStatus> regs;
|
||||
};
|
||||
|
||||
// Summary of which inputs a blend configuration depends on.
// Every flag starts out false; ComputePixelBlendState() switches on the ones that apply.
struct PixelBlendState {
	// The equation is one of the multiply-and-add/subtract variants, so factors are needed.
	bool usesFactors{false};
	// The source factor is one of the (inverse/double) source alpha selectors.
	bool srcFactorUsesSrcAlpha{false};
	// The source factor is one of the (inverse/double) destination alpha selectors.
	bool srcFactorUsesDstAlpha{false};
	// The destination factor is one of the (inverse/double) source alpha selectors.
	bool dstFactorUsesSrcAlpha{false};
	// The destination factor is one of the (inverse/double) destination alpha selectors.
	bool dstFactorUsesDstAlpha{false};
};
|
||||
void ComputePixelBlendState(PixelBlendState &state, const PixelFuncID &id);
|
||||
|
||||
#if PPSSPP_ARCH(ARM)
|
||||
class PixelJitCache : public ArmGen::ARMXCodeBlock {
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
@ -168,6 +178,9 @@ private:
|
||||
bool Jit_ConvertTo565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg);
|
||||
bool Jit_ConvertTo5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
|
||||
bool Jit_ConvertTo4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
|
||||
bool Jit_ConvertFrom565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg);
|
||||
bool Jit_ConvertFrom5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
|
||||
bool Jit_ConvertFrom4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
|
||||
|
||||
std::unordered_map<PixelFuncID, SingleFunc> cache_;
|
||||
std::unordered_map<PixelFuncID, const u8 *> addresses_;
|
||||
|
@ -344,10 +344,10 @@ bool PixelJitCache::Jit_AlphaTest(const PixelFuncID &id) {
|
||||
|
||||
// Load alpha into its own general reg.
|
||||
X64Reg alphaReg;
|
||||
if (regCache_.Has(PixelRegCache::ALPHA, PixelRegCache::T_GEN)) {
|
||||
alphaReg = regCache_.Find(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
|
||||
if (regCache_.Has(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN)) {
|
||||
alphaReg = regCache_.Find(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
|
||||
} else {
|
||||
alphaReg = regCache_.Alloc(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
|
||||
alphaReg = regCache_.Alloc(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
|
||||
MOVD_xmm(R(alphaReg), argColorReg);
|
||||
SHR(32, R(alphaReg), Imm8(24));
|
||||
}
|
||||
@ -499,10 +499,10 @@ bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {
|
||||
|
||||
// Save A so we can put it back, we don't "fog" A.
|
||||
X64Reg alphaReg;
|
||||
if (regCache_.Has(PixelRegCache::ALPHA, PixelRegCache::T_GEN)) {
|
||||
alphaReg = regCache_.Find(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
|
||||
if (regCache_.Has(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN)) {
|
||||
alphaReg = regCache_.Find(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
|
||||
} else {
|
||||
alphaReg = regCache_.Alloc(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
|
||||
alphaReg = regCache_.Alloc(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
|
||||
PEXTRW(alphaReg, argColorReg, 3);
|
||||
}
|
||||
|
||||
@ -951,8 +951,97 @@ bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {
|
||||
if (!id.alphaBlend)
|
||||
return true;
|
||||
|
||||
// TODO: Will need old color in some cases, too.
|
||||
return false;
|
||||
// Check if we need to load and prep factors.
|
||||
PixelBlendState blendState;
|
||||
ComputePixelBlendState(blendState, id);
|
||||
|
||||
bool success = true;
|
||||
|
||||
// Step 1: Load and expand dest color.
|
||||
X64Reg dstReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_VEC);
|
||||
X64Reg colorOff = GetColorOff(id);
|
||||
if (id.FBFormat() == GE_FORMAT_8888) {
|
||||
MOVD_xmm(dstReg, MatR(colorOff));
|
||||
regCache_.Unlock(colorOff, PixelRegCache::T_GEN);
|
||||
} else {
|
||||
X64Reg dstGenReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_GEN);
|
||||
MOVZX(32, 16, dstGenReg, MatR(colorOff));
|
||||
regCache_.Unlock(colorOff, PixelRegCache::T_GEN);
|
||||
|
||||
bool keepAlpha = blendState.srcFactorUsesDstAlpha || blendState.dstFactorUsesDstAlpha;
|
||||
X64Reg temp1Reg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_GEN);
|
||||
X64Reg temp2Reg = regCache_.Alloc(PixelRegCache::TEMP2, PixelRegCache::T_GEN);
|
||||
|
||||
switch (id.fbFormat) {
|
||||
case GE_FORMAT_565:
|
||||
success = success && Jit_ConvertFrom565(id, dstGenReg, temp1Reg, temp2Reg);
|
||||
break;
|
||||
|
||||
case GE_FORMAT_5551:
|
||||
success = success && Jit_ConvertFrom5551(id, dstGenReg, temp1Reg, temp2Reg, keepAlpha);
|
||||
break;
|
||||
|
||||
case GE_FORMAT_4444:
|
||||
success = success && Jit_ConvertFrom4444(id, dstGenReg, temp1Reg, temp2Reg, keepAlpha);
|
||||
|
||||
break;
|
||||
|
||||
case GE_FORMAT_8888:
|
||||
break;
|
||||
}
|
||||
|
||||
MOVD_xmm(dstReg, R(dstGenReg));
|
||||
|
||||
regCache_.Release(temp1Reg, PixelRegCache::T_GEN);
|
||||
regCache_.Release(temp2Reg, PixelRegCache::T_GEN);
|
||||
regCache_.Release(dstGenReg, PixelRegCache::T_GEN);
|
||||
}
|
||||
|
||||
// Step 2: Load and apply factors.
|
||||
if (blendState.usesFactors) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Step 3: Apply equation.
|
||||
// Note: below, we completely ignore what happens to the alpha bits.
|
||||
// It won't matter, since we'll replace those with stencil anyway.
|
||||
X64Reg tempReg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_VEC);
|
||||
switch (id.AlphaBlendEq()) {
|
||||
case GE_BLENDMODE_MUL_AND_ADD:
|
||||
// TODO
|
||||
break;
|
||||
|
||||
case GE_BLENDMODE_MUL_AND_SUBTRACT:
|
||||
// TODO
|
||||
break;
|
||||
|
||||
case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
|
||||
// TODO
|
||||
break;
|
||||
|
||||
case GE_BLENDMODE_MIN:
|
||||
PMINUB(argColorReg, R(dstReg));
|
||||
break;
|
||||
|
||||
case GE_BLENDMODE_MAX:
|
||||
PMAXUB(argColorReg, R(dstReg));
|
||||
break;
|
||||
|
||||
case GE_BLENDMODE_ABSDIFF:
|
||||
// Calculate A=(dst-src < 0 ? 0 : dst-src) and B=(src-dst < 0 ? 0 : src-dst)...
|
||||
MOVDQA(tempReg, R(dstReg));
|
||||
PSUBUSB(tempReg, R(argColorReg));
|
||||
PSUBUSB(argColorReg, R(dstReg));
|
||||
|
||||
// Now, one of those must be zero, and the other one is the result (could also be zero.)
|
||||
POR(argColorReg, R(tempReg));
|
||||
break;
|
||||
}
|
||||
|
||||
regCache_.Release(tempReg, PixelRegCache::T_VEC);
|
||||
regCache_.Release(dstReg, PixelRegCache::T_VEC);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PixelJitCache::Jit_Dither(const PixelFuncID &id) {
|
||||
@ -1654,6 +1743,113 @@ bool PixelJitCache::Jit_ConvertTo4444(const PixelFuncID &id, PixelRegCache::Reg
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PixelJitCache::Jit_ConvertFrom565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg) {
|
||||
// Filter out red only into temp1.
|
||||
MOV(32, R(temp1Reg), R(colorReg));
|
||||
AND(16, R(temp1Reg), Imm16(0x1F << 0));
|
||||
// Move it left to the top of the 8 bits.
|
||||
SHL(32, R(temp1Reg), Imm8(3));
|
||||
|
||||
// Now we bring in blue, since it's also 5 like red.
|
||||
MOV(32, R(temp2Reg), R(colorReg));
|
||||
AND(16, R(temp2Reg), Imm16(0x1F << 11));
|
||||
// Shift blue into place, 8 left (at 19), and merge back to temp1.
|
||||
SHL(32, R(temp2Reg), Imm8(8));
|
||||
OR(32, R(temp1Reg), R(temp2Reg));
|
||||
|
||||
// Make a copy back in temp2, and shift left 1 so we can swizzle together with G.
|
||||
OR(32, R(temp2Reg), R(temp1Reg));
|
||||
SHL(32, R(temp2Reg), Imm8(1));
|
||||
|
||||
// We go to green last because it's the different one. Put it in place.
|
||||
AND(16, R(colorReg), Imm16(0x3F << 5));
|
||||
SHL(32, R(colorReg), Imm8(5));
|
||||
// Combine with temp2 (for swizzling), then merge in temp1 (R+B pre-swizzle.)
|
||||
OR(32, R(temp2Reg), R(colorReg));
|
||||
OR(32, R(colorReg), R(temp1Reg));
|
||||
|
||||
// Now shift and mask temp2 for swizzle.
|
||||
SHR(32, R(temp2Reg), Imm8(6));
|
||||
AND(32, R(temp2Reg), Imm32(0x00070307));
|
||||
// And then OR that in too. We're done.
|
||||
OR(32, R(colorReg), R(temp2Reg));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PixelJitCache::Jit_ConvertFrom5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha) {
|
||||
// Filter out red only into temp1.
|
||||
MOV(32, R(temp1Reg), R(colorReg));
|
||||
AND(16, R(temp1Reg), Imm16(0x1F << 0));
|
||||
// Move it left to the top of the 8 bits.
|
||||
SHL(32, R(temp1Reg), Imm8(3));
|
||||
|
||||
// Add in green and shift into place (top bits.)
|
||||
MOV(32, R(temp2Reg), R(colorReg));
|
||||
AND(16, R(temp2Reg), Imm16(0x1F << 5));
|
||||
SHL(32, R(temp2Reg), Imm8(6));
|
||||
OR(32, R(temp1Reg), R(temp2Reg));
|
||||
|
||||
if (keepAlpha) {
|
||||
// Now take blue and alpha together.
|
||||
AND(16, R(colorReg), Imm16(0x8000 | (0x1F << 10)));
|
||||
// We move all the way left, then sign extend right to expand alpha.
|
||||
SHL(32, R(colorReg), Imm8(16));
|
||||
SAR(32, R(colorReg), Imm8(7));
|
||||
} else {
|
||||
AND(16, R(colorReg), Imm16(0x1F << 10));
|
||||
SHL(32, R(colorReg), Imm8(9));
|
||||
}
|
||||
|
||||
// Combine both together, we still need to swizzle.
|
||||
OR(32, R(colorReg), R(temp1Reg));
|
||||
OR(32, R(temp1Reg), R(colorReg));
|
||||
// Now for swizzle, we'll mask carefully to avoid overflow.
|
||||
SHR(32, R(temp1Reg), Imm8(5));
|
||||
AND(32, R(temp1Reg), Imm32(0x00070707));
|
||||
|
||||
// Then finally merge in the swizzle bits.
|
||||
OR(32, R(colorReg), R(temp1Reg));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PixelJitCache::Jit_ConvertFrom4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha) {
|
||||
// Move red into position within temp1.
|
||||
MOV(32, R(temp1Reg), R(colorReg));
|
||||
AND(16, R(temp1Reg), Imm16(0xF << 0));
|
||||
SHL(32, R(temp1Reg), Imm8(4));
|
||||
|
||||
// Green is just as simple.
|
||||
MOV(32, R(temp2Reg), R(colorReg));
|
||||
AND(16, R(temp2Reg), Imm16(0xF << 4));
|
||||
SHL(32, R(temp2Reg), Imm8(8));
|
||||
OR(32, R(temp1Reg), R(temp2Reg));
|
||||
|
||||
// Blue isn't last this time, but it's next.
|
||||
MOV(32, R(temp2Reg), R(colorReg));
|
||||
AND(16, R(temp2Reg), Imm16(0xF << 8));
|
||||
SHL(32, R(temp2Reg), Imm8(12));
|
||||
OR(32, R(temp1Reg), R(temp2Reg));
|
||||
|
||||
if (keepAlpha) {
|
||||
// Last but not least, alpha.
|
||||
AND(16, R(colorReg), Imm16(0xF << 12));
|
||||
SHL(32, R(colorReg), Imm8(16));
|
||||
OR(32, R(colorReg), R(temp1Reg));
|
||||
|
||||
// Copy to temp1 again for swizzling.
|
||||
OR(32, R(temp1Reg), R(colorReg));
|
||||
} else {
|
||||
// Overwrite colorReg (we need temp1 as a copy anyway.)
|
||||
MOV(32, R(colorReg), R(temp1Reg));
|
||||
}
|
||||
|
||||
// Masking isn't necessary here since everything is 4 wide.
|
||||
SHR(32, R(temp1Reg), Imm8(4));
|
||||
OR(32, R(colorReg), R(temp1Reg));
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user