softjit: Implement min/max/absdiff blending.

Alpha not yet implemented.
2025-02-18 21:27:52 +00:00 · 2021-11-25 20:22:41 -08:00 · 2021-11-25 20:22:41 -08:00 · 7f167c3660
commit 7f167c3660
parent 771d459025
3 changed files with 273 additions and 9 deletions
--- a/GPU/Software/DrawPixel.cpp
+++ b/GPU/Software/DrawPixel.cpp
@ -574,6 +574,61 @@ SingleFunc PixelJitCache::GetSingle(const PixelFuncID &id) {
 	return nullptr;
 }

+void ComputePixelBlendState(PixelBlendState &state, const PixelFuncID &id) {
+	// Records in state which inputs the blend equation selected by id requires.
+	// Flags are only ever set to true here, so a reused state keeps earlier flags.
+	switch (id.AlphaBlendEq()) {
+	case GE_BLENDMODE_MIN:
+	case GE_BLENDMODE_MAX:
+	case GE_BLENDMODE_ABSDIFF:
+		break;
+
+	case GE_BLENDMODE_MUL_AND_ADD:
+	case GE_BLENDMODE_MUL_AND_SUBTRACT:
+	case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
+		state.usesFactors = true;
+		break;
+	}
+
+	// The alpha dependencies below only matter when factors are in play.
+	if (!state.usesFactors)
+		return;
+
+	// Source factor: does it read the source alpha, or the destination alpha?
+	const auto srcFactor = id.AlphaBlendSrc();
+	const bool srcReadsSrcAlpha =
+		srcFactor == GE_SRCBLEND_SRCALPHA ||
+		srcFactor == GE_SRCBLEND_INVSRCALPHA ||
+		srcFactor == GE_SRCBLEND_DOUBLESRCALPHA ||
+		srcFactor == GE_SRCBLEND_DOUBLEINVSRCALPHA;
+	const bool srcReadsDstAlpha =
+		srcFactor == GE_SRCBLEND_DSTALPHA ||
+		srcFactor == GE_SRCBLEND_INVDSTALPHA ||
+		srcFactor == GE_SRCBLEND_DOUBLEDSTALPHA ||
+		srcFactor == GE_SRCBLEND_DOUBLEINVDSTALPHA;
+	if (srcReadsSrcAlpha)
+		state.srcFactorUsesSrcAlpha = true;
+	if (srcReadsDstAlpha)
+		state.srcFactorUsesDstAlpha = true;
+
+	// Destination factor: same question.
+	const auto dstFactor = id.AlphaBlendDst();
+	const bool dstReadsSrcAlpha =
+		dstFactor == GE_DSTBLEND_SRCALPHA ||
+		dstFactor == GE_DSTBLEND_INVSRCALPHA ||
+		dstFactor == GE_DSTBLEND_DOUBLESRCALPHA ||
+		dstFactor == GE_DSTBLEND_DOUBLEINVSRCALPHA;
+	const bool dstReadsDstAlpha =
+		dstFactor == GE_DSTBLEND_DSTALPHA ||
+		dstFactor == GE_DSTBLEND_INVDSTALPHA ||
+		dstFactor == GE_DSTBLEND_DOUBLEDSTALPHA ||
+		dstFactor == GE_DSTBLEND_DOUBLEINVDSTALPHA;
+	if (dstReadsSrcAlpha)
+		state.dstFactorUsesSrcAlpha = true;
+	if (dstReadsDstAlpha)
+		state.dstFactorUsesDstAlpha = true;
+}
+
 void PixelRegCache::Reset() {
 	regs.clear();
 }
--- a/GPU/Software/DrawPixel.h
+++ b/GPU/Software/DrawPixel.h
@ -65,7 +65,8 @@ struct PixelRegCache {
 		INVALID,
 		GSTATE,
 		CONST_BASE,
-		ALPHA,
+		SRC_ALPHA,
+		DST_ALPHA,
 		STENCIL,
 		COLOR_OFF,
 		DEPTH_OFF,
@ -115,6 +116,15 @@ private:
 	std::vector<RegStatus> regs;
 };

+struct PixelBlendState {  // Which inputs a pixel blend needs; filled in by ComputePixelBlendState().
+	bool usesFactors = false;  // Set for the GE_BLENDMODE_MUL_AND_* equations; the flags below are only set when this is.
+	bool srcFactorUsesSrcAlpha = false;  // Source factor is one of the GE_SRCBLEND_*SRCALPHA values.
+	bool srcFactorUsesDstAlpha = false;  // Source factor is one of the GE_SRCBLEND_*DSTALPHA values.
+	bool dstFactorUsesSrcAlpha = false;  // Destination factor is one of the GE_DSTBLEND_*SRCALPHA values.
+	bool dstFactorUsesDstAlpha = false;  // Destination factor is one of the GE_DSTBLEND_*DSTALPHA values.
+};
+void ComputePixelBlendState(PixelBlendState &state, const PixelFuncID &id);  // Only ever raises flags in state, never clears them.
+
 #if PPSSPP_ARCH(ARM)
 class PixelJitCache : public ArmGen::ARMXCodeBlock {
 #elif PPSSPP_ARCH(ARM64)
@ -168,6 +178,9 @@ private:
 	bool Jit_ConvertTo565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg);
 	bool Jit_ConvertTo5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
 	bool Jit_ConvertTo4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
+	bool Jit_ConvertFrom565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg);
+	bool Jit_ConvertFrom5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
+	bool Jit_ConvertFrom4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);

 	std::unordered_map<PixelFuncID, SingleFunc> cache_;
 	std::unordered_map<PixelFuncID, const u8 *> addresses_;
--- a/GPU/Software/DrawPixelX86.cpp
+++ b/GPU/Software/DrawPixelX86.cpp
@ -344,10 +344,10 @@ bool PixelJitCache::Jit_AlphaTest(const PixelFuncID &id) {

 	// Load alpha into its own general reg.
 	X64Reg alphaReg;
-	if (regCache_.Has(PixelRegCache::ALPHA, PixelRegCache::T_GEN)) {
-		alphaReg = regCache_.Find(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
+	if (regCache_.Has(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN)) {
+		alphaReg = regCache_.Find(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
 	} else {
-		alphaReg = regCache_.Alloc(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
+		alphaReg = regCache_.Alloc(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
 		MOVD_xmm(R(alphaReg), argColorReg);
 		SHR(32, R(alphaReg), Imm8(24));
 	}
@ -499,10 +499,10 @@ bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {

 	// Save A so we can put it back, we don't "fog" A.
 	X64Reg alphaReg;
-	if (regCache_.Has(PixelRegCache::ALPHA, PixelRegCache::T_GEN)) {
-		alphaReg = regCache_.Find(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
+	if (regCache_.Has(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN)) {
+		alphaReg = regCache_.Find(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
 	} else {
-		alphaReg = regCache_.Alloc(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
+		alphaReg = regCache_.Alloc(PixelRegCache::SRC_ALPHA, PixelRegCache::T_GEN);
 		PEXTRW(alphaReg, argColorReg, 3);
 	}

@ -951,8 +951,97 @@ bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {
 	if (!id.alphaBlend)
 		return true;

-	// TODO: Will need old color in some cases, too.
-	return false;
+	// Emits code blending argColorReg (source) with the framebuffer pixel.
+	// Only MIN/MAX/ABSDIFF are implemented so far; alpha factors are not.
+	PixelBlendState blendState;
+	ComputePixelBlendState(blendState, id);
+
+	// Step 2 (load and apply factors) doesn't exist yet.  Bail out before
+	// allocating anything, so the failure path leaves no registers held.
+	if (blendState.usesFactors)
+		return false;
+
+	bool success = true;
+
+	// Step 1: Load and expand dest color.
+	X64Reg dstReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_VEC);
+	X64Reg colorOff = GetColorOff(id);
+	if (id.FBFormat() == GE_FORMAT_8888) {
+		MOVD_xmm(dstReg, MatR(colorOff));
+		regCache_.Unlock(colorOff, PixelRegCache::T_GEN);
+	} else {
+		// 16-bit formats are loaded zero extended, then widened in a general reg.
+		X64Reg dstGenReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_GEN);
+		MOVZX(32, 16, dstGenReg, MatR(colorOff));
+		regCache_.Unlock(colorOff, PixelRegCache::T_GEN);
+
+		// Dest alpha only needs expanding when a factor reads it.
+		bool keepAlpha = blendState.srcFactorUsesDstAlpha || blendState.dstFactorUsesDstAlpha;
+		X64Reg temp1Reg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_GEN);
+		X64Reg temp2Reg = regCache_.Alloc(PixelRegCache::TEMP2, PixelRegCache::T_GEN);
+
+		switch (id.fbFormat) {
+		case GE_FORMAT_565:
+			success = success && Jit_ConvertFrom565(id, dstGenReg, temp1Reg, temp2Reg);
+			break;
+
+		case GE_FORMAT_5551:
+			success = success && Jit_ConvertFrom5551(id, dstGenReg, temp1Reg, temp2Reg, keepAlpha);
+			break;
+
+		case GE_FORMAT_4444:
+			success = success && Jit_ConvertFrom4444(id, dstGenReg, temp1Reg, temp2Reg, keepAlpha);
+			break;
+
+		case GE_FORMAT_8888:
+			// Handled above, listed only to keep the switch complete.
+			break;
+		}
+
+		MOVD_xmm(dstReg, R(dstGenReg));
+
+		regCache_.Release(temp1Reg, PixelRegCache::T_GEN);
+		regCache_.Release(temp2Reg, PixelRegCache::T_GEN);
+		regCache_.Release(dstGenReg, PixelRegCache::T_GEN);
+	}
+
+	// Step 3: Apply equation.
+	// Note: below, we completely ignore what happens to the alpha bits.
+	// It won't matter, since we'll replace those with stencil anyway.
+	switch (id.AlphaBlendEq()) {
+	case GE_BLENDMODE_MUL_AND_ADD:
+	case GE_BLENDMODE_MUL_AND_SUBTRACT:
+	case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
+		// TODO: Not reachable until factors are implemented (see bail out above.)
+		break;
+
+	case GE_BLENDMODE_MIN:
+		PMINUB(argColorReg, R(dstReg));
+		break;
+
+	case GE_BLENDMODE_MAX:
+		PMAXUB(argColorReg, R(dstReg));
+		break;
+
+	case GE_BLENDMODE_ABSDIFF: {
+		// Only this equation needs a scratch vector, so allocate it here.
+		X64Reg tempReg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_VEC);
+		// Calculate A=(dst-src < 0 ? 0 : dst-src) and B=(src-dst < 0 ? 0 : src-dst)...
+		MOVDQA(tempReg, R(dstReg));
+		PSUBUSB(tempReg, R(argColorReg));
+		PSUBUSB(argColorReg, R(dstReg));
+
+		// Now, one of those must be zero, and the other one is the result (could also be zero.)
+		POR(argColorReg, R(tempReg));
+		regCache_.Release(tempReg, PixelRegCache::T_VEC);
+		break;
+	}
+	}
+
+	regCache_.Release(dstReg, PixelRegCache::T_VEC);
+
+	// Report conversion failures rather than unconditionally claiming success.
+	return success;
 }

 bool PixelJitCache::Jit_Dither(const PixelFuncID &id) {
@ -1654,6 +1743,113 @@ bool PixelJitCache::Jit_ConvertTo4444(const PixelFuncID &id, PixelRegCache::Reg
 	return true;
 }

+bool PixelJitCache::Jit_ConvertFrom565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg) {
+	// Emits code expanding the 565 color in colorReg to 8888 in place, clobbering both temps.  First, red only into temp1.
+	MOV(32, R(temp1Reg), R(colorReg));
+	AND(16, R(temp1Reg), Imm16(0x1F << 0));  // NOTE(review): 16-bit ops presumably leave bits 16-31 alone, so colorReg should arrive zero extended.
+	// Move it left to the top of its 8 bits (bits 3-7.)
+	SHL(32, R(temp1Reg), Imm8(3));
+
+	// Now we bring in blue, since it's also 5 bits wide like red.
+	MOV(32, R(temp2Reg), R(colorReg));
+	AND(16, R(temp2Reg), Imm16(0x1F << 11));
+	// Shift blue into place, 8 left (now starting at bit 19), and merge back to temp1.
+	SHL(32, R(temp2Reg), Imm8(8));
+	OR(32, R(temp1Reg), R(temp2Reg));
+
+	// temp2 only has blue, so this OR makes it a copy of temp1.  Shift left 1 so we can swizzle together with G.
+	OR(32, R(temp2Reg), R(temp1Reg));
+	SHL(32, R(temp2Reg), Imm8(1));
+
+	// We go to green last because it's the 6-bit one.  Put it in place (bits 10-15.)
+	AND(16, R(colorReg), Imm16(0x3F << 5));
+	SHL(32, R(colorReg), Imm8(5));
+	// Combine with temp2 (for swizzling), then merge in temp1 (R+B pre-swizzle.)
+	OR(32, R(temp2Reg), R(colorReg));
+	OR(32, R(colorReg), R(temp1Reg));
+
+	// Swizzle: bring each channel's top bits down to its low bits (3 for R and B, 2 for G), masking off the rest.
+	SHR(32, R(temp2Reg), Imm8(6));
+	AND(32, R(temp2Reg), Imm32(0x00070307));
+	// And then OR that in too.  We're done.
+	OR(32, R(colorReg), R(temp2Reg));
+
+	return true;  // Always succeeds; 565 has no alpha, so none is generated.
+}
+
+bool PixelJitCache::Jit_ConvertFrom5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha) {
+	// Emits code expanding the 5551 color in colorReg to 8888 in place; alpha is only expanded if keepAlpha.  Red first, into temp1.
+	MOV(32, R(temp1Reg), R(colorReg));
+	AND(16, R(temp1Reg), Imm16(0x1F << 0));  // NOTE(review): 16-bit ops presumably leave bits 16-31 alone, so colorReg should arrive zero extended.
+	// Move it left to the top of its 8 bits (bits 3-7.)
+	SHL(32, R(temp1Reg), Imm8(3));
+
+	// Add in green and shift into place (top bits of its channel, 11-15.)
+	MOV(32, R(temp2Reg), R(colorReg));
+	AND(16, R(temp2Reg), Imm16(0x1F << 5));
+	SHL(32, R(temp2Reg), Imm8(6));
+	OR(32, R(temp1Reg), R(temp2Reg));
+
+	if (keepAlpha) {
+		// Now take blue and alpha together.
+		AND(16, R(colorReg), Imm16(0x8000 | (0x1F << 10)));
+		// We move all the way left, then sign extend right: the single alpha bit fills bits 24-31, blue lands at 19-23.
+		SHL(32, R(colorReg), Imm8(16));
+		SAR(32, R(colorReg), Imm8(7));
+	} else {
+		AND(16, R(colorReg), Imm16(0x1F << 10));  // Blue alone; the alpha bit is masked away.
+		SHL(32, R(colorReg), Imm8(9));  // Straight to bits 19-23.
+	}
+
+	// Combine both together, we still need to swizzle.  The second OR just makes temp1 a copy of colorReg.
+	OR(32, R(colorReg), R(temp1Reg));
+	OR(32, R(temp1Reg), R(colorReg));
+	// Now for swizzle, we'll mask carefully to avoid overflow between channels (this also keeps alpha bits out.)
+	SHR(32, R(temp1Reg), Imm8(5));
+	AND(32, R(temp1Reg), Imm32(0x00070707));
+
+	// Then finally merge in the swizzle bits (each channel's top 3 bits, copied into its low 3.)
+	OR(32, R(colorReg), R(temp1Reg));
+	return true;  // Always succeeds.
+}
+
+bool PixelJitCache::Jit_ConvertFrom4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha) {
+	// Emits code expanding the 4444 color in colorReg to 8888 in place; alpha is only kept if keepAlpha.  Red goes to bits 4-7 of temp1.
+	MOV(32, R(temp1Reg), R(colorReg));
+	AND(16, R(temp1Reg), Imm16(0xF << 0));  // NOTE(review): 16-bit ops presumably leave bits 16-31 alone, so colorReg should arrive zero extended.
+	SHL(32, R(temp1Reg), Imm8(4));
+
+	// Green is just as simple (ends up at bits 12-15.)
+	MOV(32, R(temp2Reg), R(colorReg));
+	AND(16, R(temp2Reg), Imm16(0xF << 4));
+	SHL(32, R(temp2Reg), Imm8(8));
+	OR(32, R(temp1Reg), R(temp2Reg));
+
+	// Blue isn't last this time, but it's next (ends up at bits 20-23.)
+	MOV(32, R(temp2Reg), R(colorReg));
+	AND(16, R(temp2Reg), Imm16(0xF << 8));
+	SHL(32, R(temp2Reg), Imm8(12));
+	OR(32, R(temp1Reg), R(temp2Reg));
+
+	if (keepAlpha) {
+		// Last but not least, alpha (ends up at bits 28-31.)
+		AND(16, R(colorReg), Imm16(0xF << 12));
+		SHL(32, R(colorReg), Imm8(16));
+		OR(32, R(colorReg), R(temp1Reg));
+
+		// Copy to temp1 again for swizzling (colorReg already contains temp1, so OR is a copy.)
+		OR(32, R(temp1Reg), R(colorReg));
+	} else {
+		// Overwrite colorReg (we need temp1 as a copy anyway.)
+		MOV(32, R(colorReg), R(temp1Reg));
+	}
+
+	// Masking isn't necessary here since everything is 4 wide: each high nibble lands exactly in its empty low nibble.
+	SHR(32, R(temp1Reg), Imm8(4));
+	OR(32, R(colorReg), R(temp1Reg));
+	return true;  // Always succeeds.
+}
+
 };

 #endif