softgpu: Move fixed blend factor to draw pix state.

This is the last of the gstate.
This commit is contained in:
Unknown W. Brackets 2022-01-15 12:08:00 -08:00
parent f4f7ea2736
commit 092b03bd67
5 changed files with 25 additions and 30 deletions

View File

@ -72,7 +72,6 @@ private:
Arm64Gen::ARM64FloatEmitter fp;
#endif
RegCache::Reg GetGState();
RegCache::Reg GetPixelID();
void UnlockPixelID(RegCache::Reg &r);
RegCache::Reg GetConstBase();

View File

@ -138,15 +138,6 @@ SingleFunc PixelJitCache::CompileSingle(const PixelFuncID &id) {
return (SingleFunc)start;
}
// Returns the register associated with RegCache::GEN_GSTATE.
// When the register cache already has one, it is looked up via Find(); otherwise
// a register is allocated and loaded with the address of gstate.nop.
RegCache::Reg PixelJitCache::GetGState() {
	// Already cached: just hand back the existing register.
	if (regCache_.Has(RegCache::GEN_GSTATE))
		return regCache_.Find(RegCache::GEN_GSTATE);

	// Not cached yet: allocate one and point it at gstate.nop.
	X64Reg gstateReg = regCache_.Alloc(RegCache::GEN_GSTATE);
	MOV(PTRBITS, R(gstateReg), ImmPtr(&gstate.nop));
	return gstateReg;
}
RegCache::Reg PixelJitCache::GetPixelID() {
if (regCache_.Has(RegCache::GEN_ARG_ID))
return regCache_.Find(RegCache::GEN_ARG_ID);
@ -1267,7 +1258,7 @@ bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {
bool PixelJitCache::Jit_BlendFactor(const PixelFuncID &id, RegCache::Reg factorReg, RegCache::Reg dstReg, PixelBlendFactor factor) {
X64Reg constReg = INVALID_REG;
X64Reg gstateReg = INVALID_REG;
X64Reg idReg = INVALID_REG;
X64Reg tempReg = INVALID_REG;
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
@ -1368,12 +1359,12 @@ bool PixelJitCache::Jit_BlendFactor(const PixelFuncID &id, RegCache::Reg factorR
case PixelBlendFactor::FIX:
default:
gstateReg = GetGState();
idReg = GetPixelID();
if (cpu_info.bSSE4_1) {
PMOVZXBW(factorReg, MDisp(gstateReg, offsetof(GPUgstate, blendfixa)));
PMOVZXBW(factorReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaBlendSrc)));
} else {
X64Reg zeroReg = GetZeroVec();
MOVD_xmm(factorReg, MDisp(gstateReg, offsetof(GPUgstate, blendfixa)));
MOVD_xmm(factorReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaBlendSrc)));
PUNPCKLBW(factorReg, R(zeroReg));
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
@ -1382,8 +1373,8 @@ bool PixelJitCache::Jit_BlendFactor(const PixelFuncID &id, RegCache::Reg factorR
break;
}
if (gstateReg != INVALID_REG)
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
if (idReg != INVALID_REG)
UnlockPixelID(idReg);
if (tempReg != INVALID_REG)
regCache_.Release(tempReg, RegCache::VEC_TEMP3);
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
@ -1394,7 +1385,7 @@ bool PixelJitCache::Jit_BlendFactor(const PixelFuncID &id, RegCache::Reg factorR
bool PixelJitCache::Jit_DstBlendFactor(const PixelFuncID &id, RegCache::Reg srcFactorReg, RegCache::Reg dstFactorReg, RegCache::Reg dstReg) {
bool success = true;
X64Reg constReg = INVALID_REG;
X64Reg gstateReg = INVALID_REG;
X64Reg idReg = INVALID_REG;
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
// Everything below expects an expanded 16-bit color
@ -1447,12 +1438,12 @@ bool PixelJitCache::Jit_DstBlendFactor(const PixelFuncID &id, RegCache::Reg srcF
case PixelBlendFactor::FIX:
default:
gstateReg = GetGState();
idReg = GetPixelID();
if (cpu_info.bSSE4_1) {
PMOVZXBW(dstFactorReg, MDisp(gstateReg, offsetof(GPUgstate, blendfixb)));
PMOVZXBW(dstFactorReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaBlendDst)));
} else {
X64Reg zeroReg = GetZeroVec();
MOVD_xmm(dstFactorReg, MDisp(gstateReg, offsetof(GPUgstate, blendfixb)));
MOVD_xmm(dstFactorReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaBlendDst)));
PUNPCKLBW(dstFactorReg, R(zeroReg));
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
@ -1463,8 +1454,8 @@ bool PixelJitCache::Jit_DstBlendFactor(const PixelFuncID &id, RegCache::Reg srcF
if (constReg != INVALID_REG)
regCache_.Unlock(constReg, RegCache::GEN_CONST_BASE);
if (gstateReg != INVALID_REG)
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
if (idReg != INVALID_REG)
UnlockPixelID(idReg);
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
return success;

View File

@ -216,6 +216,10 @@ void ComputePixelFuncID(PixelFuncID *id) {
id->cached.colorTestMask = gstate.getColorTestMask();
id->cached.colorTestRef = gstate.getColorTestRef() & id->cached.colorTestMask;
}
if (id->alphaBlendSrc == GE_SRCBLEND_FIXA)
id->cached.alphaBlendSrc = gstate.getFixA();
if (id->alphaBlendDst == GE_DSTBLEND_FIXB)
id->cached.alphaBlendDst = gstate.getFixB();
}
std::string DescribePixelFuncID(const PixelFuncID &id) {

View File

@ -63,6 +63,8 @@ struct PixelFuncID {
GEComparison colorTestFunc;
uint32_t colorTestMask;
uint32_t colorTestRef;
uint32_t alphaBlendSrc;
uint32_t alphaBlendDst;
} cached;
union {

View File

@ -331,7 +331,7 @@ Vec4IntResult SOFTRAST_CALL GetTextureFunctionOutput(Vec4IntArg prim_color_in, V
return ToVec4IntResult(Vec4<int>(out_rgb, out_a));
}
static inline Vec3<int> GetSourceFactor(GEBlendSrcFactor factor, const Vec4<int> &source, const Vec4<int> &dst) {
static inline Vec3<int> GetSourceFactor(GEBlendSrcFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
switch (factor) {
case GE_SRCBLEND_DSTCOLOR:
return dst.rgb();
@ -374,11 +374,11 @@ static inline Vec3<int> GetSourceFactor(GEBlendSrcFactor factor, const Vec4<int>
case GE_SRCBLEND_FIXA:
default:
// All other source factors (> 10) are treated as FIXA.
return Vec3<int>::FromRGB(gstate.getFixA());
return Vec3<int>::FromRGB(fix);
}
}
static inline Vec3<int> GetDestFactor(GEBlendDstFactor factor, const Vec4<int> &source, const Vec4<int> &dst) {
static inline Vec3<int> GetDestFactor(GEBlendDstFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
switch (factor) {
case GE_DSTBLEND_SRCCOLOR:
return source.rgb();
@ -421,16 +421,15 @@ static inline Vec3<int> GetDestFactor(GEBlendDstFactor factor, const Vec4<int> &
case GE_DSTBLEND_FIXB:
default:
// All other dest factors (> 10) are treated as FIXB.
return Vec3<int>::FromRGB(gstate.getFixB());
return Vec3<int>::FromRGB(fix);
}
}
// Removed inline here - it was never chosen to be inlined by the compiler anyway, too complex.
Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &source, const Vec4<int> &dst)
{
Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &source, const Vec4<int> &dst) {
// Note: These factors cannot go below 0, but they can go above 255 when doubling.
Vec3<int> srcfactor = GetSourceFactor(GEBlendSrcFactor(pixelID.AlphaBlendSrc()), source, dst);
Vec3<int> dstfactor = GetDestFactor(GEBlendDstFactor(pixelID.AlphaBlendDst()), source, dst);
Vec3<int> srcfactor = GetSourceFactor(GEBlendSrcFactor(pixelID.AlphaBlendSrc()), source, dst, pixelID.cached.alphaBlendSrc);
Vec3<int> dstfactor = GetDestFactor(GEBlendDstFactor(pixelID.AlphaBlendDst()), source, dst, pixelID.cached.alphaBlendDst);
switch (pixelID.AlphaBlendEq()) {
case GE_BLENDMODE_MUL_AND_ADD: