mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-07 22:37:15 +00:00
Merge pull request #15171 from unknownbrackets/softgpu-cleanup
Correct some alpha/stencil/blend issues in softgpu
This commit is contained in:
commit
daae09b4ab
@ -83,7 +83,8 @@ static inline void SetPixelDepth(int x, int y, u16 value) {
|
||||
static inline u32 GetPixelColor(GEBufferFormat fmt, int x, int y) {
|
||||
switch (fmt) {
|
||||
case GE_FORMAT_565:
|
||||
return RGB565ToRGBA8888(fb.Get16(x, y, gstate.FrameBufStride()));
|
||||
// A should be zero for the purposes of alpha blending.
|
||||
return RGB565ToRGBA8888(fb.Get16(x, y, gstate.FrameBufStride())) & 0x00FFFFFF;
|
||||
|
||||
case GE_FORMAT_5551:
|
||||
return RGBA5551ToRGBA8888(fb.Get16(x, y, gstate.FrameBufStride()));
|
||||
@ -179,7 +180,7 @@ static inline bool StencilTestPassed(const PixelFuncID &pixelID, u8 stencil) {
|
||||
if (pixelID.hasStencilTestMask)
|
||||
stencil &= gstate.getStencilTestMask();
|
||||
u8 ref = pixelID.stencilTestRef;
|
||||
switch (GEComparison(pixelID.stencilTestFunc)) {
|
||||
switch (pixelID.StencilTestFunc()) {
|
||||
case GE_COMP_NEVER:
|
||||
return false;
|
||||
|
||||
@ -246,6 +247,8 @@ static inline u8 ApplyStencilOp(GEBufferFormat fmt, GEStencilOp op, u8 old_stenc
|
||||
if (old_stencil >= 0x10)
|
||||
return old_stencil - 0x10;
|
||||
break;
|
||||
case GE_FORMAT_5551:
|
||||
return 0;
|
||||
default:
|
||||
if (old_stencil != 0)
|
||||
return old_stencil - 1;
|
||||
@ -460,7 +463,7 @@ inline void DrawSinglePixel(int x, int y, int z, int fog, const Vec4<int> &color
|
||||
|
||||
SingleFunc GetSingleFunc(const PixelFuncID &id) {
|
||||
if (id.clearMode) {
|
||||
switch (id.FBFormat()) {
|
||||
switch (id.fbFormat) {
|
||||
case GE_FORMAT_565:
|
||||
return &DrawSinglePixel<true, GE_FORMAT_565>;
|
||||
case GE_FORMAT_5551:
|
||||
@ -471,7 +474,7 @@ SingleFunc GetSingleFunc(const PixelFuncID &id) {
|
||||
return &DrawSinglePixel<true, GE_FORMAT_8888>;
|
||||
}
|
||||
}
|
||||
switch (id.FBFormat()) {
|
||||
switch (id.fbFormat) {
|
||||
case GE_FORMAT_565:
|
||||
return &DrawSinglePixel<false, GE_FORMAT_565>;
|
||||
case GE_FORMAT_5551:
|
||||
|
@ -38,6 +38,8 @@ void ComputePixelFuncID(PixelFuncID *id) {
|
||||
id->colorTest = gstate.isClearModeColorMask();
|
||||
id->stencilTest = gstate.isClearModeAlphaMask();
|
||||
id->depthWrite = gstate.isClearModeDepthMask();
|
||||
id->depthTestFunc = GE_COMP_ALWAYS;
|
||||
id->alphaTestFunc = GE_COMP_ALWAYS;
|
||||
} else {
|
||||
id->colorTest = gstate.isColorTestEnabled() && gstate.getColorTestFunction() != GE_COMP_ALWAYS;
|
||||
if (gstate.isStencilTestEnabled() && gstate.getStencilTestFunction() == GE_COMP_ALWAYS) {
|
||||
@ -55,10 +57,15 @@ void ComputePixelFuncID(PixelFuncID *id) {
|
||||
if (id->stencilTest) {
|
||||
id->stencilTestFunc = gstate.getStencilTestFunction();
|
||||
id->stencilTestRef = gstate.getStencilTestRef() & gstate.getStencilTestMask();
|
||||
id->hasStencilTestMask = gstate.getStencilTestMask() != 0xFF;
|
||||
id->sFail = gstate.getStencilOpSFail();
|
||||
id->zFail = gstate.isDepthTestEnabled() ? gstate.getStencilOpZFail() : GE_STENCILOP_KEEP;
|
||||
id->zPass = gstate.getStencilOpZPass();
|
||||
id->hasStencilTestMask = gstate.getStencilTestMask() != 0xFF && gstate.FrameBufFormat() != GE_FORMAT_565;
|
||||
|
||||
// Stencil can't be written on 565, and any invalid op acts like KEEP, which is 0.
|
||||
if (gstate.FrameBufFormat() != GE_FORMAT_565 && gstate.getStencilOpSFail() <= GE_STENCILOP_DECR)
|
||||
id->sFail = gstate.getStencilOpSFail();
|
||||
if (gstate.FrameBufFormat() != GE_FORMAT_565 && gstate.getStencilOpZFail() <= GE_STENCILOP_DECR)
|
||||
id->zFail = gstate.isDepthTestEnabled() ? gstate.getStencilOpZFail() : GE_STENCILOP_KEEP;
|
||||
if (gstate.FrameBufFormat() != GE_FORMAT_565 && gstate.getStencilOpZPass() <= GE_STENCILOP_DECR)
|
||||
id->zPass = gstate.getStencilOpZPass();
|
||||
}
|
||||
|
||||
id->depthTestFunc = gstate.isDepthTestEnabled() ? gstate.getDepthTestFunction() : GE_COMP_ALWAYS;
|
||||
@ -68,7 +75,8 @@ void ComputePixelFuncID(PixelFuncID *id) {
|
||||
id->hasAlphaTestMask = gstate.getAlphaTestMask() != 0xFF;
|
||||
}
|
||||
|
||||
id->alphaBlend = gstate.isAlphaBlendEnabled();
|
||||
// If invalid (6 or 7), doesn't do any blending, so force off.
|
||||
id->alphaBlend = gstate.isAlphaBlendEnabled() && gstate.getBlendEq() <= 5;
|
||||
// Force it off if the factors are constant and don't blend. Some games use this...
|
||||
if (id->alphaBlend && gstate.getBlendEq() == GE_BLENDMODE_MUL_AND_ADD) {
|
||||
bool srcFixedOne = gstate.getBlendFuncA() == GE_SRCBLEND_FIXA && gstate.getFixA() == 0x00FFFFFF;
|
||||
@ -108,7 +116,7 @@ std::string DescribePixelFuncID(const PixelFuncID &id) {
|
||||
if (id.applyColorWriteMask)
|
||||
desc += "Msk:";
|
||||
|
||||
switch (id.FBFormat()) {
|
||||
switch (id.fbFormat) {
|
||||
case GE_FORMAT_565: desc += "5650:"; break;
|
||||
case GE_FORMAT_5551: desc += "5551:"; break;
|
||||
case GE_FORMAT_4444: desc += "4444:"; break;
|
||||
|
@ -474,33 +474,67 @@ Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &sourc
|
||||
case GE_BLENDMODE_MUL_AND_ADD:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
const __m128 s = _mm_mul_ps(_mm_cvtepi32_ps(source.ivec), _mm_cvtepi32_ps(srcfactor.ivec));
|
||||
const __m128 d = _mm_mul_ps(_mm_cvtepi32_ps(dst.ivec), _mm_cvtepi32_ps(dstfactor.ivec));
|
||||
return Vec3<int>(_mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(s, d), _mm_set_ps1(1.0f / 255.0f))));
|
||||
// We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free.
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_adds_epi16(s, d), _mm_setzero_si128()));
|
||||
#else
|
||||
return (source.rgb() * srcfactor + dst.rgb() * dstfactor) / 255;
|
||||
Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return lhs + rhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MUL_AND_SUBTRACT:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
const __m128 s = _mm_mul_ps(_mm_cvtepi32_ps(source.ivec), _mm_cvtepi32_ps(srcfactor.ivec));
|
||||
const __m128 d = _mm_mul_ps(_mm_cvtepi32_ps(dst.ivec), _mm_cvtepi32_ps(dstfactor.ivec));
|
||||
return Vec3<int>(_mm_cvtps_epi32(_mm_mul_ps(_mm_sub_ps(s, d), _mm_set_ps1(1.0f / 255.0f))));
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(s, d), _mm_setzero_si128()), _mm_setzero_si128()));
|
||||
#else
|
||||
return (source.rgb() * srcfactor - dst.rgb() * dstfactor) / 255;
|
||||
Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return lhs - rhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
const __m128 s = _mm_mul_ps(_mm_cvtepi32_ps(source.ivec), _mm_cvtepi32_ps(srcfactor.ivec));
|
||||
const __m128 d = _mm_mul_ps(_mm_cvtepi32_ps(dst.ivec), _mm_cvtepi32_ps(dstfactor.ivec));
|
||||
return Vec3<int>(_mm_cvtps_epi32(_mm_mul_ps(_mm_sub_ps(d, s), _mm_set_ps1(1.0f / 255.0f))));
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(d, s), _mm_setzero_si128()), _mm_setzero_si128()));
|
||||
#else
|
||||
return (dst.rgb() * dstfactor - source.rgb() * srcfactor) / 255;
|
||||
Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return rhs - lhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1015,5 +1015,5 @@ bool SoftGPU::DescribeCodePtr(const u8 *ptr, std::string &name) {
|
||||
name = "RasterizerJit:" + subname;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return GPUCommon::DescribeCodePtr(ptr, name);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user