Merge pull request #15340 from unknownbrackets/softgpu-textures

Correct UV rotation and through-mode mipmaps; optimize texenv blend a bit
This commit is contained in:
Henrik Rydgård 2022-01-24 08:19:34 +01:00 committed by GitHub
commit eba93f2ee0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 63 additions and 38 deletions

View File

@ -117,7 +117,7 @@ inline float clip_dotprod(const VertexData &vert, float A, float B, float C, flo
} \
}
static void RotateUVThrough(const VertexData &tl, const VertexData &br, VertexData &tr, VertexData &bl) {
static void RotateUV(const VertexData &tl, const VertexData &br, VertexData &tr, VertexData &bl) {
const int x1 = tl.screenpos.x;
const int x2 = br.screenpos.x;
const int y1 = tl.screenpos.y;
@ -194,6 +194,8 @@ void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner)
bottomright = &buf[i];
}
RotateUV(*topleft, *bottomright, *topright, *bottomleft);
// Four triangles to do backfaces as well. Two of them will get backface culled.
ProcessTriangleInternal(*topleft, *topright, *bottomright, buf[3], binner, true);
ProcessTriangleInternal(*bottomright, *topright, *topleft, buf[3], binner, true);
@ -241,7 +243,7 @@ void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner)
bottomright = &buf[i];
}
RotateUVThrough(v0, v1, *topright, *bottomleft);
RotateUV(v0, v1, *topright, *bottomleft);
if (gstate.isModeClear() && !gstate.isDitherEnabled()) {
binner.AddClearRect(v0, v1);

View File

@ -270,8 +270,7 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b
// Currently only works for TL/BR, which is the most common but not required.
bool orient_check = xdiff >= 0 && ydiff >= 0;
// We already have a fast path for clear in ClearRectangle.
bool state_check = !state.pixelID.clearMode && NoClampOrWrap(v0.texturecoords) && NoClampOrWrap(v1.texturecoords);
// TODO: No mipmap levels? Might be a font at level 1...
bool state_check = !state.pixelID.clearMode && !state.samplerID.hasAnyMips && NoClampOrWrap(v0.texturecoords) && NoClampOrWrap(v1.texturecoords);
if ((coord_check || !state.enableTextures) && orient_check && state_check) {
binner.AddSprite(v0, v1);
return true;

View File

@ -1539,27 +1539,7 @@ bool SamplerJitCache::Jit_ApplyTextureFunc(const SamplerID &id) {
Describe("EnvBlend");
PACKSSDW(primColorReg, R(primColorReg));
// Start out with the prim color side. Materialize a 255 to inverse resultReg and round.
PCMPEQD(tempReg, R(tempReg));
PSRLW(tempReg, 8);
// We're going to lose tempReg, so save the 255s.
X64Reg roundValueReg = regCache_.Alloc(RegCache::VEC_TEMP1);
MOVDQA(roundValueReg, R(tempReg));
PSUBW(tempReg, R(resultReg));
PMULLW(tempReg, R(primColorReg));
// Okay, now add the rounding value.
PADDW(tempReg, R(roundValueReg));
regCache_.Release(roundValueReg, RegCache::VEC_TEMP1);
if (id.useTextureAlpha) {
// Before we modify the texture color, let's calculate alpha.
PADDW(primColorReg, M(constOnes16_));
PMULLW(primColorReg, R(resultReg));
// We divide later.
}
// First off, let's grab the color value.
X64Reg idReg = GetSamplerID();
X64Reg texEnvReg = regCache_.Alloc(RegCache::VEC_TEMP1);
if (cpu_info.bSSE4_1) {
@ -1570,22 +1550,66 @@ bool SamplerJitCache::Jit_ApplyTextureFunc(const SamplerID &id) {
PUNPCKLBW(texEnvReg, R(zeroReg));
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
PMULLW(resultReg, R(texEnvReg));
regCache_.Release(texEnvReg, RegCache::VEC_TEMP1);
UnlockSamplerID(idReg);
// Add in the prim color side and divide.
PADDW(resultReg, R(tempReg));
if (id.useColorDoubling)
PSRLW(resultReg, 7);
else
PSRLW(resultReg, 8);
// Now merge in the prim color so we have them interleaved, texenv low.
PUNPCKLWD(texEnvReg, R(primColorReg));
// Okay, now materialize 255 for inverting resultReg and rounding.
PCMPEQD(tempReg, R(tempReg));
PSRLW(tempReg, 8);
// If alpha is used, we want the roundup and factor to be zero.
if (id.useTextureAlpha)
PSRLDQ(tempReg, 10);
// We're going to lose tempReg, so save the 255s.
X64Reg roundValueReg = regCache_.Alloc(RegCache::VEC_TEMP2);
MOVDQA(roundValueReg, R(tempReg));
// Okay, now invert, then merge with resultReg low to match texenv low.
PSUBUSW(tempReg, R(resultReg));
PUNPCKLWD(resultReg, R(tempReg));
if (id.useTextureAlpha) {
// We put the alpha in here, just need to divide it after that multiply.
PSRLW(primColorReg, 8);
// Before we multiply, let's include alpha in that multiply.
PADDW(primColorReg, M(constOnes16_));
// Mask off everything but alpha, and move to the second highest short.
PSRLDQ(primColorReg, 6);
PSLLDQ(primColorReg, 12);
// Now simply merge in with texenv.
POR(texEnvReg, R(primColorReg));
}
useAlphaFrom(primColorReg);
// Alright, now to multiply and add all in one go. Note this gives us DWORDs.
PMADDWD(resultReg, R(texEnvReg));
regCache_.Release(texEnvReg, RegCache::VEC_TEMP1);
// Now convert back to 16 bit and add the 255s for rounding.
if (cpu_info.bSSE4_1) {
PACKUSDW(resultReg, R(resultReg));
} else {
PSLLD(resultReg, 16);
PSRAD(resultReg, 16);
PACKSSDW(resultReg, R(resultReg));
}
PADDW(resultReg, R(roundValueReg));
regCache_.Release(roundValueReg, RegCache::VEC_TEMP2);
// Okay, divide by 256 or 128 depending on doubling (we want to preserve the precision.)
if (id.useColorDoubling && id.useTextureAlpha) {
// If doubling, we want to still divide alpha by 256.
PSRLW(resultReg, 7);
PSRLW(primColorReg, resultReg, 1);
useAlphaFrom(primColorReg);
} else if (id.useColorDoubling) {
PSRLW(resultReg, 7);
} else {
PSRLW(resultReg, 8);
}
if (!id.useTextureAlpha)
useAlphaFrom(primColorReg);
break;
}

View File

@ -161,6 +161,8 @@ tests_good = [
"gpu/texfunc/decal",
"gpu/texfunc/modulate",
"gpu/texfunc/replace",
"gpu/textures/mipmap",
"gpu/textures/rotate",
"hash/hash",
"hle/check_not_used_uids",
"intr/intr",
@ -400,8 +402,6 @@ tests_next = [
"gpu/signals/jumps",
"gpu/signals/simple",
"gpu/simple/simple",
"gpu/textures/mipmap",
"gpu/textures/rotate",
"gpu/triangle/triangle",
"gpu/vertices/colors",
"gpu/vertices/texcoords",