mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 21:39:52 +00:00
Turning off the "close memory finder" lets us find more RIP addressing...
This commit is contained in:
parent
8872057a2d
commit
86396ba39b
@ -277,14 +277,15 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
case 5: //F(fd) = fabsf(F(fs)); break; //abs
|
||||
fpr.SpillLock(fd, fs);
|
||||
fpr.MapReg(fd, fd == fs, true);
|
||||
MOV(PTRBITS, R(TEMPREG), ImmPtr(&ssNoSignMask[0]));
|
||||
if (fd != fs && fpr.IsMapped(fs)) {
|
||||
MOVAPS(fpr.RX(fd), M(ssNoSignMask));
|
||||
MOVAPS(fpr.RX(fd), MatR(TEMPREG));
|
||||
ANDPS(fpr.RX(fd), fpr.R(fs));
|
||||
} else {
|
||||
if (fd != fs) {
|
||||
MOVSS(fpr.RX(fd), fpr.R(fs));
|
||||
}
|
||||
ANDPS(fpr.RX(fd), M(ssNoSignMask));
|
||||
ANDPS(fpr.RX(fd), MatR(TEMPREG));
|
||||
}
|
||||
break;
|
||||
|
||||
@ -299,14 +300,15 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
case 7: //F(fd) = -F(fs); break; //neg
|
||||
fpr.SpillLock(fd, fs);
|
||||
fpr.MapReg(fd, fd == fs, true);
|
||||
MOV(PTRBITS, R(TEMPREG), ImmPtr(&ssSignBits2[0]));
|
||||
if (fd != fs && fpr.IsMapped(fs)) {
|
||||
MOVAPS(fpr.RX(fd), M(ssSignBits2));
|
||||
MOVAPS(fpr.RX(fd), MatR(TEMPREG));
|
||||
XORPS(fpr.RX(fd), fpr.R(fs));
|
||||
} else {
|
||||
if (fd != fs) {
|
||||
MOVSS(fpr.RX(fd), fpr.R(fs));
|
||||
}
|
||||
XORPS(fpr.RX(fd), M(ssSignBits2));
|
||||
XORPS(fpr.RX(fd), MatR(TEMPREG));
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -575,7 +575,12 @@ void Jit::Comp_VIdt(MIPSOpcode op) {
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
if (fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) {
|
||||
int row = vd & (n - 1);
|
||||
MOVAPS(fpr.VSX(dregs), M(identityMatrix[row]));
|
||||
if (RipAccessible(identityMatrix)) {
|
||||
MOVAPS(fpr.VSX(dregs), M(identityMatrix[row])); // rip accessible
|
||||
} else {
|
||||
MOV(PTRBITS, R(TEMPREG), ImmPtr(&identityMatrix[row]));
|
||||
MOVAPS(fpr.VSX(dregs), MatR(TEMPREG));
|
||||
}
|
||||
ApplyPrefixD(dregs, sz);
|
||||
fpr.ReleaseSpillLocks();
|
||||
return;
|
||||
@ -1604,6 +1609,11 @@ void Jit::Comp_Vh2f(MIPSOpcode op) {
|
||||
SSE_CONST4(was_infnan, 0x7bff);
|
||||
SSE_CONST4(exp_infnan, 255 << 23);
|
||||
|
||||
// TODO: Fix properly
|
||||
if (!RipAccessible(mask_nosign)) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
#undef SSE_CONST4
|
||||
VectorSize sz = GetVecSize(op);
|
||||
VectorSize outsize;
|
||||
@ -1639,14 +1649,14 @@ void Jit::Comp_Vh2f(MIPSOpcode op) {
|
||||
// OK, 16 bits in each word.
|
||||
// Let's go. Deep magic here.
|
||||
MOVAPS(XMM1, R(XMM0));
|
||||
ANDPS(XMM0, M(mask_nosign)); // xmm0 = expmant
|
||||
ANDPS(XMM0, M(&mask_nosign[0])); // xmm0 = expmant
|
||||
XORPS(XMM1, R(XMM0)); // xmm1 = justsign = expmant ^ xmm0
|
||||
MOVAPS(tempR, R(XMM0));
|
||||
PCMPGTD(tempR, M(was_infnan)); // xmm2 = b_wasinfnan
|
||||
PCMPGTD(tempR, M(&was_infnan[0])); // xmm2 = b_wasinfnan
|
||||
PSLLD(XMM0, 13);
|
||||
MULPS(XMM0, M(magic)); /// xmm0 = scaled
|
||||
PSLLD(XMM1, 16); // xmm1 = sign
|
||||
ANDPS(tempR, M(exp_infnan));
|
||||
ANDPS(tempR, M(&exp_infnan[0]));
|
||||
ORPS(XMM1, R(tempR));
|
||||
ORPS(XMM0, R(XMM1));
|
||||
|
||||
@ -1732,7 +1742,7 @@ void Jit::Comp_Vx2i(MIPSOpcode op) {
|
||||
MOVSS(XMM0, fpr.V(sregs[0]));
|
||||
if (cpu_info.bSSSE3) {
|
||||
// Not really different speed. Generates a bit less code.
|
||||
PSHUFB(XMM0, M(vuc2i_shuffle));
|
||||
PSHUFB(XMM0, M(&vuc2i_shuffle[0]));
|
||||
} else {
|
||||
// First, we change 0xDDCCBBAA to 0xDDDDCCCCBBBBAAAA.
|
||||
PUNPCKLBW(XMM0, R(XMM0));
|
||||
@ -1742,7 +1752,7 @@ void Jit::Comp_Vx2i(MIPSOpcode op) {
|
||||
} else {
|
||||
if (cpu_info.bSSSE3) {
|
||||
MOVSS(XMM0, fpr.V(sregs[0]));
|
||||
PSHUFB(XMM0, M(vc2i_shuffle));
|
||||
PSHUFB(XMM0, M(&vc2i_shuffle[0]));
|
||||
} else {
|
||||
PXOR(XMM1, R(XMM1));
|
||||
MOVSS(XMM0, fpr.V(sregs[0]));
|
||||
@ -1861,8 +1871,14 @@ void Jit::Comp_Vf2i(MIPSOpcode op) {
|
||||
}
|
||||
}
|
||||
|
||||
if (*mult != 1.0f)
|
||||
MOVSD(XMM1, M(mult));
|
||||
if (*mult != 1.0f) {
|
||||
if (RipAccessible(mult)) {
|
||||
MOVSD(XMM1, M(mult)); // rip accessible
|
||||
} else {
|
||||
MOV(PTRBITS, R(TEMPREG), ImmPtr(mult));
|
||||
MOVSD(XMM1, MatR(TEMPREG));
|
||||
}
|
||||
}
|
||||
|
||||
fpr.MapRegsV(tempregs, sz, MAP_DIRTY | MAP_NOINIT);
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -3453,7 +3469,12 @@ void Jit::CompVrotShuffle(u8 *dregs, int imm, int n, bool negSin) {
|
||||
case 'S':
|
||||
MOVSS(fpr.V(dregs[i]), XMM0);
|
||||
if (negSin) {
|
||||
XORPS(fpr.VX(dregs[i]), M(&signBitLower));
|
||||
if (RipAccessible(&signBitLower)) {
|
||||
XORPS(fpr.VX(dregs[i]), M(&signBitLower)); // rip accessible
|
||||
} else {
|
||||
MOV(PTRBITS, R(TEMPREG), ImmPtr(&signBitLower));
|
||||
XORPS(fpr.VX(dregs[i]), MatR(TEMPREG));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case '0':
|
||||
|
@ -882,18 +882,18 @@ void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
|
||||
MOV(32, R(tempReg2), R(tempReg1));
|
||||
SHR(32, R(tempReg2), Imm8(16));
|
||||
|
||||
auto updateSide = [&](X64Reg r, CCFlags skipCC, u16 *value) {
|
||||
CMP(16, R(r), M(value));
|
||||
MOV(PTRBITS, R(tempReg3), ImmPtr(&gstate_c.vertBounds));
|
||||
auto updateSide = [&](X64Reg r, CCFlags skipCC, int offset) {
|
||||
CMP(16, R(r), MDisp(tempReg3, offset));
|
||||
FixupBranch skip = J_CC(skipCC);
|
||||
MOV(16, M(value), R(r));
|
||||
MOV(16, MDisp(tempReg3, offset), R(r));
|
||||
SetJumpTarget(skip);
|
||||
};
|
||||
|
||||
// TODO: Can this actually be fast? Hmm, floats aren't better.
|
||||
updateSide(tempReg1, CC_GE, &gstate_c.vertBounds.minU);
|
||||
updateSide(tempReg1, CC_LE, &gstate_c.vertBounds.maxU);
|
||||
updateSide(tempReg2, CC_GE, &gstate_c.vertBounds.minV);
|
||||
updateSide(tempReg2, CC_LE, &gstate_c.vertBounds.maxV);
|
||||
updateSide(tempReg1, CC_GE, offsetof(KnownVertexBounds, minU));
|
||||
updateSide(tempReg1, CC_LE, offsetof(KnownVertexBounds, maxU));
|
||||
updateSide(tempReg2, CC_GE, offsetof(KnownVertexBounds, minV));
|
||||
updateSide(tempReg2, CC_LE, offsetof(KnownVertexBounds, maxV));
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcFloatThrough() {
|
||||
@ -923,7 +923,6 @@ void VertexDecoderJitCache::Jit_Color8888() {
|
||||
SetJumpTarget(skip);
|
||||
}
|
||||
|
||||
static const u32 MEMORY_ALIGNED16(nibbles[4]) = { 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, };
|
||||
static const u32 MEMORY_ALIGNED16(color4444mask[4]) = { 0xf00ff00f, 0xf00ff00f, 0xf00ff00f, 0xf00ff00f, };
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color4444() {
|
||||
@ -931,7 +930,12 @@ void VertexDecoderJitCache::Jit_Color4444() {
|
||||
MOVD_xmm(fpScratchReg, MDisp(srcReg, dec_->coloff));
|
||||
// Spread to RGBA -> R00GB00A.
|
||||
PUNPCKLBW(fpScratchReg, R(fpScratchReg));
|
||||
PAND(fpScratchReg, M(color4444mask));
|
||||
if (RipAccessible(&color4444mask[0])) {
|
||||
PAND(fpScratchReg, M(&color4444mask[0]));
|
||||
} else {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&color4444mask));
|
||||
PAND(fpScratchReg, MatR(tempReg1));
|
||||
}
|
||||
MOVSS(fpScratchReg2, R(fpScratchReg));
|
||||
MOVSS(fpScratchReg3, R(fpScratchReg));
|
||||
// Create 0R000B00 and 00G000A0.
|
||||
|
Loading…
Reference in New Issue
Block a user