mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 21:39:52 +00:00
x86jit: Speed up float to int conversions.
This commit is contained in:
parent
7dc18a94af
commit
1c81d47dd4
@ -47,6 +47,7 @@ void X64JitBackend::EmitFPUConstants() {
|
|||||||
EmitConst4x32(&constants.qNAN, 0x7FC00000);
|
EmitConst4x32(&constants.qNAN, 0x7FC00000);
|
||||||
EmitConst4x32(&constants.positiveOnes, 0x3F800000);
|
EmitConst4x32(&constants.positiveOnes, 0x3F800000);
|
||||||
EmitConst4x32(&constants.negativeOnes, 0xBF800000);
|
EmitConst4x32(&constants.negativeOnes, 0xBF800000);
|
||||||
|
EmitConst4x32(&constants.maxIntBelowAsFloat, 0x4EFFFFFF);
|
||||||
|
|
||||||
constants.mulTableVi2f = (const float *)GetCodePointer();
|
constants.mulTableVi2f = (const float *)GetCodePointer();
|
||||||
for (uint8_t i = 0; i < 32; ++i) {
|
for (uint8_t i = 0; i < 32; ++i) {
|
||||||
@ -57,20 +58,14 @@ void X64JitBackend::EmitFPUConstants() {
|
|||||||
Write32(val);
|
Write32(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
constants.mulTableVf2i = (const double *)GetCodePointer();
|
constants.mulTableVf2i = (const float *)GetCodePointer();
|
||||||
for (uint8_t i = 0; i < 32; ++i) {
|
for (uint8_t i = 0; i < 32; ++i) {
|
||||||
double fval = (1UL << i);
|
float fval = (float)(1ULL << i);
|
||||||
uint64_t val;
|
uint32_t val;
|
||||||
memcpy(&val, &fval, sizeof(val));
|
memcpy(&val, &fval, sizeof(val));
|
||||||
|
|
||||||
Write64(val);
|
Write32(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: this first one is (double)(int)0x80000000, sign extended.
|
|
||||||
constants.minIntAsDouble = (const double *)GetCodePointer();
|
|
||||||
Write64(0xC1E0000000000000ULL);
|
|
||||||
constants.maxIntAsDouble = (const double *)GetCodePointer();
|
|
||||||
Write64(0x41DFFFFFFFC00000ULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void X64JitBackend::CopyVec4ToFPRLane0(Gen::X64Reg dest, Gen::X64Reg src, int lane) {
|
void X64JitBackend::CopyVec4ToFPRLane0(Gen::X64Reg dest, Gen::X64Reg src, int lane) {
|
||||||
@ -579,11 +574,14 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) {
|
|||||||
case IROp::FCvtWS:
|
case IROp::FCvtWS:
|
||||||
{
|
{
|
||||||
regs_.Map(inst);
|
regs_.Map(inst);
|
||||||
UCOMISS(regs_.FX(inst.src1), M(constants.positiveInfinity)); // rip accessible
|
UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible
|
||||||
|
|
||||||
CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1));
|
CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1));
|
||||||
// UCOMISS set ZF if EQUAL (to infinity) or UNORDERED.
|
// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
|
||||||
FixupBranch skip = J_CC(CC_NZ);
|
// We want noSignMask otherwise, GREATER or UNORDERED.
|
||||||
|
FixupBranch isNAN = J_CC(CC_P);
|
||||||
|
FixupBranch skip = J_CC(CC_BE);
|
||||||
|
SetJumpTarget(isNAN);
|
||||||
MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
|
MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
|
||||||
|
|
||||||
SetJumpTarget(skip);
|
SetJumpTarget(skip);
|
||||||
@ -599,54 +597,65 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) {
|
|||||||
regs_.Map(inst);
|
regs_.Map(inst);
|
||||||
if (cpu_info.bSSE4_1) {
|
if (cpu_info.bSSE4_1) {
|
||||||
int scale = inst.src2 & 0x1F;
|
int scale = inst.src2 & 0x1F;
|
||||||
int rmode = inst.src2 >> 6;
|
IRRoundMode rmode = (IRRoundMode)(inst.src2 >> 6);
|
||||||
|
|
||||||
CVTSS2SD(regs_.FX(inst.dest), regs_.F(inst.src1));
|
if (scale != 0 && cpu_info.bAVX) {
|
||||||
|
VMULSS(regs_.FX(inst.dest), regs_.FX(inst.src1), M(&constants.mulTableVf2i[scale])); // rip accessible
|
||||||
|
} else {
|
||||||
|
if (inst.dest != inst.src1)
|
||||||
|
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
|
||||||
if (scale != 0)
|
if (scale != 0)
|
||||||
MULSD(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible
|
MULSS(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible
|
||||||
|
}
|
||||||
|
|
||||||
// On NAN, we want maxInt anyway, so let's let it be the second param.
|
UCOMISS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible
|
||||||
MAXSD(regs_.FX(inst.dest), M(constants.minIntAsDouble)); // rip accessible
|
|
||||||
MINSD(regs_.FX(inst.dest), M(constants.maxIntAsDouble)); // rip accessible
|
|
||||||
|
|
||||||
switch (rmode) {
|
switch (rmode) {
|
||||||
case 0:
|
case IRRoundMode::RINT_0:
|
||||||
ROUNDNEARPD(regs_.FX(inst.dest), regs_.F(inst.dest));
|
ROUNDNEARPS(regs_.FX(inst.dest), regs_.F(inst.dest));
|
||||||
CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 1:
|
case IRRoundMode::CAST_1:
|
||||||
CVTTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 2:
|
case IRRoundMode::CEIL_2:
|
||||||
ROUNDCEILPD(regs_.FX(inst.dest), regs_.F(inst.dest));
|
ROUNDCEILPS(regs_.FX(inst.dest), regs_.F(inst.dest));
|
||||||
CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 3:
|
case IRRoundMode::FLOOR_3:
|
||||||
ROUNDFLOORPD(regs_.FX(inst.dest), regs_.F(inst.dest));
|
ROUNDFLOORPS(regs_.FX(inst.dest), regs_.F(inst.dest));
|
||||||
CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
|
||||||
|
// We want noSignMask otherwise, GREATER or UNORDERED.
|
||||||
|
FixupBranch isNAN = J_CC(CC_P);
|
||||||
|
FixupBranch skip = J_CC(CC_BE);
|
||||||
|
SetJumpTarget(isNAN);
|
||||||
|
MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
|
||||||
|
SetJumpTarget(skip);
|
||||||
} else {
|
} else {
|
||||||
int scale = inst.src2 & 0x1F;
|
int scale = inst.src2 & 0x1F;
|
||||||
int rmode = inst.src2 >> 6;
|
IRRoundMode rmode = (IRRoundMode)(inst.src2 >> 6);
|
||||||
|
|
||||||
int setMXCSR = -1;
|
int setMXCSR = -1;
|
||||||
bool useTrunc = false;
|
bool useTrunc = false;
|
||||||
switch (rmode) {
|
switch (rmode) {
|
||||||
case 0:
|
case IRRoundMode::RINT_0:
|
||||||
// TODO: Could skip if hasSetRounding, but we don't have the flag.
|
// TODO: Could skip if hasSetRounding, but we don't have the flag.
|
||||||
setMXCSR = 0;
|
setMXCSR = 0;
|
||||||
break;
|
break;
|
||||||
case 1:
|
case IRRoundMode::CAST_1:
|
||||||
useTrunc = true;
|
useTrunc = true;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case IRRoundMode::CEIL_2:
|
||||||
setMXCSR = 2;
|
setMXCSR = 2;
|
||||||
break;
|
break;
|
||||||
case 3:
|
case IRRoundMode::FLOOR_3:
|
||||||
setMXCSR = 1;
|
setMXCSR = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -665,21 +674,26 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) {
|
|||||||
LDMXCSR(MDisp(CTXREG, tempOffset));
|
LDMXCSR(MDisp(CTXREG, tempOffset));
|
||||||
}
|
}
|
||||||
|
|
||||||
CVTSS2SD(regs_.FX(inst.dest), regs_.F(inst.src1));
|
if (inst.dest != inst.src1)
|
||||||
|
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
|
||||||
if (scale != 0)
|
if (scale != 0)
|
||||||
MULSD(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale]));
|
MULSS(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible
|
||||||
|
|
||||||
// On NAN, we want maxInt anyway, so let's let it be the second param.
|
UCOMISS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible
|
||||||
MAXSD(regs_.FX(inst.dest), M(constants.minIntAsDouble));
|
|
||||||
MINSD(regs_.FX(inst.dest), M(constants.maxIntAsDouble));
|
|
||||||
|
|
||||||
if (useTrunc) {
|
if (useTrunc) {
|
||||||
CVTTSD2SI(SCRATCH1, regs_.F(inst.dest));
|
CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
||||||
} else {
|
} else {
|
||||||
CVTSD2SI(SCRATCH1, regs_.F(inst.dest));
|
CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
||||||
}
|
}
|
||||||
|
|
||||||
MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
|
// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
|
||||||
|
// We want noSignMask otherwise, GREATER or UNORDERED.
|
||||||
|
FixupBranch isNAN = J_CC(CC_P);
|
||||||
|
FixupBranch skip = J_CC(CC_BE);
|
||||||
|
SetJumpTarget(isNAN);
|
||||||
|
MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
|
||||||
|
SetJumpTarget(skip);
|
||||||
|
|
||||||
// Return MXCSR to its previous value.
|
// Return MXCSR to its previous value.
|
||||||
if (setMXCSR != -1) {
|
if (setMXCSR != -1) {
|
||||||
@ -704,47 +718,106 @@ void X64JitBackend::CompIR_FRound(IRInst inst) {
|
|||||||
CONDITIONAL_DISABLE;
|
CONDITIONAL_DISABLE;
|
||||||
|
|
||||||
switch (inst.op) {
|
switch (inst.op) {
|
||||||
|
case IROp::FCeil:
|
||||||
|
case IROp::FFloor:
|
||||||
case IROp::FRound:
|
case IROp::FRound:
|
||||||
CompIR_Generic(inst);
|
if (cpu_info.bSSE4_1) {
|
||||||
|
regs_.Map(inst);
|
||||||
|
UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible
|
||||||
|
|
||||||
|
switch (inst.op) {
|
||||||
|
case IROp::FCeil:
|
||||||
|
ROUNDCEILPS(regs_.FX(inst.dest), regs_.F(inst.src1));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IROp::FFloor:
|
||||||
|
ROUNDFLOORPS(regs_.FX(inst.dest), regs_.F(inst.src1));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IROp::FRound:
|
||||||
|
ROUNDNEARPS(regs_.FX(inst.dest), regs_.F(inst.src1));
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
INVALIDOP;
|
||||||
|
}
|
||||||
|
CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
|
||||||
|
// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
|
||||||
|
// We want noSignMask otherwise, GREATER or UNORDERED.
|
||||||
|
FixupBranch isNAN = J_CC(CC_P);
|
||||||
|
FixupBranch skip = J_CC(CC_BE);
|
||||||
|
SetJumpTarget(isNAN);
|
||||||
|
MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
|
||||||
|
|
||||||
|
SetJumpTarget(skip);
|
||||||
|
} else {
|
||||||
|
regs_.Map(inst);
|
||||||
|
|
||||||
|
int setMXCSR = -1;
|
||||||
|
switch (inst.op) {
|
||||||
|
case IROp::FRound:
|
||||||
|
// TODO: Could skip if hasSetRounding, but we don't have the flag.
|
||||||
|
setMXCSR = 0;
|
||||||
|
break;
|
||||||
|
case IROp::FCeil:
|
||||||
|
setMXCSR = 2;
|
||||||
|
break;
|
||||||
|
case IROp::FFloor:
|
||||||
|
setMXCSR = 1;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
INVALIDOP;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Might be possible to cache this and update between instructions?
|
||||||
|
// Probably kinda expensive to switch each time...
|
||||||
|
if (setMXCSR != -1) {
|
||||||
|
STMXCSR(MDisp(CTXREG, mxcsrTempOffset));
|
||||||
|
MOV(32, R(SCRATCH1), MDisp(CTXREG, mxcsrTempOffset));
|
||||||
|
AND(32, R(SCRATCH1), Imm32(~(3 << 13)));
|
||||||
|
if (setMXCSR != 0) {
|
||||||
|
OR(32, R(SCRATCH1), Imm32(setMXCSR << 13));
|
||||||
|
}
|
||||||
|
MOV(32, MDisp(CTXREG, tempOffset), R(SCRATCH1));
|
||||||
|
LDMXCSR(MDisp(CTXREG, tempOffset));
|
||||||
|
}
|
||||||
|
|
||||||
|
UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible
|
||||||
|
|
||||||
|
CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1));
|
||||||
|
// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
|
||||||
|
// We want noSignMask otherwise, GREATER or UNORDERED.
|
||||||
|
FixupBranch isNAN = J_CC(CC_P);
|
||||||
|
FixupBranch skip = J_CC(CC_BE);
|
||||||
|
SetJumpTarget(isNAN);
|
||||||
|
MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
|
||||||
|
|
||||||
|
SetJumpTarget(skip);
|
||||||
|
|
||||||
|
// Return MXCSR to its previous value.
|
||||||
|
if (setMXCSR != -1) {
|
||||||
|
LDMXCSR(MDisp(CTXREG, mxcsrTempOffset));
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case IROp::FTrunc:
|
case IROp::FTrunc:
|
||||||
{
|
{
|
||||||
regs_.SpillLockFPR(inst.dest, inst.src1);
|
|
||||||
X64Reg tempZero = regs_.GetAndLockTempFPR();
|
|
||||||
regs_.Map(inst);
|
regs_.Map(inst);
|
||||||
|
UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible
|
||||||
|
|
||||||
CVTTSS2SI(SCRATCH1, regs_.F(inst.src1));
|
CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1));
|
||||||
|
// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
|
||||||
|
// We want noSignMask otherwise, GREATER or UNORDERED.
|
||||||
|
FixupBranch isNAN = J_CC(CC_P);
|
||||||
|
FixupBranch skip = J_CC(CC_BE);
|
||||||
|
SetJumpTarget(isNAN);
|
||||||
|
MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
|
||||||
|
|
||||||
// Did we get an indefinite integer value?
|
SetJumpTarget(skip);
|
||||||
CMP(32, R(SCRATCH1), Imm32(0x80000000));
|
|
||||||
FixupBranch wasExact = J_CC(CC_NE);
|
|
||||||
|
|
||||||
XORPS(tempZero, R(tempZero));
|
|
||||||
if (inst.dest == inst.src1) {
|
|
||||||
CMPSS(regs_.FX(inst.dest), R(tempZero), CMP_LT);
|
|
||||||
} else if (cpu_info.bAVX) {
|
|
||||||
VCMPSS(regs_.FX(inst.dest), regs_.FX(inst.src1), R(tempZero), CMP_LT);
|
|
||||||
} else {
|
|
||||||
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
|
|
||||||
CMPSS(regs_.FX(inst.dest), R(tempZero), CMP_LT);
|
|
||||||
}
|
|
||||||
|
|
||||||
// At this point, -inf = 0xffffffff, inf/nan = 0x00000000.
|
|
||||||
// We want -inf to be 0x80000000 inf/nan to be 0x7fffffff, so we flip those bits.
|
|
||||||
MOVD_xmm(R(SCRATCH1), regs_.FX(inst.dest));
|
|
||||||
XOR(32, R(SCRATCH1), Imm32(0x7fffffff));
|
|
||||||
|
|
||||||
SetJumpTarget(wasExact);
|
|
||||||
MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case IROp::FCeil:
|
|
||||||
case IROp::FFloor:
|
|
||||||
CompIR_Generic(inst);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
INVALIDOP;
|
INVALIDOP;
|
||||||
break;
|
break;
|
||||||
|
@ -148,10 +148,9 @@ private:
|
|||||||
const void *positiveOnes;
|
const void *positiveOnes;
|
||||||
const void *negativeOnes;
|
const void *negativeOnes;
|
||||||
const void *qNAN;
|
const void *qNAN;
|
||||||
|
const void *maxIntBelowAsFloat;
|
||||||
const float *mulTableVi2f;
|
const float *mulTableVi2f;
|
||||||
const double *mulTableVf2i;
|
const float *mulTableVf2i;
|
||||||
const double *minIntAsDouble;
|
|
||||||
const double *maxIntAsDouble;
|
|
||||||
const Float4Constant *vec4InitValues;
|
const Float4Constant *vec4InitValues;
|
||||||
};
|
};
|
||||||
Constants constants;
|
Constants constants;
|
||||||
|
Loading…
Reference in New Issue
Block a user