armjit: Fix floor/ceil/cvt.w.s rounding.

Unfortunately, rounding correctly is probably slower.
This commit is contained in:
Unknown W. Brackets 2014-06-28 00:37:13 -07:00
parent f544a87b2f
commit f008bebab4

View File

@ -258,6 +258,9 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
int fs = _FS;
int fd = _FD;
// TODO: Most of these mishandle infinity/NAN.
// Maybe we can try to track per reg if they *could* be INF/NAN to optimize out?
switch (op & 0x3f) {
case 4: //F(fd) = sqrtf(F(fs)); break; //sqrt
fpr.MapDirtyIn(fd, fs);
@ -284,22 +287,44 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
break;
case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s
{
fpr.MapDirtyIn(fd, fs);
MOVI2F(S0, 0.4999999f, SCRATCHREG1);
VADD(S0,fpr.R(fs),S0);
VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED);
VCVT(S0, fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
VCVT(S1, S0, TO_FLOAT | IS_SIGNED);
// For a positive value, we may have gotten a smaller value, in which case we need to increment.
VCMP(S1, fpr.R(fs));
VMOV(fpr.R(fd), S0);
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
FixupBranch skip = B_CC(CC_GE);
MOVI2F(S0, 1.0f, SCRATCHREG1);
VADD(S1, S1, S0);
VCVT(fpr.R(fd), S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
SetJumpTarget(skip);
break;
}
case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
{
fpr.MapDirtyIn(fd, fs);
MOVI2F(S0, 0.4999999f, SCRATCHREG1);
VSUB(S0,fpr.R(fs),S0);
VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED);
VCVT(S0, fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
VCVT(S1, S0, TO_FLOAT | IS_SIGNED);
// For a negative value, we may have gotten a larger value, in which case we need to decrement.
VCMP(S1, fpr.R(fs));
VMOV(fpr.R(fd), S0);
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
FixupBranch skip = B_CC(CC_LS);
MOVI2F(S0, 1.0f, SCRATCHREG1);
VSUB(S1, S1, S0);
VCVT(fpr.R(fd), S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
SetJumpTarget(skip);
break;
}
case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w
fpr.MapDirtyIn(fd, fs);
VCVT(fpr.R(fd), fpr.R(fs), TO_FLOAT | IS_SIGNED);
break;
case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
{
// TODO: This is a monster. Try setting the ARM rounding mode instead?
fpr.MapDirtyIn(fd, fs);
LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31));
AND(SCRATCHREG1, SCRATCHREG1, Operand2(3));
@ -308,17 +333,50 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
// 1: Round to zero
// 2: Round up (ceil)
// 3: Round down (floor)
CMP(SCRATCHREG1, Operand2(2));
SetCC(CC_GE); MOVI2F(S0, 0.4999999f, SCRATCHREG2);
SetCC(CC_GT); VSUB(S0,fpr.R(fs),S0);
SetCC(CC_EQ); VADD(S0,fpr.R(fs),S0);
SetCC(CC_GE); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); /* 2,3 */
SetCC(CC_AL);
CMP(SCRATCHREG1, Operand2(1));
SetCC(CC_EQ); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO); /* 1 */
SetCC(CC_LT); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); /* 0 */
SetCC(CC_AL);
// Let's hope 0/1 are the most common. Seems likely.
FixupBranch skipCeilFloor = B_CC(CC_LE);
// Okay, here we are in ceil or floor mode only.
VCVT(S0, fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
VCVT(S1, S0, TO_FLOAT | IS_SIGNED);
// Compare now, we'll VMRS_APSR a bit later for the ceil/floor case.
VCMP(S1, fpr.R(fs));
VMOV(fpr.R(fd), S0);
CMP(SCRATCHREG1, Operand2(2));
FixupBranch useFloor = B_CC(CC_GT);
// This is the ceil case specifically. We add one if we ended up lower.
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
FixupBranch finishCeil1 = B_CC(CC_GE);
MOVI2F(S0, 1.0f, SCRATCHREG1);
VADD(S1, S1, S0);
VCVT(fpr.R(fd), S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
FixupBranch finishCeil2 = B();
// For floor, we subtract one if we ended up higher.
SetJumpTarget(useFloor);
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
FixupBranch finishFloor1 = B_CC(CC_LS);
MOVI2F(S0, 1.0f, SCRATCHREG1);
VSUB(S1, S1, S0);
VCVT(fpr.R(fd), S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
FixupBranch finishFloor2 = B();
SetJumpTarget(skipCeilFloor);
// LT 1 means 0, nearest. EQ means 1, round to zero.
SetCC(CC_LT);
VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED);
SetCC(CC_EQ);
VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
SetJumpTarget(finishCeil1);
SetJumpTarget(finishCeil2);
SetJumpTarget(finishFloor1);
SetJumpTarget(finishFloor2);
break;
}
default:
DISABLE;
}