mirror of
https://github.com/libretro/ppsspp.git
synced 2025-02-18 07:58:14 +00:00
armjit: Fix floor/ceil/cvt.w.s rounding.
Unfortunately, rounding correctly is probably slower.
This commit is contained in:
parent
f544a87b2f
commit
f008bebab4
@ -258,6 +258,9 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
int fs = _FS;
|
||||
int fd = _FD;
|
||||
|
||||
// TODO: Most of these mishandle infinity/NAN.
|
||||
// Maybe we can try to track per reg if they *could* be INF/NAN to optimize out?
|
||||
|
||||
switch (op & 0x3f) {
|
||||
case 4: //F(fd) = sqrtf(F(fs)); break; //sqrt
|
||||
fpr.MapDirtyIn(fd, fs);
|
||||
@ -284,22 +287,44 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
|
||||
break;
|
||||
case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s
|
||||
{
|
||||
fpr.MapDirtyIn(fd, fs);
|
||||
MOVI2F(S0, 0.4999999f, SCRATCHREG1);
|
||||
VADD(S0,fpr.R(fs),S0);
|
||||
VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED);
|
||||
VCVT(S0, fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
|
||||
VCVT(S1, S0, TO_FLOAT | IS_SIGNED);
|
||||
// For a positive value, we may have gotten a smaller value, in which case we need to increment.
|
||||
VCMP(S1, fpr.R(fs));
|
||||
VMOV(fpr.R(fd), S0);
|
||||
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
|
||||
FixupBranch skip = B_CC(CC_GE);
|
||||
MOVI2F(S0, 1.0f, SCRATCHREG1);
|
||||
VADD(S1, S1, S0);
|
||||
VCVT(fpr.R(fd), S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
|
||||
SetJumpTarget(skip);
|
||||
break;
|
||||
}
|
||||
case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
|
||||
{
|
||||
fpr.MapDirtyIn(fd, fs);
|
||||
MOVI2F(S0, 0.4999999f, SCRATCHREG1);
|
||||
VSUB(S0,fpr.R(fs),S0);
|
||||
VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED);
|
||||
VCVT(S0, fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
|
||||
VCVT(S1, S0, TO_FLOAT | IS_SIGNED);
|
||||
// For a negative value, we may have gotten a larger value, in which case we need to decrement.
|
||||
VCMP(S1, fpr.R(fs));
|
||||
VMOV(fpr.R(fd), S0);
|
||||
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
|
||||
FixupBranch skip = B_CC(CC_LS);
|
||||
MOVI2F(S0, 1.0f, SCRATCHREG1);
|
||||
VSUB(S1, S1, S0);
|
||||
VCVT(fpr.R(fd), S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
|
||||
SetJumpTarget(skip);
|
||||
break;
|
||||
}
|
||||
case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w
|
||||
fpr.MapDirtyIn(fd, fs);
|
||||
VCVT(fpr.R(fd), fpr.R(fs), TO_FLOAT | IS_SIGNED);
|
||||
break;
|
||||
case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
|
||||
{
|
||||
// TODO: This is a monster. Try setting the ARM rounding mode instead?
|
||||
fpr.MapDirtyIn(fd, fs);
|
||||
LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31));
|
||||
AND(SCRATCHREG1, SCRATCHREG1, Operand2(3));
|
||||
@ -308,17 +333,50 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
// 1: Round to zero
|
||||
// 2: Round up (ceil)
|
||||
// 3: Round down (floor)
|
||||
CMP(SCRATCHREG1, Operand2(2));
|
||||
SetCC(CC_GE); MOVI2F(S0, 0.4999999f, SCRATCHREG2);
|
||||
SetCC(CC_GT); VSUB(S0,fpr.R(fs),S0);
|
||||
SetCC(CC_EQ); VADD(S0,fpr.R(fs),S0);
|
||||
SetCC(CC_GE); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); /* 2,3 */
|
||||
SetCC(CC_AL);
|
||||
CMP(SCRATCHREG1, Operand2(1));
|
||||
SetCC(CC_EQ); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO); /* 1 */
|
||||
SetCC(CC_LT); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); /* 0 */
|
||||
SetCC(CC_AL);
|
||||
// Let's hope 0/1 are the most common. Seems likely.
|
||||
FixupBranch skipCeilFloor = B_CC(CC_LE);
|
||||
|
||||
// Okay, here we are in ceil or floor mode only.
|
||||
VCVT(S0, fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
|
||||
VCVT(S1, S0, TO_FLOAT | IS_SIGNED);
|
||||
// Compare now, we'll VMRS_APSR a bit later for the ceil/floor case.
|
||||
VCMP(S1, fpr.R(fs));
|
||||
VMOV(fpr.R(fd), S0);
|
||||
|
||||
CMP(SCRATCHREG1, Operand2(2));
|
||||
FixupBranch useFloor = B_CC(CC_GT);
|
||||
|
||||
// This is the ceil case specifically. We add one if we ended up lower.
|
||||
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
|
||||
FixupBranch finishCeil1 = B_CC(CC_GE);
|
||||
MOVI2F(S0, 1.0f, SCRATCHREG1);
|
||||
VADD(S1, S1, S0);
|
||||
VCVT(fpr.R(fd), S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
|
||||
FixupBranch finishCeil2 = B();
|
||||
|
||||
// For floor, we subtract one if we ended up higher.
|
||||
SetJumpTarget(useFloor);
|
||||
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
|
||||
FixupBranch finishFloor1 = B_CC(CC_LS);
|
||||
MOVI2F(S0, 1.0f, SCRATCHREG1);
|
||||
VSUB(S1, S1, S0);
|
||||
VCVT(fpr.R(fd), S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
|
||||
FixupBranch finishFloor2 = B();
|
||||
|
||||
SetJumpTarget(skipCeilFloor);
|
||||
// LT 1 means 0, nearest. EQ means 1, round to zero.
|
||||
SetCC(CC_LT);
|
||||
VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED);
|
||||
SetCC(CC_EQ);
|
||||
VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
|
||||
|
||||
SetJumpTarget(finishCeil1);
|
||||
SetJumpTarget(finishCeil2);
|
||||
SetJumpTarget(finishFloor1);
|
||||
SetJumpTarget(finishFloor2);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
DISABLE;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user