Try software rounding mode instead. Use conditionals for VFP functions. Note: Round to Zero not working for some reason?

This commit is contained in:
Sacha 2013-03-08 16:09:27 +10:00
parent fbef590bed
commit 7782be9a3e
2 changed files with 59 additions and 39 deletions

View File

@ -801,13 +801,13 @@ void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, u16 offset)
if (single_reg)
{
Write32(NO_COND | (0x1B << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \
Write32(condition | (0x1B << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \
| ((Dest & 0x1E) << 11) | (10 << 8) | (offset >> 2));
}
else
{
Write32(NO_COND | (0x1B << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \
Write32(condition | (0x1B << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \
| ((Dest & 0xF) << 12) | (11 << 8) | (offset >> 2));
}
}
@ -828,13 +828,13 @@ void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, u16 offset)
if (single_reg)
{
Write32(NO_COND | (0x1B << 23) | ((Src & 0x1) << 22) | (Base << 16) \
Write32(condition | (0x1B << 23) | ((Src & 0x1) << 22) | (Base << 16) \
| ((Src & 0x1E) << 11) | (10 << 8) | (offset >> 2));
}
else
{
Write32(NO_COND | (0x1B << 23) | ((Src & 0x10) << 18) | (Base << 16) \
Write32(condition | (0x1B << 23) | ((Src & 0x10) << 18) | (Base << 16) \
| ((Src & 0xF) << 12) | (11 << 8) | (offset >> 2));
}
}
@ -848,12 +848,12 @@ void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm, bool E)
if (single_reg)
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x34 << 16) | ((Vd & 0x1E) << 11) \
Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x34 << 16) | ((Vd & 0x1E) << 11) \
| (E << 7) | (0x29 << 6) | ((Vm & 0x1) << 5) | (Vm >> 1));
}
else
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x34 << 16) | ((Vd & 0xF) << 12) \
Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x34 << 16) | ((Vd & 0xF) << 12) \
| (E << 7) | (0x2C << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF));
}
}
@ -866,24 +866,24 @@ void ARMXEmitter::VCMP(ARMReg Vd, bool E)
if (single_reg)
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x35 << 16) | ((Vd & 0x1E) << 11) \
Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x35 << 16) | ((Vd & 0x1E) << 11) \
| (E << 7) | (0x29 << 6));
}
else
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x35 << 16) | ((Vd & 0xF) << 12) \
Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x35 << 16) | ((Vd & 0xF) << 12) \
| (E << 7) | (0x2C << 6));
}
}
void ARMXEmitter::VMRS_APSR() {
Write32(NO_COND | 0xEF10A10 | (15 << 12));
Write32(condition | 0xEF10A10 | (15 << 12));
}
void ARMXEmitter::VMRS(ARMReg Rt) {
Write32(NO_COND | (0xEF << 20) | (1 << 16) | (Rt << 12) | 0xA10);
Write32(condition | (0xEF << 20) | (1 << 16) | (Rt << 12) | 0xA10);
}
void ARMXEmitter::VMSR(ARMReg Rt) {
Write32(NO_COND | (0xEE << 20) | (1 << 16) | (Rt << 12) | 0xA10);
Write32(condition | (0xEE << 20) | (1 << 16) | (Rt << 12) | 0xA10);
}
void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm)
@ -899,13 +899,13 @@ void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm)
if (single_reg)
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | ((Vn & 0x1E) << 15) \
Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | ((Vn & 0x1E) << 15) \
| ((Vd & 0x1E) << 11) | (0xA << 8) | ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) \
| (Vm >> 1));
}
else
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) \
Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) \
| ((Vd & 0xF) << 12) | (0xB << 8) | ((Vn & 0x10) << 3) | ((Vm & 0x10) << 2) \
| (Vm & 0xF));
}
@ -921,12 +921,12 @@ void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm)
if (single_reg)
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \
Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \
| ((Vd & 0x1E) << 11) | (0x2B << 6) | ((Vm & 0x1) << 5) | (Vm >> 1));
}
else
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \
Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \
| ((Vd & 0xF) << 12) | (0x2F << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF));
}
}
@ -946,7 +946,7 @@ void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm)
if (single_reg)
{
Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \
Write32(condition | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \
| ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \
| ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1));
}
@ -954,7 +954,7 @@ void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
if (double_reg)
{
Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \
Write32(condition | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \
| ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \
| ((Vn & 0x10) << 3) | ((Vm & 0x10) << 2) | (Vm & 0xF));
}
@ -981,7 +981,7 @@ void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm)
if (single_reg)
{
Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \
Write32(condition | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \
| ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \
| ((Vn & 0x1) << 7) | (1 << 6) | ((Vm & 0x1) << 5) | (Vm >> 1));
}
@ -989,7 +989,7 @@ void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
if (double_reg)
{
Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \
Write32(condition | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \
| ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \
| ((Vn & 0x10) << 3) | (1 << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF));
}
@ -1017,7 +1017,7 @@ void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm)
if (single_reg)
{
Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x2 << 20) \
Write32(condition | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x2 << 20) \
| ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \
| ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1));
}
@ -1025,7 +1025,7 @@ void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
if (double_reg)
{
Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x2 << 20) \
Write32(condition | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x2 << 20) \
| ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \
| ((Vn & 0x10) << 3) | ((Vm & 0x10) << 2) | (Vm & 0xF));
}
@ -1050,7 +1050,7 @@ void ARMXEmitter::VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm)
if (single_reg)
{
Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x0 << 20) \
Write32(condition | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x0 << 20) \
| ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \
| ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1));
}
@ -1069,12 +1069,12 @@ void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm)
if (single_reg)
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x30 << 16) \
Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x30 << 16) \
| ((Vd & 0x1E) << 11) | (0x2B << 6) | ((Vm & 0x1) << 5) | (Vm >> 1));
}
else
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x30 << 16) \
Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x30 << 16) \
| ((Vd & 0xF) << 12) | (0x2F << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF));
}
}
@ -1088,12 +1088,12 @@ void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm)
if (single_reg)
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \
Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \
| ((Vd & 0x1E) << 11) | (0x29 << 6) | ((Vm & 0x1) << 5) | (Vm >> 1));
}
else
{
Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \
Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \
| ((Vd & 0xF) << 12) | (0x2D << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF));
}
}
@ -1105,7 +1105,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high)
Dest = SubBase(Dest);
Write32(NO_COND | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \
Write32(condition | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \
| (11 << 8) | ((Dest & 0x10) << 3) | (1 << 4));
}
@ -1119,7 +1119,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src)
{
// Moving to a Neon register FROM ARM Reg
Dest = (ARMReg)(Dest - S0);
Write32(NO_COND | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \
Write32(condition | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \
| (0xA << 8) | ((Dest & 0x1) << 7) | (1 << 4));
return;
}
@ -1139,7 +1139,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src)
{
// Moving to ARM Reg from Neon Register
Src = (ARMReg)(Src - S0);
Write32(NO_COND | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \
Write32(condition | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \
| (0xA << 8) | ((Src & 0x1) << 7) | (1 << 4));
return;
}
@ -1169,7 +1169,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src)
if (Single)
{
Write32(NO_COND | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | ((Dest & 0x1E) << 11) \
Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | ((Dest & 0x1E) << 11) \
| (0x5 << 9) | (1 << 6) | ((Src & 0x1) << 5) | ((Src & 0x1E) >> 1));
}
else
@ -1186,7 +1186,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src)
}
else
{
Write32(NO_COND | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | ((Dest & 0xF) << 12) \
Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | ((Dest & 0xF) << 12) \
| (0x2D << 6) | ((Src & 0x10) << 1) | (Src & 0xF));
}
}
@ -1202,10 +1202,10 @@ void ARMXEmitter::VCVT(ARMReg Dest, ARMReg Source, int flags)
if (single_reg)
{
Write32(NO_COND | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \
Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \
| ((Dest & 0x1E) << 11) | (op << 7) | (0x29 << 6) | ((Source & 0x1) << 5) | (Source >> 1));
} else {
Write32(NO_COND | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \
Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \
| ((Dest & 0xF) << 12) | (1 << 8) | (op << 7) | (0x29 << 6) | ((Source & 0x10) << 1) | (Source & 0xF));
}
}

View File

@ -222,10 +222,30 @@ void Jit::Comp_FPU2op(u32 op)
VCVT(fpr.R(fd), fpr.R(fs), TO_FLOAT | IS_SIGNED);
break;
case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
DISABLE;
// Requires correct floating mode set
fpr.MapDirtyIn(fd, fs);
VCVT(fpr.R(fd), fpr.R(fs), TO_INT);
LDR(R0, CTXREG, offsetof(MIPSState, fcr31));
AND(R0, R0, Operand2(3));
// MIPS Rounding Mode:
// 0: Round nearest
// 1: Round to zero
// 2: Round up (ceil)
// 3: Round down (floor)
CMP(R0, Operand2(2));
SetCC(CC_GE);
MOVI2F(S0, 0.5f, R1);
SetCC(CC_GT);
VSUB(S0,fpr.R(fs),S0);
SetCC(CC_EQ);
VADD(S0,fpr.R(fs),S0);
SetCC(CC_GE);
VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); /* 2,3 */
CMP(R0, Operand2(1));
SetCC(CC_EQ);
VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO); /* 1 */
SetCC(CC_LT);
VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); /* 0 */
SetCC(CC_AL);
break;
default:
DISABLE;
@ -260,7 +280,7 @@ void Jit::Comp_mxc1(u32 op)
return;
case 6: //currentMIPS->WriteFCR(fs, R(rt)); break; //ctc1
// Rounding isn't right.
// Hardware rounding method.
/*if (fs == 31)
{
fpr.MapReg(rt, 0);
@ -269,9 +289,9 @@ void Jit::Comp_mxc1(u32 op)
// 0, 1, 2, 3 <-> 0, 3, 1, 2
CMP(R0, Operand2(1));
SetCC(CC_EQ);
SUB(R0, R0, Operand2(1));
SetCC(CC_GT);
ADD(R0, R0, Operand2(2));
SetCC(CC_GT);
SUB(R0, R0, Operand2(1));
SetCC(CC_AL);
// Load and Store RM to FPSCR