ARM64: Accurate floating point rounding. For some reason, FTZ doesn't seem to work though.

This commit is contained in:
Henrik Rydgard 2015-03-26 00:11:25 +01:00
parent 7d918c0ad8
commit 0a70618f87
4 changed files with 82 additions and 18 deletions

View File

@ -1056,6 +1056,12 @@ static void GetSystemReg(PStateField field, int &o0, int &op1, int &CRn, int &CR
case FIELD_NZCV:
o0 = 3; op1 = 3; CRn = 4; CRm = 2; op2 = 0;
break;
case FIELD_FPCR:
o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 0;
break;
case FIELD_FPSR:
o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 1;
break;
default:
_assert_msg_(JIT, false, "Invalid PStateField to do a register move from/to");
break;

View File

@ -145,13 +145,6 @@ enum ShiftAmount
SHIFT_48 = 3,
};
// The only system registers accessible from EL0 (user space)
// Each enumerator packs the register's encoding fields into three hex digits,
// most significant first: Op1, CRm, Op2 (so 0x341 means Op1 = 3, CRm = 4, Op2 = 1).
enum SystemRegister { // Three digits : Op1, CRm, Op2
SYSREG_NZCV = 0x320, // Op1 = 3, CRm = 2, Op2 = 0
SYSREG_FPCR = 0x340, // Op1 = 3, CRm = 4, Op2 = 0
SYSREG_FPSR = 0x341, // Op1 = 3, CRm = 4, Op2 = 1
};
enum RoundingMode {
ROUND_A, // round to nearest, ties to away
ROUND_M, // round towards -inf
@ -188,7 +181,9 @@ enum PStateField
FIELD_SPSel = 0,
FIELD_DAIFSet,
FIELD_DAIFClr,
FIELD_NZCV,
FIELD_NZCV, // The only system registers accessible from EL0 (user space)
FIELD_FPCR = 0x340,
FIELD_FPSR = 0x341,
};
enum SystemHint

View File

@ -37,6 +37,14 @@
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)
// FPCR interesting bits:
// 24: FZ (flush-to-zero)
// 23:22: RMode (0 = nearest, 1 = +inf, 2 = -inf, 3 = zero)
// Nothing else in FPCR is interesting for us, but the remaining bits should be preserved when writing it.
// To access: MRS Xt, FPCR ; MSR FPCR, Xt
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Functions that are currently known not to work should have DISABLE.

View File

@ -216,13 +216,11 @@ void Arm64Jit::Compile(u32 em_address) {
}
}
void Arm64Jit::RunLoopUntil(u64 globalticks)
{
void Arm64Jit::RunLoopUntil(u64 globalticks) {
((void (*)())enterCode)();
}
const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
{
const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) {
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.lastContinuedPC = 0;
@ -272,8 +270,7 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
int partialFlushOffset = 0;
js.numInstructions = 0;
while (js.compiling)
{
while (js.compiling) {
gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages
MIPSOpcode inst = Memory::Read_Opcode_JIT(js.compilerPC);
@ -457,8 +454,7 @@ void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op)
}
}
void Arm64Jit::Comp_Generic(MIPSOpcode op)
{
void Arm64Jit::Comp_Generic(MIPSOpcode op) {
FlushAll();
MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
if (func) {
@ -508,11 +504,70 @@ void Arm64Jit::WriteDownCountR(ARM64Reg reg) {
}
// Emits code that puts FPCR back into the state the C++ side expects:
// RMode cleared (round-to-nearest) and, unless flush-to-zero is being forced
// by config, the FZ bit set again.
//
// force: emit the restore even if the game has not been seen setting a
//        rounding mode (js.hasSetRounding is false).
void Arm64Jit::RestoreRoundingMode(bool force) {
	// TODO ARM64
	// Rounding mode emulation is disabled in the config: emit nothing.
	if (!g_Config.bSetRoundingMode)
		return;
	// If the game has never set an interesting rounding mode, we can safely skip this.
	if (!force && g_Config.bForceFlushToZero && !js.hasSetRounding)
		return;

	const int fzBit = 1 << 24;      // FPCR bit 24: FZ (flush-to-zero)
	const int rmodeField = 3 << 22; // FPCR bits 23:22: RMode

	MRS(SCRATCH2_64, FIELD_FPCR);
	// Also on ARM, we're always in flush-to-zero in C++, so stay that way.
	if (!g_Config.bForceFlushToZero)
		ORRI2R(SCRATCH2, SCRATCH2, fzBit);
	// Assume we're always in round-to-nearest mode beforehand, i.e. RMode == 0.
	ANDI2R(SCRATCH2, SCRATCH2, ~rmodeField);
	_MSR(FIELD_FPCR, SCRATCH2_64);
}
// Emits code that reads the guest's fcr31 from the MIPS state and, when it
// requests something other than the default, writes the translated rounding
// mode (and, if flush-to-zero is not forced by config, the flush bit) into FPCR.
//
// force: emit the sequence even if the game has not been seen setting a
//        rounding mode (js.hasSetRounding is false).
void Arm64Jit::ApplyRoundingMode(bool force) {
// TODO ARM64
// NOTE: Must not destroy SCRATCH1.
// If the game has never set an interesting rounding mode, we can safely skip this.
if (g_Config.bSetRoundingMode && (force || !g_Config.bForceFlushToZero || js.hasSetRounding)) {
// SCRATCH2 = fcr31, loaded from the MIPSState that CTXREG points at.
LDR(INDEX_UNSIGNED, SCRATCH2, CTXREG, offsetof(MIPSState, fcr31));
if (!g_Config.bForceFlushToZero) {
// Test bit 24 of fcr31 (the flush flag) before masking it away.
TSTI2R(SCRATCH2, 1 << 24);
// Keep only the two rounding mode bits.
// NOTE(review): the branch below relies on the flags from TSTI2R, so this
// presumably emits a non-flag-setting AND - confirm against the emitter.
ANDI2R(SCRATCH2, SCRATCH2, 3);
FixupBranch skip1 = B(CC_EQ);
// Flush flag was set: carry it as bit 2, which lands on bit 24 (FZ) after the
// shift by 22 further down.
ADDI2R(SCRATCH2, SCRATCH2, 4);
SetJumpTarget(skip1);
// We can only skip if the rounding mode is zero and flush is set.
CMPI2R(SCRATCH2, 4);
} else {
// Flush-to-zero is forced, so only the rounding mode bits matter; the
// flag-setting AND lets the branch below test them for zero directly.
ANDSI2R(SCRATCH2, SCRATCH2, 3);
}
// At this point, if it was zero, we can skip the rest.
// (In the non-forced path "zero" means SCRATCH2 == 4: mode 0 with flush set.)
FixupBranch skip = B(CC_EQ);
// SCRATCH1 is used as a temporary below; save it to honor the NOTE above.
PUSH(SCRATCH1);
// MIPS Rounding Mode: ARM Rounding Mode
// 0: Round nearest 0
// 1: Round to zero 3
// 2: Round up (ceil) 1
// 3: Round down (floor) 2
if (!g_Config.bForceFlushToZero) {
// SCRATCH2 may carry the flush flag in bit 2, so compare a masked copy.
ANDI2R(SCRATCH1, SCRATCH2, 3);
CMPI2R(SCRATCH1, 1);
} else {
CMPI2R(SCRATCH2, 1);
}
// Mode 1 becomes 3 (+2); modes 2 and 3 become 1 and 2 (-1); mode 0 is unchanged.
FixupBranch skipadd = B(CC_NEQ);
ADDI2R(SCRATCH2, SCRATCH2, 2);
SetJumpTarget(skipadd);
// NOTE(review): this branch reuses the flags of the compare against 1 above,
// which assumes ADDI2R does not update the flags - confirm against the emitter.
FixupBranch skipsub = B(CC_LE);
SUBI2R(SCRATCH2, SCRATCH2, 1);
SetJumpTarget(skipsub);
// Read-modify-write FPCR so the remaining bits are preserved.
MRS(SCRATCH1_64, FIELD_FPCR);
// Assume we're always in round-to-nearest mode beforehand.
// (RMode is therefore only cleared in the branch below; in the forced path the
// new mode is OR'ed on top of whatever RMode currently holds.)
if (!g_Config.bForceFlushToZero) {
// But we need to clear flush to zero in this case anyway.
// 7 << 22 covers FZ (bit 24) together with RMode (bits 23:22).
ANDI2R(SCRATCH1, SCRATCH1, ~(7 << 22));
}
// Merge the translated value into FPCR, shifted up by 22 to the RMode/FZ position.
ORR(SCRATCH1, SCRATCH1, SCRATCH2, ArithOption(SCRATCH2, ST_LSL, 22));
_MSR(FIELD_FPCR, SCRATCH1_64);
POP(SCRATCH1);
SetJumpTarget(skip);
}
}
void Arm64Jit::UpdateRoundingMode() {