mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-10-07 07:43:36 +00:00
JitArm64: Implement fres
This commit is contained in:
parent
41befc21cd
commit
85226e09f0
@ -2310,6 +2310,12 @@ void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd
|
|||||||
(DecodeReg(Rn) << 5) | DecodeReg(Rd));
|
(DecodeReg(Rn) << 5) | DecodeReg(Rd));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ARM64FloatEmitter::EmitScalar2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
|
||||||
|
{
|
||||||
|
Write32((1 << 30) | (U << 29) | (0b11110001 << 21) | (size << 22) | (opcode << 12) | (1 << 11) |
|
||||||
|
(DecodeReg(Rn) << 5) | DecodeReg(Rd));
|
||||||
|
}
|
||||||
|
|
||||||
void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
|
void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
|
||||||
{
|
{
|
||||||
ASSERT_MSG(DYNA_REC, !IsSingle(Rd), "%s doesn't support singles!", __func__);
|
ASSERT_MSG(DYNA_REC, !IsSingle(Rd), "%s doesn't support singles!", __func__);
|
||||||
@ -3102,6 +3108,15 @@ void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
|
|||||||
EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn);
|
EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scalar FRECPE (floating-point reciprocal estimate): Rd = estimate of 1 / Rn.
void ARM64FloatEmitter::FRECPE(ARM64Reg Rd, ARM64Reg Rn)
{
  // The upper bit of the size field is always set here; the lower bit selects double precision.
  const u32 size = IsDouble(Rd) ? 3 : 2;
  EmitScalar2RegMisc(false, size, 0x1D, Rd, Rn);
}
|
||||||
|
// Scalar FRSQRTE (floating-point reciprocal square root estimate).
// Uses the same opcode as FRECPE; the two are told apart by the U bit, which is set here.
void ARM64FloatEmitter::FRSQRTE(ARM64Reg Rd, ARM64Reg Rn)
{
  // The upper bit of the size field is always set here; the lower bit selects double precision.
  const u32 size = IsDouble(Rd) ? 3 : 2;
  EmitScalar2RegMisc(true, size, 0x1D, Rd, Rn);
}
|
||||||
|
|
||||||
// Scalar - 2 Source
|
// Scalar - 2 Source
|
||||||
void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
||||||
{
|
{
|
||||||
|
@ -996,6 +996,8 @@ public:
|
|||||||
void FNEG(ARM64Reg Rd, ARM64Reg Rn);
|
void FNEG(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
|
void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
|
void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
|
||||||
|
void FRECPE(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void FRSQRTE(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
|
||||||
// Scalar - 2 Source
|
// Scalar - 2 Source
|
||||||
void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
@ -1145,6 +1147,7 @@ private:
|
|||||||
ARM64Reg Rm);
|
ARM64Reg Rm);
|
||||||
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
|
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void EmitScalar2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt,
|
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt,
|
||||||
ARM64Reg Rn);
|
ARM64Reg Rn);
|
||||||
|
@ -140,6 +140,7 @@ public:
|
|||||||
void fcmpX(UGeckoInstruction inst);
|
void fcmpX(UGeckoInstruction inst);
|
||||||
void frspx(UGeckoInstruction inst);
|
void frspx(UGeckoInstruction inst);
|
||||||
void fctiwzx(UGeckoInstruction inst);
|
void fctiwzx(UGeckoInstruction inst);
|
||||||
|
void fresx(UGeckoInstruction inst);
|
||||||
|
|
||||||
// Paired
|
// Paired
|
||||||
void ps_maddXX(UGeckoInstruction inst);
|
void ps_maddXX(UGeckoInstruction inst);
|
||||||
@ -147,6 +148,7 @@ public:
|
|||||||
void ps_mulsX(UGeckoInstruction inst);
|
void ps_mulsX(UGeckoInstruction inst);
|
||||||
void ps_sel(UGeckoInstruction inst);
|
void ps_sel(UGeckoInstruction inst);
|
||||||
void ps_sumX(UGeckoInstruction inst);
|
void ps_sumX(UGeckoInstruction inst);
|
||||||
|
void ps_res(UGeckoInstruction inst);
|
||||||
|
|
||||||
// Loadstore paired
|
// Loadstore paired
|
||||||
void psq_l(UGeckoInstruction inst);
|
void psq_l(UGeckoInstruction inst);
|
||||||
@ -232,6 +234,7 @@ protected:
|
|||||||
// AsmRoutines
|
// AsmRoutines
|
||||||
void GenerateAsm();
|
void GenerateAsm();
|
||||||
void GenerateCommonAsm();
|
void GenerateCommonAsm();
|
||||||
|
void GenerateFres();
|
||||||
void GenerateConvertDoubleToSingle();
|
void GenerateConvertDoubleToSingle();
|
||||||
void GenerateConvertSingleToDouble();
|
void GenerateConvertSingleToDouble();
|
||||||
void GenerateFPRF(bool single);
|
void GenerateFPRF(bool single);
|
||||||
|
@ -430,6 +430,32 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
|
|||||||
"Register allocation turned singles into doubles in the middle of fctiwzx");
|
"Register allocation turned singles into doubles in the middle of fctiwzx");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JIT implementation of fresx. Falls back to the interpreter when the record bit is set or
// when FPRF emulation is enabled and this instruction's FPRF result is wanted.
void JitArm64::fresx(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITFloatingPointOff);
  FALLBACK_IF(inst.Rc);
  FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);

  const u32 b = inst.FB;
  const u32 d = inst.FD;

  // The shared fres routine takes its input in X1 and D0, returns its result in X0 and
  // clobbers X0-X4. The BL used to reach it overwrites X30.
  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
  fpr.Lock(ARM64Reg::Q0);

  const ARM64Reg VB = fpr.R(b, RegType::LowerPair);
  m_float_emit.FMOV(ARM64Reg::X1, EncodeRegToDouble(VB));
  m_float_emit.FRECPE(ARM64Reg::D0, EncodeRegToDouble(VB));

  BL(GetAsmRoutines()->fres);

  // Keep the scratch registers locked until the result has been copied out of X0, so that
  // nothing emitted while allocating the destination register is allowed to reuse them.
  // This matches ps_res, which also only unlocks after its last write of a result.
  const ARM64Reg VD = fpr.RW(d, RegType::Duplicated);
  m_float_emit.FMOV(EncodeRegToDouble(VD), ARM64Reg::X0);

  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
  fpr.Unlock(ARM64Reg::Q0);
}
|
||||||
|
|
||||||
// Since the following float conversion functions are used in non-arithmetic PPC float
|
// Since the following float conversion functions are used in non-arithmetic PPC float
|
||||||
// instructions, they must convert floats bitexact and never flush denormals to zero or turn SNaNs
|
// instructions, they must convert floats bitexact and never flush denormals to zero or turn SNaNs
|
||||||
// into QNaNs. This means we can't just use FCVT/FCVTL/FCVTN.
|
// into QNaNs. This means we can't just use FCVT/FCVTL/FCVTN.
|
||||||
|
@ -353,3 +353,34 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
|
|||||||
|
|
||||||
SetFPRFIfNeeded(true, VD);
|
SetFPRFIfNeeded(true, VD);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JIT implementation of ps_res: runs the shared fres routine once per element of the pair.
// Falls back to the interpreter when the record bit is set or when FPRF emulation is enabled
// and this instruction's FPRF result is wanted.
void JitArm64::ps_res(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITPairedOff);
  FALLBACK_IF(inst.Rc);
  FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);

  const u32 src_index = inst.FB;
  const u32 dst_index = inst.FD;

  // The fres routine takes its input in X1 and D0, returns in X0 and clobbers X0-X4.
  // W30 is reserved because BL overwrites the link register.
  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
  fpr.Lock(ARM64Reg::Q0);

  const ARM64Reg src = fpr.R(src_index, RegType::Register);
  const ARM64Reg dst = fpr.RW(dst_index, RegType::Register);

  const ARM64Reg src_d = EncodeRegToDouble(src);
  const ARM64Reg src_q = EncodeRegToQuad(src);
  const ARM64Reg dst_d = EncodeRegToDouble(dst);
  const ARM64Reg dst_q = EncodeRegToQuad(dst);

  // First element: raw input bits go to X1, and the FRECPE estimates of both elements to Q0.
  m_float_emit.FMOV(ARM64Reg::X1, src_d);
  m_float_emit.FRECPE(64, ARM64Reg::Q0, src_q);
  BL(GetAsmRoutines()->fres);

  // Second element: fetch its raw bits and move its estimate down into D0. This is done before
  // the first result is stored, while the source register still holds its original contents.
  m_float_emit.UMOV(64, ARM64Reg::X1, src_q, 1);
  m_float_emit.DUP(64, ARM64Reg::Q0, ARM64Reg::Q0, 1);
  m_float_emit.FMOV(dst_d, ARM64Reg::X0);
  BL(GetAsmRoutines()->fres);
  m_float_emit.INS(64, dst_q, 1, ARM64Reg::X0);

  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
  fpr.Unlock(ARM64Reg::Q0);

  fpr.FixSinglePrecision(dst_index);
}
|
||||||
|
@ -116,7 +116,7 @@ constexpr std::array<GekkoOPTemplate, 17> table4_2{{
|
|||||||
{20, &JitArm64::fp_arith}, // ps_sub
|
{20, &JitArm64::fp_arith}, // ps_sub
|
||||||
{21, &JitArm64::fp_arith}, // ps_add
|
{21, &JitArm64::fp_arith}, // ps_add
|
||||||
{23, &JitArm64::ps_sel}, // ps_sel
|
{23, &JitArm64::ps_sel}, // ps_sel
|
||||||
{24, &JitArm64::FallBackToInterpreter}, // ps_res
|
{24, &JitArm64::ps_res}, // ps_res
|
||||||
{25, &JitArm64::fp_arith}, // ps_mul
|
{25, &JitArm64::fp_arith}, // ps_mul
|
||||||
{26, &JitArm64::FallBackToInterpreter}, // ps_rsqrte
|
{26, &JitArm64::FallBackToInterpreter}, // ps_rsqrte
|
||||||
{28, &JitArm64::ps_maddXX}, // ps_msub
|
{28, &JitArm64::ps_maddXX}, // ps_msub
|
||||||
@ -296,7 +296,7 @@ constexpr std::array<GekkoOPTemplate, 9> table59{{
|
|||||||
{18, &JitArm64::fp_arith}, // fdivsx
|
{18, &JitArm64::fp_arith}, // fdivsx
|
||||||
{20, &JitArm64::fp_arith}, // fsubsx
|
{20, &JitArm64::fp_arith}, // fsubsx
|
||||||
{21, &JitArm64::fp_arith}, // faddsx
|
{21, &JitArm64::fp_arith}, // faddsx
|
||||||
{24, &JitArm64::FallBackToInterpreter}, // fresx
|
{24, &JitArm64::fresx}, // fresx
|
||||||
{25, &JitArm64::fp_arith}, // fmulsx
|
{25, &JitArm64::fp_arith}, // fmulsx
|
||||||
{28, &JitArm64::fp_arith}, // fmsubsx
|
{28, &JitArm64::fp_arith}, // fmsubsx
|
||||||
{29, &JitArm64::fp_arith}, // fmaddsx
|
{29, &JitArm64::fp_arith}, // fmaddsx
|
||||||
|
@ -2,7 +2,10 @@
|
|||||||
// Licensed under GPLv2+
|
// Licensed under GPLv2+
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
#include "Common/Arm64Emitter.h"
|
#include "Common/Arm64Emitter.h"
|
||||||
|
#include "Common/BitUtils.h"
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/FloatUtils.h"
|
#include "Common/FloatUtils.h"
|
||||||
#include "Common/JitRegister.h"
|
#include "Common/JitRegister.h"
|
||||||
@ -198,6 +201,10 @@ void JitArm64::GenerateAsm()
|
|||||||
|
|
||||||
void JitArm64::GenerateCommonAsm()
|
void JitArm64::GenerateCommonAsm()
|
||||||
{
|
{
|
||||||
|
GetAsmRoutines()->fres = GetCodePtr();
|
||||||
|
GenerateFres();
|
||||||
|
JitRegister::Register(GetAsmRoutines()->fres, GetCodePtr(), "JIT_fres");
|
||||||
|
|
||||||
GetAsmRoutines()->cdts = GetCodePtr();
|
GetAsmRoutines()->cdts = GetCodePtr();
|
||||||
GenerateConvertDoubleToSingle();
|
GenerateConvertDoubleToSingle();
|
||||||
JitRegister::Register(GetAsmRoutines()->cdts, GetCodePtr(), "JIT_cdts");
|
JitRegister::Register(GetAsmRoutines()->cdts, GetCodePtr(), "JIT_cdts");
|
||||||
@ -215,6 +222,60 @@ void JitArm64::GenerateCommonAsm()
|
|||||||
GenerateQuantizedLoadStores();
|
GenerateQuantizedLoadStores();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Input: X1 contains input, and D0 contains result of running the input through AArch64 FRECPE.
|
||||||
|
// Output in X0 and memory (PPCState). Clobbers X0-X4 and flags.
|
||||||
|
void JitArm64::GenerateFres()
|
||||||
|
{
|
||||||
|
// The idea behind this implementation: AArch64's frecpe instruction calculates the exponent and
|
||||||
|
// sign the same way as PowerPC's fresx does. For the special inputs zero, NaN and infinity,
|
||||||
|
// even the mantissa matches. But the mantissa does not match for most other inputs, so in the
|
||||||
|
// normal case we calculate the mantissa using the table-based algorithm from the interpreter.
|
||||||
|
|
||||||
|
UBFX(ARM64Reg::X2, ARM64Reg::X1, 52, 11); // Grab the exponent
|
||||||
|
m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
|
||||||
|
CMP(ARM64Reg::X2, 895);
|
||||||
|
ANDI2R(ARM64Reg::X3, ARM64Reg::X1, Common::DOUBLE_SIGN);
|
||||||
|
FixupBranch small_exponent = B(CCFlags::CC_LO);
|
||||||
|
|
||||||
|
MOVI2R(ARM64Reg::X4, 1148LL);
|
||||||
|
CMP(ARM64Reg::X2, ARM64Reg::X4);
|
||||||
|
FixupBranch large_exponent = B(CCFlags::CC_HI);
|
||||||
|
|
||||||
|
UBFX(ARM64Reg::X2, ARM64Reg::X1, 47, 5); // Grab upper part of mantissa
|
||||||
|
MOVP2R(ARM64Reg::X3, &Common::fres_expected);
|
||||||
|
ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3));
|
||||||
|
LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0);
|
||||||
|
UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 10); // Grab lower part of mantissa
|
||||||
|
MOVI2R(ARM64Reg::W4, 1);
|
||||||
|
ANDI2R(ARM64Reg::X0, ARM64Reg::X0, Common::DOUBLE_SIGN | Common::DOUBLE_EXP);
|
||||||
|
MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W4);
|
||||||
|
SUB(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W1, ArithOption(ARM64Reg::W1, ShiftType::LSR, 1));
|
||||||
|
ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29));
|
||||||
|
RET();
|
||||||
|
|
||||||
|
SetJumpTarget(small_exponent);
|
||||||
|
TSTI2R(ARM64Reg::X1, Common::DOUBLE_EXP | Common::DOUBLE_FRAC);
|
||||||
|
FixupBranch zero = B(CCFlags::CC_EQ);
|
||||||
|
MOVI2R(ARM64Reg::X4,
|
||||||
|
Common::BitCast<u64>(static_cast<double>(std::numeric_limits<float>::max())));
|
||||||
|
ORR(ARM64Reg::X0, ARM64Reg::X3, ARM64Reg::X4);
|
||||||
|
RET();
|
||||||
|
|
||||||
|
SetJumpTarget(zero);
|
||||||
|
LDR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||||
|
FixupBranch skip_set_zx = TBNZ(ARM64Reg::W4, 26);
|
||||||
|
ORRI2R(ARM64Reg::W4, ARM64Reg::W4, FPSCR_FX | FPSCR_ZX, ARM64Reg::W2);
|
||||||
|
STR(IndexType::Unsigned, ARM64Reg::W4, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||||
|
SetJumpTarget(skip_set_zx);
|
||||||
|
RET();
|
||||||
|
|
||||||
|
SetJumpTarget(large_exponent);
|
||||||
|
MOVI2R(ARM64Reg::X4, 0x7FF);
|
||||||
|
CMP(ARM64Reg::X2, ARM64Reg::X4);
|
||||||
|
CSEL(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X3, CCFlags::CC_EQ);
|
||||||
|
RET();
|
||||||
|
}
|
||||||
|
|
||||||
// Input in X0, output in W1, clobbers X0-X3 and flags.
|
// Input in X0, output in W1, clobbers X0-X3 and flags.
|
||||||
void JitArm64::GenerateConvertDoubleToSingle()
|
void JitArm64::GenerateConvertDoubleToSingle()
|
||||||
{
|
{
|
||||||
|
@ -25,6 +25,7 @@ elseif(_M_ARM_64)
|
|||||||
PowerPC/DivUtilsTest.cpp
|
PowerPC/DivUtilsTest.cpp
|
||||||
PowerPC/JitArm64/ConvertSingleDouble.cpp
|
PowerPC/JitArm64/ConvertSingleDouble.cpp
|
||||||
PowerPC/JitArm64/FPRF.cpp
|
PowerPC/JitArm64/FPRF.cpp
|
||||||
|
PowerPC/JitArm64/Fres.cpp
|
||||||
PowerPC/JitArm64/MovI2R.cpp
|
PowerPC/JitArm64/MovI2R.cpp
|
||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
|
66
Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
Normal file
66
Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
// Copyright 2021 Dolphin Emulator Project
|
||||||
|
// Licensed under GPLv2+
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
#include "Common/Arm64Emitter.h"
|
||||||
|
#include "Common/BitUtils.h"
|
||||||
|
#include "Common/CommonTypes.h"
|
||||||
|
#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
|
||||||
|
#include "Core/PowerPC/JitArm64/Jit.h"
|
||||||
|
#include "Core/PowerPC/PowerPC.h"
|
||||||
|
|
||||||
|
#include "../TestValues.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
using namespace Arm64Gen;
|
||||||
|
|
||||||
|
class TestFres : public JitArm64
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TestFres()
|
||||||
|
{
|
||||||
|
AllocCodeSpace(4096);
|
||||||
|
|
||||||
|
const u8* raw_fres = GetCodePtr();
|
||||||
|
GenerateFres();
|
||||||
|
|
||||||
|
fres = Common::BitCast<u64 (*)(u64)>(GetCodePtr());
|
||||||
|
MOV(ARM64Reg::X15, ARM64Reg::X30);
|
||||||
|
MOV(ARM64Reg::X14, PPC_REG);
|
||||||
|
MOVP2R(PPC_REG, &PowerPC::ppcState);
|
||||||
|
MOV(ARM64Reg::X1, ARM64Reg::X0);
|
||||||
|
m_float_emit.FMOV(ARM64Reg::D0, ARM64Reg::X0);
|
||||||
|
m_float_emit.FRECPE(ARM64Reg::D0, ARM64Reg::D0);
|
||||||
|
BL(raw_fres);
|
||||||
|
MOV(ARM64Reg::X30, ARM64Reg::X15);
|
||||||
|
MOV(PPC_REG, ARM64Reg::X14);
|
||||||
|
RET();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::function<u64(u64)> fres;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Checks that the JIT's fres routine returns bit-identical results to the interpreter's
// Common::ApproximateReciprocal for every entry of double_test_values.
TEST(JitArm64, Fres)
{
  TestFres test;

  for (const u64 input_bits : double_test_values)
  {
    const double input = Common::BitCast<double>(input_bits);

    const u64 expected = Common::BitCast<u64>(Common::ApproximateReciprocal(input));
    const u64 actual = test.fres(input_bits);

    // Print the input alongside both results; EXPECT_EQ alone would not show the input.
    const bool results_match = expected == actual;
    if (!results_match)
      fmt::print("{:016x} -> {:016x} == {:016x}\n", input_bits, actual, expected);

    EXPECT_EQ(expected, actual);
  }
}
|
@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
|
|
||||||
constexpr std::array<u64, 49> double_test_values{
|
constexpr std::array<u64, 57> double_test_values{
|
||||||
// Special values
|
// Special values
|
||||||
0x0000'0000'0000'0000, // positive zero
|
0x0000'0000'0000'0000, // positive zero
|
||||||
0x0000'0000'0000'0001, // smallest positive denormal
|
0x0000'0000'0000'0001, // smallest positive denormal
|
||||||
@ -54,13 +54,25 @@ constexpr std::array<u64, 49> double_test_values{
|
|||||||
0x3680'1234'5678'9ABC, 0x36A0'1234'5678'9ABC, 0x36B0'1234'5678'9ABC, 0xB680'1234'5678'9ABC,
|
0x3680'1234'5678'9ABC, 0x36A0'1234'5678'9ABC, 0x36B0'1234'5678'9ABC, 0xB680'1234'5678'9ABC,
|
||||||
0xB6A0'1234'5678'9ABC, 0xB6B0'1234'5678'9ABC,
|
0xB6A0'1234'5678'9ABC, 0xB6B0'1234'5678'9ABC,
|
||||||
|
|
||||||
|
// (exp > 1148) Boundary case for fres
|
||||||
|
0x47C0'0000'0000'0000, // 2^125 = fres result is non-zero
|
||||||
|
0x47D0'0000'0000'0000, // 2^126 = fres result is zero
|
||||||
|
0xC7C0'0000'0000'0000, // -2^125 = fres result is non-zero
|
||||||
|
0xC7D0'0000'0000'0000, // -2^126 = fres result is zero
|
||||||
|
|
||||||
|
// (exp < 895) Boundary case for fres
|
||||||
|
0x37F0'0000'0000'0000, // 2^(-128) = fres result is non-max
|
||||||
|
0x37E0'0000'0000'0000, // 2^(-129) = fres result is max
|
||||||
|
0xB7F0'0000'0000'0000, // -2^(-128) = fres result is non-max
|
||||||
|
0xB7E0'0000'0000'0000, // -2^(-129) = fres result is max
|
||||||
|
|
||||||
// Some typical numbers
|
// Some typical numbers
|
||||||
0x3FF8'0000'0000'0000, // 1.5
|
0x3FF8'0000'0000'0000, // 1.5
|
||||||
0x408F'4000'0000'0000, // 1000
|
0x408F'4000'0000'0000, // 1000
|
||||||
0xC008'0000'0000'0000, // -3
|
0xC008'0000'0000'0000, // -3
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr std::array<u32, 29> single_test_values{
|
constexpr std::array<u32, 33> single_test_values{
|
||||||
// Special values
|
// Special values
|
||||||
0x0000'0000, // positive zero
|
0x0000'0000, // positive zero
|
||||||
0x0000'0001, // smallest positive denormal
|
0x0000'0001, // smallest positive denormal
|
||||||
@ -89,6 +101,12 @@ constexpr std::array<u32, 29> single_test_values{
|
|||||||
0xFFC0'0000, // first negative QNaN
|
0xFFC0'0000, // first negative QNaN
|
||||||
0xFFFF'FFFF, // last negative QNaN
|
0xFFFF'FFFF, // last negative QNaN
|
||||||
|
|
||||||
|
// (exp > 252) Boundary case for fres
|
||||||
|
0x7E00'0000, // 2^125 = fres result is non-zero
|
||||||
|
0x7E80'0000, // 2^126 = fres result is zero
|
||||||
|
0xC7C0'0000, // -2^125 = fres result is non-zero
|
||||||
|
0xC7D0'0000, // -2^126 = fres result is zero
|
||||||
|
|
||||||
// Some typical numbers
|
// Some typical numbers
|
||||||
0x3FC0'0000, // 1.5
|
0x3FC0'0000, // 1.5
|
||||||
0x447A'0000, // 1000
|
0x447A'0000, // 1000
|
||||||
|
@ -84,6 +84,7 @@
|
|||||||
<ItemGroup Condition="'$(Platform)'=='ARM64'">
|
<ItemGroup Condition="'$(Platform)'=='ARM64'">
|
||||||
<ClCompile Include="Core\PowerPC\JitArm64\ConvertSingleDouble.cpp" />
|
<ClCompile Include="Core\PowerPC\JitArm64\ConvertSingleDouble.cpp" />
|
||||||
<ClCompile Include="Core\PowerPC\JitArm64\FPRF.cpp" />
|
<ClCompile Include="Core\PowerPC\JitArm64\FPRF.cpp" />
|
||||||
|
<ClCompile Include="Core\PowerPC\JitArm64\Fres.cpp" />
|
||||||
<ClCompile Include="Core\PowerPC\JitArm64\MovI2R.cpp" />
|
<ClCompile Include="Core\PowerPC\JitArm64\MovI2R.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
Loading…
Reference in New Issue
Block a user